From 1cbb0d6474f516a08d9152b17007e5cf713b6a2c Mon Sep 17 00:00:00 2001
From: Erik Stenman <erik@stenmans.org>
Date: Mon, 27 Mar 2017 00:55:43 +0200
Subject: [PATCH] Adding some more chapters and examples.

---
 .gitignore                                |    4 +-
 Makefile                                  |   19 +-
 ap-beam_instructions.asciidoc             |  742 ++++++++++
 beam_instructions.asciidoc                |  503 +++++++
 beam_internal_instructions.asciidoc       |    4 +
 book.asciidoc                             |   14 +-
 calls.asciidoc                            |  149 ++
 code/book/src/generate_op_doc.erl         |  120 ++
 code/compiler_chapter/src/json_tokens.xrl |   68 +
 code/compiler_chapter/src/world.E         |   17 +
 code/compiler_chapter/src/world.P         |   15 +
 code/compiler_chapter/src/world.S         |   37 +
 code/compiler_chapter/src/world.erl       |    7 +
 code/compiler_chapter/src/world.hrl       |    1 +
 code/memory_chapter/src/gc_example.erl    |   19 +
 compiler.asciidoc                         | 1517 ++++++++++-----------
 genop.tab                                 |  539 ++++++++
 memory.asciidoc                           | 1410 +++++++++++++++++++
 preface.asciidoc                          |    4 +-
 type_system.asciidoc                      |  354 +++++
 20 files changed, 4765 insertions(+), 778 deletions(-)
 create mode 100755 ap-beam_instructions.asciidoc
 create mode 100755 beam_instructions.asciidoc
 create mode 100755 beam_internal_instructions.asciidoc
 create mode 100755 calls.asciidoc
 create mode 100755 code/book/src/generate_op_doc.erl
 create mode 100644 code/compiler_chapter/src/json_tokens.xrl
 create mode 100644 code/compiler_chapter/src/world.E
 create mode 100644 code/compiler_chapter/src/world.P
 create mode 100644 code/compiler_chapter/src/world.S
 create mode 100644 code/compiler_chapter/src/world.erl
 create mode 100644 code/compiler_chapter/src/world.hrl
 create mode 100644 code/memory_chapter/src/gc_example.erl
 create mode 100755 genop.tab
 create mode 100644 memory.asciidoc
 create mode 100755 type_system.asciidoc

diff --git a/.gitignore b/.gitignore
index 90e89af..65a8b56 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,6 @@ rel/example_project
 git-log.xml
 book-revhistory.xml
 beam-book-from-ab.xml
-beam-book.pdf
\ No newline at end of file
+beam-book.pdf
+opcodes_doc.asciidoc
+.#.gitignore
diff --git a/Makefile b/Makefile
index 5ad8f0b..337a206 100755
--- a/Makefile
+++ b/Makefile
@@ -4,14 +4,16 @@ adocs = book.asciidoc \
                         preface.asciidoc \
                         introduction.asciidoc \
                         compiler.asciidoc \
-		     	processes.asciidoc 
+		     	processes.asciidoc \
+			calls.asciidoc \
+                        type_system.asciidoc \
+			beam_internal_instructions.asciidoc \
+                        memory.asciidoc \
+                        ap-beam_instructions.asciidoc \
+			opcodes_doc.asciidoc
                    #     beam.asciidoc \
                    #     beam_modules.asciidoc \
-                   #     type_system.asciidoc \
-                   #     memory.asciidoc \
                    #     erts-book.asciidoc  \
-                   #     ap-beam_instructions.asciidoc \
-	           # opcodes_doc.asciidoc
 
 
 all: beam-book.pdf
@@ -33,6 +35,13 @@ beam-book.pdf: beam-book-from-ab.xml book-revhistory.xml
 # index.html: *.asciidoc
 # 	asciidoc -o index.html -a icons -a toc2 book.asciidoc 
 
+# Build the opcode documentation generator (erlc -o expects the output directory).
+code/book/ebin/generate_op_doc.beam: code/book/src/generate_op_doc.erl
+	erlc -o $(dir $@) $<
+
+# Generate opcodes_doc.asciidoc from genop.tab; -pa makes the compiled generator loadable.
+opcodes_doc.asciidoc: genop.tab code/book/ebin/generate_op_doc.beam
+	erl -pa code/book/ebin -noshell -s generate_op_doc from_shell genop.tab opcodes_doc.asciidoc
 # generate_op_doc.beam: generate_op_doc.erl
 # 	erlc generate_op_doc.erl
 
diff --git a/ap-beam_instructions.asciidoc b/ap-beam_instructions.asciidoc
new file mode 100755
index 0000000..09d769d
--- /dev/null
+++ b/ap-beam_instructions.asciidoc
@@ -0,0 +1,742 @@
+[[AP-Instructions]]
+== Appendix A: BEAM Instructions
+
+Here we will go through most of the instructions in the BEAM
+generic instruction set in detail. In the next section we list
+all instructions with a brief explanation generated from the
+documentation in the code (see +lib/compiler/src/genop.tab+).
+
+=== Functions and Labels
+
+==== label Lbl
+
+Instruction number 1 in the generic instruction set is not really an
+instruction at all. It is just a module local label giving a name, or
+actually a number to the current position in the code.
+
+Each label potentially marks the beginning of a basic block since
+it is a potential destination of a jump.
+
+==== func_info Module Function Arity
+
+The code for each function starts with a func_info instruction.
+This instruction is used for generating a function clause error,
+and the execution of the code in the function actually starts
+at the label following the func_info instruction.
+
+Imagine a function with a guard:
+
+[source,erlang]
+------------------------------------------
+id(I) when is_integer(I) -> I.
+------------------------------------------
+
+The Beam code for this function might look like:
+
+[source,erlang]
+------------------------------------------
+
+{function, id, 1, 4}.
+  {label,3}.
+    {func_info,{atom,test1},{atom,id},1}.
+  {label,4}.
+    {test,is_integer,{f,3},[{x,0}]}.
+    return.
+
+------------------------------------------
+
+Here the meta information +{function, id, 1, 4}+ tells us that
+execution of the id/1 function will start at label 4. At label 4 we do
+an +is_integer+ on x0 and if we fail we jump to label 3 (f3) which
+points to the func_info instruction, which will generate a _function
+clause_ exception.  Otherwise we just fall through and return the
+argument (x0).
+
+=== Test instructions
+
+==== Type tests
+
+The type test instructions (+is_\* Lbl Argument+) check whether the
+argument is of the given type and if not jump to the label Lbl.
+The beam disassembler wraps all these instructions in a  +test+
+instruction. E.g.:
+
+[source,erlang]
+------------------------------------------
+    {test,is_integer,{f,3},[{x,0}]}.
+------------------------------------------
+
+The current type test instructions are is_integer, is_float,
+is_number, is_atom, is_pid, is_reference, is_port, is_nil, is_binary,
+is_list, is_nonempty_list, is_function, is_function2, is_boolean,
+is_bitstr, and is_tuple.
+
+And then there is also one type test instruction of Arity 3:
++test_arity Lbl Arg Arity+. This instruction tests that
+the arity of the argument (assumed to be a tuple) is of +Arity+.
+This instruction is usually preceded by an +is_tuple+
+instruction.
+
+==== Comparisons
+
+The comparison instructions (+is_\* Lbl Arg1 Arg2+) compare the
+two arguments according to the instruction and jump to Lbl if the
+comparison fails.
+
+The comparison instructions are: is_lt,  is_ge,  is_eq,  is_ne,
+is_eq_exact, and  is_ne_exact.
+
+Remember that all Erlang terms are ordered so these instructions can
+compare any two terms. You can for example test if the atom +self+
+is less than the pid  returned by +self()+. (It is.)
+
+Note that for numbers the comparison is done on the Erlang type
+_number_, see xref:CH-TypeSystem[].  That is, for a mixed float and
+integer comparison the number of lower precision is converted to the
+other type before comparison. For example on my system 1 and 1.0
+compare as equal, as well as 9999999999999999 and 1.0e16.
+Comparing floating point numbers is always risky and best avoided,
+the result may vary depending on the underlying hardware.
+
+If you want to make sure that the integer 1 and the floating point
+number 1.0 are compared different you can use is_eq_exact and
+is_ne_exact. This corresponds to the Erlang operators +=:=+ and
++=/=+.
+
+=== Function Calls
+
+In this chapter we will summarize what the different call instructions
+do. For a thorough description of how function calls work see
+xref:CH-Calls[].
+
+==== call Arity Label
+
+Does a call to the function of arity +Arity+ in the same
+module at label +Label+. First count down the reductions and
+if needed do a context switch.
+
+For all local calls the label is the second label of the
+function where the code starts. It is assumed that the preceding
+instruction at that label is +func_info+ in order to get the MFA if a
+context switch is needed.
+
+==== call_only Arity Label
+
+Do a tail recursive call to the function of arity +Arity+ in the same
+module at label +Label+. First count down the reductions and if needed
+do a context switch.
+
+
+==== call_last Arity Label Deallocate
+
+Deallocate +Deallocate+ words of stack, then do a tail recursive call
+to the function of arity +Arity+ in the same module at label +Label+. First
+count down the reductions and if needed do a context switch.
+
+==== call_ext Arity Destination
+
+Does an external call to the function of arity +Arity+ given by
+ Destination. Destination is usually of the form +{extfunc, Module,
+ Function, Arity}+. First count down the reductions and if needed do a
+ context switch.
+
+==== call_ext_only Arity Destination
+
+Does a tail recursive external call to the function of arity +Arity+ given by
+ Destination. Destination is usually of the form +{extfunc, Module,
+ Function, Arity}+. First count down the reductions and if needed do a
+ context switch.
+
+
+==== call_ext_last Arity Destination Deallocate
+
+Deallocate +Deallocate+ words of stack, then do a tail recursive
+external call to the function of arity +Arity+ given by
+Destination. Destination is usually of the form +{extfunc, Module,
+Function, Arity}+. First count down the reductions and if needed do a
+context switch.
+
+
+==== +bif0 Bif Reg+, +bif1 Lbl Bif Arg Reg+, +bif2 Lbl Bif Arg1 Arg2 Reg+
+
+Call the bif +Bif+ with the given arguments, and store the result in
++Reg+.  If the bif fails jump to +Lbl+. No zero arity bif can fail and
+thus those calls don't take a fail label.
+
+// Bif called by these instructions may not allocate on the heap nor
+// trigger a garbage collection.
+
+==== +gc_bif1-3 Lbl Live Bif Arg1-3 Reg+
+
+Call the bif  +Bif+ with the given arguments,  and store the result in Reg.
+If the bif fails jump to Lbl.
+Store the arguments in x(Live),  x(Live+1) and x(Live+2).
+
+==== +call_fun Arity+
+
+The instruction +call_fun+ assumes that the arguments are placed in
+the argument registers and that the fun (the pointer to the closure)
+is placed in the last argument register.
+
+That is, for a zero arity call, the closure is in x0. For an arity 1 call
+x0 contains the argument and x1 contains the closure.
+
+==== apply/1
+
+TODO
+
+==== apply_last/2
+
+TODO
+
+=== Stack (and Heap) Management
+
+The stack and the heap of an Erlang process on Beam share the same memory
+area, see xref:CH-Processes[] and xref:CH-Memory[] for a full discussion.
+The stack grows toward lower addresses and the heap toward higher addresses.
+Beam will do a garbage collection if more space than what is available is
+needed on either the stack or the heap.
+
+**************************
+
+*A leaf function*:: A leaf function is a function which doesn't call
+                    any other function.
+
+*A non leaf function*:: A non leaf function is a function which may call
+                        another function.
+
+**************************
+
+
+The instructions in this section are also used by non leaf functions for
+setting up and tearing down the stack frame for the current function. That is,
+on entry to the function the _continuation pointer_ (CP) is saved on
+the stack, and on exit it is read back from the stack.
+
+A function skeleton for a leaf function looks like this:
+
+----
+{function, Name, Arity, StartLabel}.
+  {label,L1}.
+    {func_info,{atom,Module},{atom,Name},Arity}.
+  {label,L2}.
+    ...
+    return.
+----
+
+
+A function skeleton for a non leaf function looks like this:
+
+----
+{function, Name, Arity, StartLabel}.
+  {label,L1}.
+    {func_info,{atom,Module},{atom,Name},Arity}.
+  {label,L2}.
+    {allocate,Need,Live}.
+
+    ...
+    call ...
+    ...
+
+    {deallocate,Need}.
+    return.
+----
+
+
+
+==== +allocate StackNeed Live+
+
+Save the continuation pointer (CP) and allocate space for +StackNeed+
+extra words on the stack. If a GC is needed during allocation save
++Live+ number of X registers. E.g. if +Live+ is 2 then registers X0
+and X1 are saved.
+
+When allocating on the stack, the stack pointer (E) is decreased.
+
+.Allocate 1 0
+====
+----
+       Before           After
+         | xxx |            | xxx |
+    E -> | xxx |            | xxx |
+         |     |            | ??? | caller save slot
+           ...         E -> | CP  |
+           ...                ...
+ HTOP -> |     |    HTOP -> |     |
+         | xxx |            | xxx |
+----
+====
+
+==== +allocate_heap StackNeed HeapNeed Live+
+
+Save the continuation pointer (CP) and allocate space for +StackNeed+
+extra words on the stack. Ensure that there also is space for HeapNeed
+words on the heap. If a GC is needed during allocation save +Live+
+number of X registers.
+
+Note that the heap pointer (HTOP) is not changed until the actual heap
+allocation takes place.
+
+==== +allocate_zero StackNeed Live+
+
+This instruction works the same way as allocate, but it also clears
+out the allocated stack slots with NIL.
+
+.allocate_zero 1 0
+====
+----
+       Before           After
+         | xxx |            | xxx |
+    E -> | xxx |            | xxx |
+         |     |            | NIL | caller save slot
+           ...         E -> | CP  |
+           ...                ...
+ HTOP -> |     |    HTOP -> |     |
+         | xxx |            | xxx |
+----
+====
+
+==== +allocate_heap_zero StackNeed HeapNeed Live+
+
+The allocate_heap_zero instruction works as the allocate_heap
+instruction, but it also clears out the allocated stack slots
+with NIL.
+
+==== +test_heap HeapNeed Live+
+
+The test_heap instruction ensures there is space for HeapNeed words on
+the heap. If a GC is needed save Live number of X registers.
+
+==== +init N+
+
+The init instruction clears N stack words by writing NIL to them.
+
+==== +deallocate N+
+
+The deallocate instruction is the opposite of the allocate instruction,
+it restores the continuation pointer and deallocates N+1 stack words.
+
+==== +return+
+The return instruction jumps to the address in the continuation pointer (CP).
+
+==== +trim N Remaining+
+Removes N words of stack usage, while keeping the continuation pointer
+on the top of the stack. (The argument Remaining is to the best of my
+knowledge unused.)
+
+----
+Trim 2
+       Before           After
+         | ??? |            | ??? |
+         | xxx |       E -> | CP  |
+         | xxx |            | ... |
+    E -> | CP  |            | ... |
+         |     |            | ... |
+           ...                ...
+ HTOP -> |     |    HTOP -> |     |
+         | xxx |            | xxx |
+----
+
+
+=== Moving, extracting, modifying.
+==== move Source Destination
+
+Move the source Source (a literal or a register) to the destination
+register Destination.
+
+==== get_list Source Head Tail
+
+Get the head and tail (or car and cdr) parts of a list (a cons cell) from
+Source and put them into the registers Head and Tail.
+
+==== get_tuple_element Source Element Destination
+
+Get element number Element from the tuple in Source and put it in the
+destination register Destination.
+
+==== set_tuple_element NewElement Tuple Position
+
+Update the element at position Position of the tuple Tuple with the
+new element NewElement.
+
+=== Building terms.
+
+==== put_list/3
+TODO
+==== put_tuple/2
+TODO
+==== put/1
+
+====  make_fun2/1
+TODO
+
+=== Binary Syntax
+==== bs_put_integer/5
+TODO
+====  bs_put_binary/5
+TODO
+====  bs_put_float/5
+TODO
+====  bs_put_string/2
+TODO
+==== bs_init2/6
+TODO
+==== bs_add/5
+TODO
+==== bs_start_match2/5
+TODO
+==== bs_get_integer2/7
+TODO
+==== bs_get_float2/7
+TODO
+==== bs_get_binary2/7
+TODO
+==== bs_skip_bits2/5
+TODO
+==== bs_test_tail2/3
+TODO
+==== bs_save2/2
+TODO
+==== bs_restore2/2
+TODO
+==== bs_context_to_binary/1
+TODO
+==== bs_test_unit/3
+TODO
+==== bs_match_string/4
+TODO
+==== bs_init_writable/0
+TODO
+==== bs_append/8
+TODO
+==== bs_private_append/6
+TODO
+==== bs_init_bits/6
+TODO
+==== bs_get_utf8/5
+TODO
+==== bs_skip_utf8/4
+TODO
+==== bs_get_utf16/5
+TODO
+==== bs_skip_utf16/4
+TODO
+==== bs_get_utf32/5
+TODO
+==== bs_skip_utf32/4
+TODO
+==== bs_utf8_size/3
+TODO
+==== bs_put_utf8/3
+TODO
+==== bs_utf16_size/3
+TODO
+==== bs_put_utf16/3
+TODO
+==== bs_put_utf32/3
+TODO
+
+=== Floating Point Arithmetic
+==== fclearerror/0
+TODO
+==== fcheckerror/1
+TODO
+==== fmove/2
+TODO
+==== fconv/2
+TODO
+==== fadd/4
+TODO
+==== fsub/4
+TODO
+==== fmul/4
+TODO
+==== fdiv/4
+TODO
+==== fnegate/3
+TODO
+
+
+=== Pattern Matching
+
+==== select_val
+TODO
+==== select_arity_val
+TODO
+==== jump
+TODO
+=== Exception handling
+==== catch/2
+TODO
+==== catch_end/1
+TODO
+==== badmatch/1
+TODO
+==== if_end/0
+TODO
+==== case_end/1
+TODO
+
+=== Meta instructions
+==== on_load
+TODO
+==== line
+TODO
+
+
+include::opcodes_doc.asciidoc[]
+
+=== Specific Instructions
+
+Argument types 
+[options="header"]
+|==================================================
+|Type | Explanation
+|t|	A term, e.g. +[{foo,bar}]+
+|I|	An integer e.g. +42+
+|x|	A register, e.g. +5+
+|y|	A stack slot, e.g. +1+
+|c|	A constant (atom,nil,small int) // Pid?
+|a|	An atom, e.g. 'foo'
+|f|	A label, i.e. a code address
+|s|	Either a literal, a register or a stack slot
+|d|	Either a register or a stack slot
+|r|	A register R0
+|P|	A positive (unsigned) integer literal
+|j|	An optional code label
+|e|	A reference to an export table entry
+|l|	A floating-point register
+|==================================================
+
+List of all BEAM Instructions
+
+[options="header"]
+|===========
+|Instruction        | Arguments | Explanation 
+|allocate           | t t       | 
+|allocate_heap      |t I t      | 
+|deallocate         |I | 
+|init               |y | 
+|init2              |y y | 
+|init3              |y y y | 
+|i_trim             |I | 
+|test_heap          |I t | 
+|allocate_zero      |t t | 
+|allocate_heap_zero | t I t | 
+|i_select_val | r f I | 
+|i_select_val | x f I | 
+|i_select_val | y f I | 
+|i_select_val2 | r f c f c f | 
+|i_select_val2 | x f c f c f | 
+|i_select_val2 | y f c f c f | 
+|i_jump_on_val | rxy f I I | 
+|i_jump_on_val_zero | rxy f I | 
+|i_select_tuple_arity | r f I | 
+|i_select_tuple_arity | x f I | 
+|i_select_tuple_arity | y f I | 
+|i_select_tuple_arity2 | r f A f A f | 
+|i_select_tuple_arity2 | x f A f A f | 
+|i_select_tuple_arity2 | y f A f A f | 
+|i_func_info | I a a I | 
+|return             |          | 
+|get_list           | rxy rxy rxy | 
+|catch              | y f | 
+|catch_end          | y | 
+|try_end | y | 
+|try_case_end | s | 
+|set_tuple_element | s d P | 
+|i_get_tuple_element | rxy P rxy | 
+|is_number | f rxy | 
+|jump | f | 
+|case_end | rxy | 
+|badmatch | rxy | 
+|if_end             |              | 
+|raise | s s | 
+|badarg | j | 
+|system_limit | j | 
+|move_jump | f ncxy | 
+|move_x1 | c | 
+|move_x2 | c | 
+|move2 | x y x y | 
+|move2 | y x y x | 
+|move2 | x x x x | 
+|move | rxync rxy | 
+|recv_mark | f | 
+|i_recv_set | f | 
+|remove_message | | 
+|timeout | |
+|timeout_locked | | 
+|i_loop_rec | f r | 
+|loop_rec_end | f | 
+|wait | f | 
+|wait_locked | f | 
+|wait_unlocked | f | 
+|i_wait_timeout | f I | 
+|i_wait_timeout | f s | 
+|i_wait_timeout_locked | f I | 
+|i_wait_timeout_locked | f s | 
+|i_wait_error | |
+|i_wait_error_locked | | 
+|send |  |
+|i_is_eq_exact_immed | f rxy c | 
+|i_is_ne_exact_immed | f rxy c | 
+|i_is_eq_exact_literal | f rxy c | 
+|i_is_ne_exact_literal | f rxy c | 
+|i_is_eq_exact | f | 
+|i_is_ne_exact | f | 
+|i_is_lt | f | 
+|i_is_ge | f | 
+|i_is_eq | f | 
+|i_is_ne | f | 
+|i_put_tuple | rxy I | 
+|put_list | s s d | 
+|i_fetch | s s | 
+|move_return | xcn r | 
+|move_deallocate_return | xycn r Q | 
+|deallocate_return | Q | 
+|test_heap_1_put_list | I y | 
+|is_tuple_of_arity | f rxy A | 
+|is_tuple | f rxy | 
+|test_arity | f rxy A | 
+|extract_next_element | xy | 
+|extract_next_element2 | xy | 
+|extract_next_element3 | xy | 
+|is_integer_allocate | f rx I I | 
+|is_integer | f rxy | 
+|is_list | f rxy | 
+|is_nonempty_list_allocate | f rx I t | 
+|is_nonempty_list_test_heap | f r I t | 
+|is_nonempty_list | f rxy | 
+|is_atom | f rxy | 
+|is_float | f rxy | 
+|is_nil | f rxy | 
+|is_bitstring | f rxy | 
+|is_reference | f rxy | 
+|is_pid | f rxy | 
+|is_port | f rxy | 
+|is_boolean | f rxy | 
+|is_function2 | f s s | 
+|allocate_init | t I y | 
+|i_apply | | 
+|i_apply_last | P | 
+|i_apply_only |   | 
+|apply | I | 
+|apply_last | I P | 
+|i_apply_fun |  |
+|i_apply_fun_last | P | 
+|i_apply_fun_only |   | 
+|i_hibernate |  |
+|call_bif0 | e | 
+|call_bif1 | e | 
+|call_bif2 | e | 
+|call_bif3 | e | 
+|i_get | s d | 
+|self | rxy | 
+|node | rxy | 
+|i_fast_element | rxy j I d | 
+|i_element | rxy j s d | 
+|bif1 | f b s d | 
+|bif1_body | b s d | 
+|i_bif2 | f b d | 
+|i_bif2_body | b d | 
+|i_move_call | c r f | 
+|i_move_call_last | f P c r | 
+|i_move_call_only | f c r | 
+|move_call | xy r f | 
+|move_call_last | xy r f Q | 
+|move_call_only | x r f | 
+|i_call | f | 
+|i_call_last | f P | 
+|i_call_only | f | 
+|i_call_ext | e | 
+|i_call_ext_last | e P | 
+|i_call_ext_only | e | 
+|i_move_call_ext | c r e | 
+|i_move_call_ext_last | e P c r | 
+|i_move_call_ext_only | e c r | 
+|i_call_fun | I | 
+|i_call_fun_last | I P | 
+|i_make_fun | I t | 
+|is_function | f rxy | 
+|i_bs_start_match2 | rxy f I I d | 
+|i_bs_save2 | rx I | 
+|i_bs_restore2 | rx I | 
+|i_bs_match_string | rx f I I | 
+|i_bs_get_integer_small_imm | rx I f I d | 
+|i_bs_get_integer_imm | rx I I f I d | 
+|i_bs_get_integer | f I I d | 
+|i_bs_get_integer_8 | rx f d | 
+|i_bs_get_integer_16 | rx f d | 
+|i_bs_get_integer_32 | rx f I d | 
+|i_bs_get_binary_imm2 | f rx I I I d | 
+|i_bs_get_binary2 | f rx I s I d | 
+|i_bs_get_binary_all2 | f rx I I d | 
+|i_bs_get_binary_all_reuse | rx f I | 
+|i_bs_get_float2 | f rx I s I d | 
+|i_bs_skip_bits2_imm2 | f rx I | 
+|i_bs_skip_bits2 | f rx rxy I | 
+|i_bs_skip_bits_all2 | f rx I | 
+|bs_test_zero_tail2 | f rx | 
+|bs_test_tail_imm2 | f rx I | 
+|bs_test_unit | f rx I | 
+|bs_test_unit8 | f rx | 
+|bs_context_to_binary | rxy | 
+|i_bs_get_utf8 | rx f d | 
+|i_bs_get_utf16 | rx f I d | 
+|i_bs_validate_unicode_retract | j | 
+|i_bs_init_fail | rxy j I d | 
+|i_bs_init_fail_heap | I j I d | 
+|i_bs_init | I I d | 
+|i_bs_init_heap | I I I d | 
+|i_bs_init_heap_bin | I I d | 
+|i_bs_init_heap_bin_heap | I I I d | 
+|i_bs_init_bits_fail | rxy j I d | 
+|i_bs_init_bits_fail_heap | I j I d | 
+|i_bs_init_bits | I I d | 
+|i_bs_init_bits_heap | I I I d | 
+|i_bs_add | j I d | 
+|i_bs_init_writable | |
+|i_bs_append | j I I I d | 
+|i_bs_private_append | j I d | 
+|i_new_bs_put_integer | j s I s | 
+|i_new_bs_put_integer_imm | j I I s | 
+|i_bs_utf8_size | s d | 
+|i_bs_utf16_size | s d | 
+|i_bs_put_utf8 | j s | 
+|i_bs_put_utf16 | j I s | 
+|i_bs_validate_unicode | j s | 
+|i_new_bs_put_float | j s I s | 
+|i_new_bs_put_float_imm | j I I s | 
+|i_new_bs_put_binary | j s I s | 
+|i_new_bs_put_binary_imm | j I s | 
+|i_new_bs_put_binary_all | j s I | 
+|bs_put_string | I I | 
+|fmove | qdl ld | 
+|fconv | d l | 
+|i_fadd | l l l | 
+|i_fsub | l l l | 
+|i_fmul | l l l | 
+|i_fdiv | l l l | 
+|i_fnegate | l l l | 
+|i_fcheckerror | |
+|fclearerror |  |
+|i_increment | rxy I I d | 
+|i_plus | j I d | 
+|i_minus | j I d | 
+|i_times | j I d | 
+|i_m_div | j I d | 
+|i_int_div | j I d | 
+|i_rem | j I d | 
+|i_bsl | j I d | 
+|i_bsr | j I d | 
+|i_band | j I d | 
+|i_bor | j I d | 
+|i_bxor | j I d | 
+|i_int_bnot | j s I d | 
+|i_gc_bif1 | j I s I d | 
+|i_gc_bif2 | j I I d | 
+|i_gc_bif3 | j I s I d | 
+|int_code_end | |
+|label | L | 
+|line | I | 
+|===========
diff --git a/beam_instructions.asciidoc b/beam_instructions.asciidoc
new file mode 100755
index 0000000..b010997
--- /dev/null
+++ b/beam_instructions.asciidoc
@@ -0,0 +1,503 @@
+
+[[CH-Instructions]]
+== Generic BEAM Instructions (25p)
+
+Beam has two different instruction sets, an internal instruction
+set, called _specific_, and an external instruction set, called
+_generic_.
+
+The generic instruction set is what could be called the official
+instruction set, this is the set of instructions used by both the
+compiler and the Beam interpreter. If there was an official Erlang
+Virtual Machine specification it would specify this
+instruction set. If you want to write your own compiler to the Beam,
+this is the instruction set you should target. If you want to write
+your own EVM this is the instruction set you should handle.
+
+The external instruction set is quite stable, but it does change
+between Erlang versions, especially between major versions.
+
+This is the instruction set which we will cover in this chapter.
+
+The other instruction set, the specific, is an optimized instruction
+set used by the Beam to implement the external instruction set.  To
+give you an understanding of how the Beam works we will cover this
+instruction set in xref:CH-Internal_instructions[]. The internal
+instruction set can change without warning between minor versions or
+even in patch releases. Basing any tool on the internal instruction
+set is risky.
+
+In this chapter I will go through the general syntax for the
+instructions and some instruction groups in detail, a complete list of
+instructions with short descriptions can be found in
+xref:AP-Instructions[].
+
+=== Instruction definitions
+
+The names and opcodes of the generic instructions are defined
+in +lib/compiler/src/genop.tab+.
+
+The file contains a version number for the Beam instruction format, which
+also is written to +.beam+ files. This number has so far never changed
+and is still at version 0. If the external format would be changed in a
+non backwards compatible way this number would be changed.
+
+The file genop.tab is used as input by +beam_makeops+ which is a perl script
+which generates code from the ops tabs. The generator is used both to generate
+Erlang code for the compiler (beam_opcodes.hrl and beam_opcodes.erl) and to
+generate C code for the emulator ( TODO: Filenames).
+
+Any line in the file starting with "#" is a comment and ignored by
++beam_makeops+. The file can contain definitions, which turn into
+bindings in the perl script, of the form:
+
+ NAME=EXPR
+
+Like, e.g.:
+----
+BEAM_FORMAT_NUMBER=0
+----
+The Beam format number is the same as the +instructionset+ field in
+the external beam format. It is only bumped when a backwards
+incompatible change to the instruction set is made.
+
+The main content of the file are the opcode definitions of the form:
+----
+OPNUM: [-]NAME/ARITY
+----
+Where OPNUM and ARITY are integers, NAME is an identifier starting
+with a lowercase letter (a-z), and _:_, _-_, and _/_ are literals.
+
+For example:
+----
+1: label/1
+----
+
+The minus sign (-) indicates a deprecated function. A deprecated
+function keeps its opcode in order for the loader to be sort of
+backwards compatible (it will recognize deprecated instructions and
+refuse to load the code).
+
+In the rest of this Chapter we will go through some BEAM instructions
+in detail. For a full list with brief descriptions see:
+xref:AP-Instructions[].
+
+=== BEAM code listings
+As we saw in xref:CH-Compiler[] we can give the option 'S' to the
+Erlang compiler to get a +.S+ file with the BEAM code for the module
+in a human and machine readable format (actually as Erlang terms).
+
+Given the file beamexample1.erl:
+
+----
+-module(beamexample1).
+
+-export([id/1]).
+
+id(I) when is_integer(I) -> I.
+----
+
+When compiled with +erlc -S beamexample1.erl+ we get the following
+beamexample1.S file:
+
+----
+{module, beamexample1}.  %% version = 0
+
+{exports, [{id,1},{module_info,0},{module_info,1}]}.
+
+{attributes, []}.
+
+{labels, 7}.
+
+
+{function, id, 1, 2}.
+  {label,1}.
+    {line,[{location,"beamexample1.erl",5}]}.
+    {func_info,{atom,beamexample1},{atom,id},1}.
+  {label,2}.
+    {test,is_integer,{f,1},[{x,0}]}.
+    return.
+
+
+{function, module_info, 0, 4}.
+  {label,3}.
+    {line,[]}.
+    {func_info,{atom,beamexample1},{atom,module_info},0}.
+  {label,4}.
+    {move,{atom,beamexample1},{x,0}}.
+    {line,[]}.
+    {call_ext_only,1,{extfunc,erlang,get_module_info,1}}.
+
+
+{function, module_info, 1, 6}.
+  {label,5}.
+    {line,[]}.
+    {func_info,{atom,beamexample1},{atom,module_info},1}.
+  {label,6}.
+    {move,{x,0},{x,1}}.
+    {move,{atom,beamexample1},{x,0}}.
+    {line,[]}.
+    {call_ext_only,2,{extfunc,erlang,get_module_info,2}}.
+----
+
+In addition to the actual beam code for the integer identity
+function we also get some meta instructions.
+
+The first line +{module, beamexample1}.  %% version = 0+ tells
+us the module name "beamexample1" and the version number for
+the instruction set "0".
+
+Then we get a list of exported functions "id/1, module_info/0,
+module_info/1". As we can see the compiler has added two auto
+generated functions to the code. These two functions are just
+dispatchers to the generic module info BIFs (erlang:module_info/1 and
+erlang:module_info/2) with the name of the module added as the first
+argument.
+
+The line +{attributes, []}+ lists all defined compiler attributes, none in
+our case.
+
+Then we get to know that there are fewer than 7 labels in the module,
++{labels, 7}+, which makes it easy to do code loading in one pass.
+
+The last type of meta instruction is the +function+ instruction on
+the format +{function, Name, Arity, StartLabel}+. As we can see with
+the +id+ function the start label is actually the second label in the
+code of the function.
+
+The instruction +{label, N}+ is not really an instruction, it does not
+take up any space in memory when loaded. It is just to give a local
+name (or number) to a position in the code. Each label potentially
+marks the beginning of a basic block since it is a potential
+destination of a jump.
+
+The first two instructions following the first label (+{label,1}+)
+are actually error generating code which adds the line number and
+module, function and arity information and throws an exception.
+Those are the instructions +line+ and +func_info+.
+
+The meat of the function is after +{label,2}+, the instruction
++{test,is_integer,{f,1},[{x,0}]}+. The test instruction tests if its
+arguments (in the list at the end, that is variable {x,0} in this
+case) fulfills the test, in this case is an integer (is_integer).
+If the test succeeds the next instruction (+return+) is executed.
+Otherwise the function fails to label 1 (+{f,1}+), that is,
+execution continues at label one where a function clause exception
+is thrown.
+
+The other two functions in the file are auto generated. If we look at
+the second of them (+module_info/1+), the instruction +{move,{x,0},{x,1}}+ moves the
+argument in register x0 to the second argument register x1.  Then the
+instruction +{move,{atom,beamexample1},{x,0}}+ moves the module name
+atom to the first argument register x0. Finally a tail call is made to
++erlang:get_module_info/2+ 
+(+{call_ext_only,2,{extfunc,erlang,get_module_info,2}}+). As we will
+see in the next section there are several different call instructions.
+
+=== Calls
+
+As we have seen in xref:CH-Calls[] there are several different types
+of calls in Erlang. To distinguish between local and remote calls
+in the instruction set, remote calls have +_ext+ in their instruction
+names. Local calls just have a label in the code of the module, while
+remote calls take a destination of the form +{extfunc, Module, Function,
+Arity}+.
+
+To distinguish between ordinary (stack building) calls and
+tail-recursive calls, the latter have either +_only+ or +_last+ in
+their name. The variant with +_last+ will also deallocate as many
+stack slots as given by the last argument.
+
+There is also a +call_fun Arity+ instruction that calls the closure
+stored in register {x, Arity}. The arguments are stored in x0 to {x,
+Arity-1}.
+
+For a full listing of all types of call instructions see
+xref:AP-Instructions[].
+
+=== Stack (and Heap) Management
+
+The stack and the heap of an Erlang process on Beam share the same memory
+area, see xref:CH-Processes[] and xref:CH-Memory[] for a full discussion.
+The stack grows toward lower addresses and the heap toward higher addresses.
+Beam will do a garbage collection if more space than what is available is
+needed on either the stack or the heap.
+
+**************************
+
+*A leaf function*:: A leaf function is a function which doesn't call
+                    any other function.
+
+*A non leaf function*:: A non leaf function is a function which may call
+                        another function.
+
+**************************
+
+
+On entry to a non leaf function the _continuation pointer_ (CP) is saved on
+the stack, and on exit it is read back from the stack. This is done by the
++allocate+ and +deallocate+ instructions, which are used for setting up
+and tearing down the stack frame for the current function.
+
+A function skeleton for a leaf function looks like this:
+
+++++
+{function, Name, Arity, StartLabel}.
+  {label,L1}.
+    {func_info,{atom,Module},{atom,Name},Arity}.
+  {label,L2}.
+    ...
+    return.
+++++
+
+
+A function skeleton for a non leaf function looks like this:
+
+++++
+{function, Name, Arity, StartLabel}.
+  {label,L1}.
+    {func_info,{atom,Module},{atom,Name},Arity}.
+  {label,L2}.
+    {allocate,Need,Live}.
+
+    ...
+    call ...
+    ...
+
+    {deallocate,Need}.
+    return.
+++++
+
+The instruction +allocate StackNeed Live+ saves the continuation
+pointer (CP) and allocates space for +StackNeed+ extra words on the
+stack. If a GC is needed during allocation save +Live+ number of X
+registers. E.g. if +Live+ is 2 then registers X0 and X1 are saved.
+
+When allocating on the stack, the stack pointer (E) is decreased.
+
+.Allocate 1 0
+++++
+       Before           After
+         | xxx |            | xxx |
+    E -> | xxx |            | xxx |
+         |     |            | ??? | caller save slot
+           ...         E -> | CP  |
+           ...                ...
+ HTOP -> |     |    HTOP -> |     |
+         | xxx |            | xxx |
+++++
+
+For a full listing of all types of allocate and deallocate
+instructions see xref:AP-Instructions[].
+
+
+=== Message Passing
+
+Sending a message is straight forward in beam code. You just use the
++send+ instruction. Note though that the send instruction does not
+take any arguments, it is more like a function call. It assumes that
+the arguments (the destination and the message) are in the argument
+registers X0 and X1. The message is also copied from X1 to X0.
+
+Receiving a message  is a bit more complicated since  it involves both
+selective receive with pattern  matching and introduces a yield/resume
+point within  a function  body. (There  is also  a special  feature to
+minimize message queue scanning using refs, more on that later.)
+
+==== A Minimal Receive Loop
+
+A minimal receive loop, which accepts any message and has no timeout
+(e.g. +receive _ -> ok end+) looks like this in BEAM code:
+
+++++
+<figure id="simple_receive">
+<title>A simple receive loop.</title>
+<programlisting>
+
+  L1: loop_rec L2 x0
+      remove_message
+      ...
+      jump L3
+
+  L2: wait L1
+
+  L3: ...
+</programlisting>
+</figure>
+++++
+
+The +loop_rec L2 x0+ instruction first checks if there is any message
+in the message queue. If there are no messages execution jumps to L2,
+where the process will be suspended waiting for a message to arrive.
+
+If there is a message in the message queue the +loop_rec+ instruction
+also moves the message from the _m-buf_ to the process heap. See
+xref:CH-Memory[] and xref:CH-Processes[] for details of the m-buf
+handling.
+
+For code like +receive _ -> ok end+, where we accept any messages,
+there is no pattern matching needed, we just do a +remove_message+
+which unlinks the next message from the message queue and stores a
+pointer in X0. (It also removes any timeout, more on this soon.)
+
+==== A Selective Receive Loop
+
+For a selective receive like e.g.  +receive [] -> ok end+ we will
+loop over the message queue to check if any message in the queue
+matches.
+
+++++
+<figure id="selective_receive">
+<title>A selective receive loop.</title>
+<programlisting>
+
+  L2: loop_rec L4  X0
+      test is_nil L3 X0
+      remove_message
+      move {atom,ok} X0
+      return
+  L3: loop_rec_end L2
+  L4: wait L2
+
+</programlisting>
+</figure>
+++++
+
+In this case we do a pattern match for Nil after the loop_rec
+instruction if there was a message in the mailbox. If the message
+doesn't match we end up at L3 where the instruction +loop_rec_end+
+advances the save pointer to the next message (+p->msg.save =
+&(*p->msg.save)->next+) and jumps back to L2.
+
+If there are no more messages in the message queue the process is
+suspended by the +wait+ instruction at L4 with the save pointer pointing
+to the end of the message queue. When the process is rescheduled
+it will only look at new messages in the message queue (after the save
+point).
+
+==== A Receive Loop With a Timeout
+
+If we add a timeout to our selective receive the wait instruction is
+replaced by a wait_timeout instruction followed by a timeout
+instruction and the code following the timeout.
+
+
+++++
+<figure id="timeout_receive">
+<title>A receive loop with a timeout.</title>
+<programlisting>
+
+  L6: loop_rec L8 X0
+      test is_nil L7 X0
+      remove_message
+      move {atom,ok} X0
+      return
+  L7: loop_rec_end L6
+  L8: wait_timeout L6 {integer,1000}
+      timeout
+      move {atom,done} X0
+      return
+
+</programlisting>
+</figure>
+++++
+
+The +wait_timeout+ instruction sets up a timeout timer with the given
+time (1000 ms in our example) and it also saves the address of the
+next instruction (the +timeout+) in +p->def_arg_reg[0]+ and then
+when the timer is set,  +p->i+ is set to point to def_arg_reg.
+
+This means that if no matching message arrives while the process is
+suspended a timeout will be triggered after 1 second and execution for
+the process will continue at the timeout instruction.
+
+Note that if a message that doesn't match arrives in the mailbox, the
+process is scheduled for execution and will run the pattern matching
+code in the receive loop, but the timeout will not be canceled. It is
+the +remove_message+ code which also removes any timeout timer.
+
+The +timeout+ instruction resets the save point of the mailbox to the
+first element in the queue, and clears the timeout flag (F_TIMO) from
+the PCB.
+
+==== The Synchronous Call Trick (aka The Ref Trick)
+
+We have now come to the last version of our receive loop, where we
+use the ref trick alluded to earlier to avoid a long message box scan.
+
+A common pattern in Erlang code is to implement a type of "remote
+call" with send and a receive between two processes. This is for
+example used by gen_server. This code is often hidden behind a library
+of ordinary function calls. E.g., you call the function
++counter:increment(Counter)+ and behind the scene this turns into
+something like +Counter ! {self(), inc}, receive {Counter, Count} ->
+Count end+.
+
+This is usually a nice abstraction to encapsulate state in a
+process. There is a slight problem though when the mailbox of the
+calling process has many messages in it. In this case the receive will
+have to check each message in the mailbox to find out that no message
+except the last matches the return message.
+
+This can quite often happen if you have a server that receives many
+messages and for each message does a number of such remote calls, if
+there is no back throttle in place the server's message queue will
+fill up.
+
+To remedy this there is a hack in ERTS to recognize this pattern and
+avoid scanning the whole message queue for the return message.
+
+The compiler recognizes code that uses a newly created reference (ref)
+in a receive (see xref:ref_trick_code[]), and emits code to avoid the
+long inbox scan since the new ref can not already be in the inbox.
+
+++++
+<figure id="ref_trick_code">
+<title>The Ref Trick Pattern.</title>
+<programlisting source="Erlang">
+
+  Ref = make_ref(),
+  Counter ! {self(), inc, Ref},
+  receive
+    {Ref, Count} -> Count
+  end.
+</programlisting>
+</figure>
+++++
+
+This gives us the following skeleton for a complete receive, see
+xref:ref_receive[].
+
+++++
+<figure id="ref_receive">
+<title>A receive with the ref_trick.</title>
+<programlisting>
+     ...
+     recv_mark L11
+     call_ext 0 {extfunc,erlang,make_ref,0}
+     ...
+
+     recv_set L11
+L11: loop_rec L13 x0
+     test is_eq_exact L12 [X0, Y0]
+     remove_message
+     ...
+
+L12: loop_rec_end L11
+L13: wait_timeout L11 {integer,1000}
+     timeout.
+     ...
+
+</programlisting>
+</figure>
+++++
+
+The +recv_mark+ instruction saves the current position (the end,
++msg.last+) in +msg.saved_last+ and the address of the label
+in +msg.mark+.
+
+The +recv_set+ instruction checks that +msg.mark+ points to the next
+instruction and in that case moves the save point (+msg.save+) to the
+last message received before the creation of the ref
+(+msg.saved_last+). If the mark is invalid (i.e. it does not point to
+the next instruction) the instruction does nothing.
diff --git a/beam_internal_instructions.asciidoc b/beam_internal_instructions.asciidoc
new file mode 100755
index 0000000..0c7a940
--- /dev/null
+++ b/beam_internal_instructions.asciidoc
@@ -0,0 +1,4 @@
+
+[[CH-Internal-instructions]]
+== BEAM Internal Instructions
+
diff --git a/book.asciidoc b/book.asciidoc
index 6748e02..f372d4c 100644
--- a/book.asciidoc
+++ b/book.asciidoc
@@ -10,23 +10,23 @@ include::compiler.asciidoc[]
 
 include::processes.asciidoc[]
 
-// include::type_system.asciidoc[]
+include::type_system.asciidoc[]
 
 // include::beam.asciidoc[]
 
 // include::beam_modules.asciidoc[]
 
-// include::beam_instructions.asciidoc[]
+include::beam_instructions.asciidoc[]
 
-// include::calls.asciidoc[]
+include::calls.asciidoc[]
 
 // include::beam_loader.asciidoc[]
 
-// include::beam_internal_instructions.asciidoc[]
+include::beam_internal_instructions.asciidoc[]
 
 // include::scheduling.asciidoc[]
 
-// include::memory.asciidoc[]
+include::memory.asciidoc[]
 
 // include::data_structures.asciidoc[]
 
@@ -46,12 +46,12 @@ include::processes.asciidoc[]
 
 // include::tweak.asciidoc[]
 
-// // Appendix
+[appendix]
 
 // include::index.asciidoc[]
 
 
-// include::ap-beam_instructions.asciidoc[]
+include::ap-beam_instructions.asciidoc[]
 
 // include::ap-code_listings.asciidoc[]
 
diff --git a/calls.asciidoc b/calls.asciidoc
new file mode 100755
index 0000000..b065b70
--- /dev/null
+++ b/calls.asciidoc
@@ -0,0 +1,149 @@
+
+[[CH-Calls]]
+== Different Types of Calls, Linking and Hot Code Loading (5p)
+
+****
+Local calls, remote calls, closure calls, tuple calls, p-mod
+calls. The code server. Linking. Hot code loading, purging. (Overlap
+with Chapter 4, have to see what goes where.) Higher order functions,
+Implementation of higher order functions. Higher order functions and
+hot code loading. Higher order functions in a distributed system.
+****
+
+=== Hot Code Loading
+
+In Erlang there is a semantic difference between a local function call
+and a remote function call. A remote call, that is a call to a
+function in a named module, is guaranteed to go to the latest loaded
+version of that module. A local call, an unqualified call to a function
+within the same module, is guaranteed to go to the same version
+of the code as the caller.
+
+A call to a local function can be turned into a remote call by
+specifying the module name at the call site. This is usually
+done with the ?MODULE macro as in +?MODULE:foo()+.
+A remote call to a non local module can not be turned into
+a local call, i.e. there is no way to guarantee the version
+of the callee in the caller.
+
+This is an important feature of Erlang which makes _hot code loading_
+or _hot upgrades_ possible. Just make sure you have a remote
+call somewhere in your server loop and you can then load new code
+into the system while it is running; when execution reaches the
+remote call it will switch to executing the new code.
+
+A common way of writing server loops is to have a local call
+for the main loop and a code upgrade handler which does
+a remote call and possibly a state upgrade:
+
+[source,erlang]
+------------------------------------------
+loop(State) ->
+  receive
+    upgrade -> 
+       NewState = ?MODULE:code_upgrade(State),
+       ?MODULE:loop(NewState);
+     Msg -> 
+       NewState = handle_msg(Msg, State),
+       loop(NewState)
+   end.
+
+------------------------------------------
+
+With this construct, which is basically what gen_server uses,
+the programmer has control over when and how a code upgrade is done.
+
+The hot code upgrade is one of the most important features of Erlang
+which makes it possible to write servers that operate 24/7 year out
+and year in. It is also one of the main reasons why Erlang is
+dynamically typed. It is very hard in a statically typed language to
+give a type for the code_upgrade function. (It is also hard to give the
+type of the loop function.) These types will change in the future as
+the type of State changes to handle new features.
+
+For a language implementer concerned with performance, the hot code
+loading functionality is a burden though. Since each call to or from a
+remote module can change to new code in the future it is very hard to
+do whole program optimization across module boundaries. (Hard but not
+impossible, there are solutions but so far I have not seen one fully
+implemented.)
+
+=== Code Loading
+
+++++
+<!--
+Shouldn't Code Loading come before Hot Code Loading? Or are the two topics not related in that way? - bmacdonald
+-->
+++++
+
+
+In the Erlang Runtime System the code loading is handled by the
+code server. The code server will call the lower level BIFs in the
++erlang+ module for the actual loading. But the code server also
+determines the purging policy.
+
+The runtime system can keep two versions of each module, a _current_
+version and an _old_ version. All fully qualified (remote) calls go
+to the current version. Local calls in the old version and return
+addresses on the stack can still go to the old version.
+
+If a third version of a module is loaded and there still are processes
+running (have pointers on the stack to) old code the code server
+will kill those processes and purge the old code. Then the current
+version will become old and the third version will be loaded as the
+current version.
+
+
+//[[CH-Beam_loader]]
+// === The BEAM Loader
+
+// Translation to internal format.
+//   Optimizations.
+//   Rewrites
+//   Peephole optimizations
+//   pack engine?
+//
+// ops.tab format/syntax
+//  Catches
+// Linking and Exports
+
+
+=== Transforming from Generic to Specific instructions
+
+The BEAM loader does not just take the external beam format and write
+it to memory. It also does a number of transformations on the code
+and translates from the external (generic) format to the internal
+(specific) format.
+
+The code for the loader can be found in +beam_load.c+ (in
++erts/emulator/+) but most of the logic for the translations is in
+the file +ops.tab+ (in the same directory).
+
+The first step of the loader is to parse the beam file, basically the same
+work as we did in Erlang in xref:CH-beam_modules[] but written in C.
+
+Then the rules in ops.tab are applied to instructions in the code
+chunk to translate the generic instruction to one or more specific
+instructions.
+
+The translation table works through pattern matching. Each line in the
+file defines a pattern of one or more generic instructions with
+arguments and optionally an arrow followed by one or more instructions
+to translate to.
+
+The transformations in ops.tab try to handle patterns of
+instructions generated by the compiler and peephole optimize them to
+fewer specific instructions. The ops.tab transformations also try to
+generate jump tables for patterns of selects.
+
+The file ops.tab is not parsed at runtime, instead a pattern matching
+program is generated from ops.tab and stored in an array in a
+generated C file. The perl script +beam_makeops+ (in
++erts/emulator/utils+) generates a target specific set of opcodes and
+translation programs in the files +beam_opcodes.h+ and
++beam_opcodes.c+ (these files end up in the given target directory
+e.g. +erts/emulator/x86_64-unknown-linux-gnu/opt/smp/+).
+
+The same program (beam_makeops) also generates the Erlang code for the
+compiler back end +beam_opcodes.erl+.
+
diff --git a/code/book/src/generate_op_doc.erl b/code/book/src/generate_op_doc.erl
new file mode 100755
index 0000000..7ff8ce0
--- /dev/null
+++ b/code/book/src/generate_op_doc.erl
@@ -0,0 +1,120 @@
+-module(generate_op_doc).
+
+-export([docbook/2, from_shell/1]).
+
+-record(op, {name="", arity="", opcode, doc="", spec="", deprecated=false}).
+
+from_shell([In, Out]) ->                          % In/Out arrive as atoms; NOTE(review): presumably invoked via `erl -s` -- confirm in Makefile
+    docbook(atom_to_list(In), atom_to_list(Out)), % convert both to file-name strings and generate the table
+    halt().                                       % stop the VM afterwards (the result of docbook/2 is ignored)
+
+docbook(InFile, OutFile) ->                         % parse the op table in InFile, write an AsciiDoc table to OutFile
+    {ok, File} = file:open(InFile, [read]),         % crashes with badmatch if InFile cannot be opened
+    Ops = parse(File),                              % list of #op{} records, one per opcode line
+    ok = file:close(File),                          % release the input handle before producing output
+    file:write_file(OutFile, docbook_format(Ops)).  % returns ok | {error, Reason}
+
+
+
+docbook_format(Ops) ->                                          % render all #op{} records as one AsciiDoc table
+    docbook_format_line(lists:reverse(lists:keysort(2,Ops)))    % element 2 of an #op{} tuple is `name`; list is passed in descending order
+        ++
+        "|=================================================\n". % closing table delimiter
+
+docbook_format_line([#op{name=Name,               % one table row per op; the tail is rendered first,
+                        arity=Arity,              % so the last list element ends up as the first row
+                        opcode=Opcode,
+                        doc=Doc,
+                        spec=Spec,
+                        deprecated=Deprecated}|
+                        Prev]) ->
+    docbook_format_line(Prev) ++
+        "|" ++ format_name(Name, Deprecated) ++
+        "|" ++ strip(Arity) ++
+        "|" ++ format_opcode(Opcode, Deprecated) ++
+        "|" ++ format_spec(Spec, Deprecated) ++
+        "|" ++ strip(Doc) ++ "\n";
+docbook_format_line([]) ->                        % base case: section title, table options and header row
+    "=== Generic Instructions\n" ++ 
+    "[options=" ++ [$"] ++ "header" ++ [$"] ++ "]\n"   % no ++ needed here: adjacent string literals are joined at compile time
+        "|=================================================\n" ++
+        "| Name | Arity | Op Code | Spec | Documentation\n".
+
+
+format_name(Name, Deprecated) ->                                % name cell of a table row
+    if
+        Deprecated -> "[line-through]#" ++ strip(Name) ++ "#";  % deprecated ops get the line-through role
+        true -> strip(Name)
+    end.
+
+format_spec([Name|Args], false) ->                    % Spec is the token list stored by parse_doc/2: *Name* followed by _Arg_, _Arg_
+    "*"++strip(Name)++"*" ++ " "
+        ++ string:join([format_arg(A) || A<-Args], ", ");
+format_spec(_, true) -> "*DEPRECATED*";               % deprecated ops never show their spec
+format_spec([], _) -> "".                             % empty spec (the record default "") yields an empty cell
+
+format_arg(A) -> "_"++strip(A)++"_".  % wrap one spec argument in underscores (AsciiDoc emphasis)
+
+format_opcode(undefined, _Deprecated) ->   % opcode field still at its record default: empty cell
+    "";
+format_opcode(Opcode, Deprecated) ->       % opcode text, wrapped in parentheses when the op is deprecated
+    if
+        Deprecated -> "(";
+        true -> ""
+    end ++
+        strip(Opcode) ++
+        if
+            Deprecated -> ")";
+            true -> ""
+        end.
+
+
+
+strip(S) ->                                  % drop trailing newlines (char 10), then escape every character
+ [ escape(Char)
+  || Char <- string:strip(S, right, 10)].    % result can be a deep list: escaped characters are strings
+
+escape(Char) ->                              % make one character safe inside an AsciiDoc table cell
+    case Char of
+        $| -> "\\|";                         % backslash + pipe; a bare "\|" literal is just "|" in Erlang
+        $\n -> " ";                          % embedded newline becomes a space so the row stays on one line
+        _ -> Char
+    end.
+
+parse(File) ->                  % File is an open io device; returns the accumulated list of #op{} records
+    parse(File, #op{}, []).     % start with an empty pending op and no finished ops
+
+parse(File, Op, Ops) ->                                      % Op collects doc/spec text for the next opcode line; Ops are finished ops
+    case file:read_line(File) of
+        {ok, Line} ->
+            {NewOp, NewOps} = parse_line(Line, Op, Ops),
+            parse(File,  NewOp, NewOps);
+        eof -> case Op#op.name of                            % at end of file keep the pending op only if it has a name
+                   "" -> Ops;
+                   _ -> [Op|Ops]
+               end
+    end.
+
+parse_line("##" ++ Rest, Op, Ops) ->                       % "##" lines carry @spec/@doc text for the upcoming opcode
+    {parse_doc(Rest, Op), Ops};
+parse_line("#" ++ _, Op, Ops) -> {Op, Ops};                % any other "#" line is ignored
+parse_line([N|_]=Line, Op, Ops) when N >= $0, N =< $9 ->   % line starting with a digit: "OpNo: [-]Name/Arity"
+    [OpNo, NA] = string:tokens(Line, ":"),
+    [Name, Arity] = string:tokens(string:strip(NA, left, $ ), "/"),
+    NewOp =
+        case Name of
+            "-" ++ OpName ->                               % a leading "-" marks the op as deprecated
+                Op#op{name=OpName, deprecated=true};
+            _ ->
+                Op#op{name=Name}
+        end,
+    {#op{}, [NewOp#op{opcode=OpNo, arity=Arity} | Ops]};   % store the finished op and start a fresh pending one
+parse_line(_, Op, Ops) ->                                  % every other line leaves the state unchanged
+    {Op, Ops}.
+
+parse_doc(Line, Op) ->                                          % Line is the text following the leading "##"
+    case string:tokens(Line, " ") of
+        ["@spec" | Rest] -> Op#op{spec=Rest};                   % spec is kept as a list of tokens (name first)
+        ["@doc" | Rest] -> Op#op{doc=string:join(Rest, " ")};   % (re)starts the documentation text
+        _ -> Op#op{doc=Op#op.doc++Line}                         % anything else is appended to doc unchanged
+    end.
diff --git a/code/compiler_chapter/src/json_tokens.xrl b/code/compiler_chapter/src/json_tokens.xrl
new file mode 100644
index 0000000..31569ba
--- /dev/null
+++ b/code/compiler_chapter/src/json_tokens.xrl
@@ -0,0 +1,68 @@
+Definitions.
+% Character classes shared by the rules; HexDigit accepts both cases for \uXXXX.
+Digit         = [0-9]
+Digit1to9     = [1-9]
+HexDigit      = [0-9a-fA-F]
+UnescapedChar = [^\"\\]
+EscapedChar   = (\\\\)|(\\\")|(\\b)|(\\f)|(\\n)|(\\r)|(\\t)|(\\/)
+Unicode       = (\\u{HexDigit}{HexDigit}{HexDigit}{HexDigit})
+Quote         = [\"]
+Delim         = [\[\]:,{}]
+Space         = [\n\s\t\r]
+
+Rules.
+
+{Quote}{Quote} : {token, {string, TokenLine, ""}}.
+{Quote}({EscapedChar}|({UnescapedChar})|({Unicode}))+{Quote} :
+  {token, {string, TokenLine, drop_quotes(TokenChars)}}.
+
+null  : {token, {null,  TokenLine}}.
+true  : {token, {true,  TokenLine}}.
+false : {token, {false, TokenLine}}.
+
+{Delim} : {token, {list_to_atom(TokenChars), TokenLine}}.
+
+{Space} : skip_token.
+% Numbers: the integer part is a single 0 or starts with 1-9, so 0, -0 and 0.5 are accepted.
+-?(0|{Digit1to9}{Digit}*)\.{Digit}+((E|e)(\+|\-)?{Digit}+)? :
+  {token, {number, TokenLine, list_to_float(TokenChars)}}.
+-?(0|{Digit1to9}{Digit}*) :
+  {token, {number, TokenLine, list_to_integer(TokenChars)+0.0}}.
+
+Erlang code.
+-export([t/0]).
+
+drop_quotes([$" | QuotedString]) -> literal(lists:droplast(QuotedString)).  % remove the leading quote and the last character, then decode escapes
+literal([$\\,$" | Rest]) ->             % each clause below maps one backslash escape to a single character
+  [$"|literal(Rest)];
+literal([$\\,$\\ | Rest]) ->
+  [$\\|literal(Rest)];
+literal([$\\,$/ | Rest]) ->
+  [$/|literal(Rest)];
+literal([$\\,$b | Rest]) ->
+  [$\b|literal(Rest)];
+literal([$\\,$f | Rest]) ->
+  [$\f|literal(Rest)];
+literal([$\\,$n | Rest]) ->
+  [$\n|literal(Rest)];
+literal([$\\,$r | Rest]) ->
+  [$\r|literal(Rest)];
+literal([$\\,$t | Rest]) ->
+  [$\t|literal(Rest)];
+literal([$\\,$u,D0,D1,D2,D3|Rest]) ->   % \uXXXX: four hex digits become one integer character
+  Char = list_to_integer([D0,D1,D2,D3],16),
+  [Char|literal(Rest)];
+literal([C|Rest]) ->                    % any other character is copied through unchanged
+  [C|literal(Rest)];
+literal([]) ->[].
+
+t() ->          % returns a fixed term; NOTE(review): looks like a sample {ok, Tokens, EndLine} scanner result -- confirm intended use
+  {ok,
+   [{'{',1},
+    {string,2,"no"},
+    {':',2},
+    {number,2,1.0},
+    {'}',3}
+   ],
+   4}.
+
diff --git a/code/compiler_chapter/src/world.E b/code/compiler_chapter/src/world.E
new file mode 100644
index 0000000..def7d8f
--- /dev/null
+++ b/code/compiler_chapter/src/world.E
@@ -0,0 +1,17 @@
+-vsn("\002").
+
+-file("world.erl", 1).
+
+-file("world.hrl", 1).
+
+-file("world.erl", 5).
+
+hello() ->
+    "hello world".
+
+module_info() ->
+    erlang:get_module_info(world).
+
+module_info(X) ->
+    erlang:get_module_info(world, X).
+
diff --git a/code/compiler_chapter/src/world.P b/code/compiler_chapter/src/world.P
new file mode 100644
index 0000000..31904ad
--- /dev/null
+++ b/code/compiler_chapter/src/world.P
@@ -0,0 +1,15 @@
+-file("world.erl", 1).
+
+-module(world).
+
+-export([hello/0]).
+
+-file("world.hrl", 1).
+
+-file("world.erl", 4).
+
+hello() ->
+    "hello world".
+
+
+
diff --git a/code/compiler_chapter/src/world.S b/code/compiler_chapter/src/world.S
new file mode 100644
index 0000000..7a67d7e
--- /dev/null
+++ b/code/compiler_chapter/src/world.S
@@ -0,0 +1,37 @@
+{module, world}.  %% version = 0
+
+{exports, [{hello,0},{module_info,0},{module_info,1}]}.
+
+{attributes, []}.
+
+{labels, 7}.
+
+
+{function, hello, 0, 2}.
+  {label,1}.
+    {line,[{location,"world.erl",6}]}.
+    {func_info,{atom,world},{atom,hello},0}.
+  {label,2}.
+    {move,{literal,"hello world"},{x,0}}.
+    return.
+
+
+{function, module_info, 0, 4}.
+  {label,3}.
+    {line,[]}.
+    {func_info,{atom,world},{atom,module_info},0}.
+  {label,4}.
+    {move,{atom,world},{x,0}}.
+    {line,[]}.
+    {call_ext_only,1,{extfunc,erlang,get_module_info,1}}.
+
+
+{function, module_info, 1, 6}.
+  {label,5}.
+    {line,[]}.
+    {func_info,{atom,world},{atom,module_info},1}.
+  {label,6}.
+    {move,{x,0},{x,1}}.
+    {move,{atom,world},{x,0}}.
+    {line,[]}.
+    {call_ext_only,2,{extfunc,erlang,get_module_info,2}}.
diff --git a/code/compiler_chapter/src/world.erl b/code/compiler_chapter/src/world.erl
new file mode 100644
index 0000000..e45b464
--- /dev/null
+++ b/code/compiler_chapter/src/world.erl
@@ -0,0 +1,7 @@
+-module(world).
+-export([hello/0]).
+
+-include("world.hrl").
+
+hello() -> ?GREETING.  % GREETING is defined in world.hrl, included above
+
diff --git a/code/compiler_chapter/src/world.hrl b/code/compiler_chapter/src/world.hrl
new file mode 100644
index 0000000..ad003c7
--- /dev/null
+++ b/code/compiler_chapter/src/world.hrl
@@ -0,0 +1 @@
+-define(GREETING, "hello world").
diff --git a/code/memory_chapter/src/gc_example.erl b/code/memory_chapter/src/gc_example.erl
new file mode 100644
index 0000000..29d7202
--- /dev/null
+++ b/code/memory_chapter/src/gc_example.erl
@@ -0,0 +1,19 @@
+-module(gc_example).
+-export([example/0]).
+
+example() ->                    % build nested data, keep only part of it, then force a GC
+  T = gen_data(),
+  S = element(1, T),            % first element of the tuple returned by gen_data/0
+  erlang:garbage_collect(),     % collect the calling process; T is not used after this point
+  S.
+
+gen_data() ->
+ S = gen_string($H, $e, $l, $l, $o),   % S is the five-element list "Hello"
+ T = gen_tuple([S,S],S),               % T = {[S,S], S}: the same S is referenced three times
+ T.
+
+gen_string(A,B,C,D,E) ->   % build a five-element list from the arguments
+   [A,B,C,D,E].
+
+gen_tuple(A,B) ->   % pair the two arguments in a 2-tuple
+ {A,B}.
diff --git a/compiler.asciidoc b/compiler.asciidoc
index 5e147c2..b7a92d5 100755
--- a/compiler.asciidoc
+++ b/compiler.asciidoc
@@ -1,773 +1,764 @@
 [[ch.compiler]]
 == The Compiler
 
-// This book will not cover the programming language Erlang, but since
-// the goal of the ERTS is to run Erlang code you will need to know how to
-// compile Erlang code. In this chapter I will cover the compiler
-// options needed to generate readable beam code and how to add 
-// debug information to the generated beam file.
-
-// For those readers interested in compiling their own favorite language
-// to ERTS this chapter will also contain detailed information about the
-// different intermediate formats in the compiler and how to plug your
-// compiler into the beam compiler backend. I will also present parse
-// transforms and give examples of how to use them to tweak the Erlang
-// language.
-
-// If your main interest in reading this book is to understand
-// the Erlang VM and how to debug and tweak it, you can safely 
-// skip this chapter.
-
-
-// === Compiling Erlang
-
-// Erlang is compiled from source code modules in +.erl+ files
-// to fat binary +.beam+ files.
-
-// The compiler can be run from the OS shell with the +erlc+ command:
-// [source,bash]
-// ----
-// > erlc foo.erl
-// ----
-
-// Alternatively the compiler can be invoked from the Erlang shell with
-// the default shell command +c+ or by calling +compile:file/{1,2}+
-
-// [source,erlang]
-// ----
-// 1> c(foo).
-// ----
-// or
-
-// [source,erlang]
-// ----
-// 1> compile:file(foo).
-// ----
-
-// The optional second argument to +compile:file+ is a list of compiler
-// options. A full list of the options can be found in the documentation
-// of the compile module: see link:http://www.erlang.org/doc/man/compile.html[].
-
-// Normally the compiler will compile Erlang source code from a +.erl+
-// file and write the resulting binary beam code to a +.beam+ file. You
-// can also get the resulting binary back as an Erlang term
-// by giving the option +binary+ to the compiler. This
-// option has then been overloaded to mean return any intermediate format
-// as a term instead of writing to a file. If you for example want the
-// compiler to return Core Erlang code you can give the options +[core,
-// binary]+.
-
-
-// The compiler is made up of a number of passes as illustrated in 
-// xref:fig_compiler_passes[Compiler Passes].
-
-
-// // <title>Compiler Passes. [] = Compiler options, () = files, {} = erlang terms, boxes = passes </title>
-
-// [[fig_compiler_passes]]
-// ++++
-// <pre data-type="programlisting">       (.erl)
-//            |
-//            v
-//    +---------------+
-//    |    Scanner    |
-//    | (Part of epp) |
-//    +---------------+
-//            |
-//            v
-//    +---------------+
-//    | Pre-processor |
-//    |      epp      |
-//    +---------------+
-//            |
-//            v
-//    +---------------+    +---------------+    
-//    |     Parse     | -> | user defined  |
-//    |   Transform   | &lt;- | transformation|
-//    +---------------+    +---------------+
-//            |
-//            +---------> (.Pbeam) [makedep]
-//            +---------> {dep} [makedep, binary]
-//            |
-//            +---------> (.pp) [dpp]
-//            +---------> {AST} [dpp, binary]
-//            |
-//            v
-//    +---------------+
-//    |    Linter     |
-//    |               |
-//    +---------------+
-//            |
-//            +---------> (.P) ['P']
-//            +---------> {AST} ['P',binary]
-//            |
-//            v
-//    +---------------+
-//    |    Save AST   |
-//    |               |
-//    +---------------+
-//            |
-//            v
-//    +---------------+
-//    |     Expand    |
-//    |               |
-//    +---------------+
-//            |
-//            +---------> (.E) ['E']
-//            +---------> {.E} ['E', binary]
-//            |
-//            v
-//    +---------------+
-//    |     Core      |
-//    |    Erlang     |
-//    +---------------+
-//            |
-//            +---------> (.core) [dcore|to_core0]
-//            +---------> {core} [to_core0,binary]
-//            |
-//            v
-//    +---------------+
-//    |      Core     |+
-//    |     Passes    ||+
-//    +---------------+||
-//     +---------------+|
-//      +---------------+
-//            |
-//            +---------> (.core) [to_core]
-//            +---------> {core} [to_core,binary]
-//            |
-//            v
-//    +---------------+
-//    |    Kernel     |
-//    |    Erlang     |
-//    +---------------+
-//            |
-//            v
-//    +---------------+
-//    |    Kernel     |+
-//    |    Passes     ||+
-//    +---------------+||
-//     +---------------+|
-//      +---------------+
-//            |
-//            v
-//    +---------------+
-//    |   BEAM Code   |
-//    |               |
-//    +---------------+
-//            |
-//            v
-//    +---------------+
-//    |      ASM      |+
-//    |     Passes    ||+
-//    +---------------+||
-//     +---------------+|
-//      +---------------+
-//            |
-//            +---------> (.S) ['S']
-//            +---------> {.S} ['S', binary]
-//            |
-//            v
-//    +---------------+
-//    |  Native Code  |
-//    |               |
-//    +---------------+
-//            |
-//            v
-//         (.beam)</pre>
-// ++++
-// //</figure>
-
-// If you want to see a complete and up to date list of compiler passes
-// you can run the function +compile:options/0+ in an Erlang shell.
-// The definitive source for information about the compiler is of course
-// the source:
-//  link:https://github.com/erlang/otp/blob/maint/lib/compiler/src/compile.erl[compile.erl]
-
-
-
-// === Generating Intermediate Output
-
-// Looking at the code produced by the compiler is a great help in trying
-// to understand how the virtual machine works. Fortunately, the compiler
-// can show os the intermediate code after each compiler pass and the
-// final beam code.
-
-// Let us try out our newfound knowledge to look at the generated code.
-
-
-// [source,erlang]
-// ----
-// 1> compile:options().
-// dpp - Generate .pp file
-// 'P' - Generate .P source listing file
-// ----
-// ...
-// ----
-// 'E' - Generate .E source listing file
-// ----
-// ...
-// ----
-// 'S' - Generate .S file
-// ----
-
-// Let us try with a small example program "world.erl":
-// [source,erlang]
-// ----
-// include::world.erl[]
-// ----
-
-// And the include file "world.hrl"
-// [source,erlang]
-// ----
-// include::world.hrl[]
-// ----
-
-// If you now compile this with the 'P' option to get the parsed file you
-// get a file "world.P":
-
-// [source,erlang]
-// ----
-// 2> c(world, ['P']). 
-// ** Warning: No object file created - nothing loaded **
-// ok
-// ----
-
-// In the resulting +.P+ file you can see the a pretty printet version of
-// the code after the preprocessor (and parse transformation) has been
-// applied:
-
-// [source,erlang]
-// ----
-// include::world.P[]
-// ----
-
-// To see how the code looks after all source code transformations are
-// done, you can compile the code with the +'E'+-flag.
-
-// [source,erlang]
-// ----
-// 3> c(world, ['E']). 
-// ** Warning: No object file created - nothing loaded **
-// ok
-// ----
-
-// This gives us an +.E+ file, in this case all compiler directives have
-// been removed and the build in functions +module_info/{1,2}+ have been
-// added to the source:
-
-// [source,erlang]
-// ----
-// include::world.E[]
-// ----
-
-// We will make use of the 'P' and 'E' options when we look at parse
-// transforms in xref:SEC-parse_transform[], but first we will take a
-// look at an "assembler" view of generated BEAM code. Bu giving the
-// option +'S'+ to the compiler you get a +.S+ file with Erlang terms
-// for each BEAM instruction in the code.
-
-// [source,erlang]
-// ----
-// 3> c(world, ['S']). 
-// ** Warning: No object file created - nothing loaded **
-// ok
-// ----
-
-// The file +world.S+ should look like this:
-
-// [source,erlang]
-// ----
-// include::world.S[]
-// ----
-
-// Since this is a file with dot ("_._") separated Erlang terms, you can 
-// read the file back into the Erlang shell with:
-// ----
-// {ok, BEAM_Code} = file:consult("world.S").
-// ----
-
-// The assembler code mostly follows the layout of the original source
-// code.
-
-// The first instruction defines the module name of the code. The version
-// mentioned in the comment (+%% version = 0+) is the version of the beam
-// opcode format (as given by +beam_opcodes:format_number/0+).
-
-// Then comes a list of exports and any compiler attributes (none in this
-// example) much like in any Erlang source module.
-
-// The first real beam-like instruction is +{labels, 7}+ which tells the
-// VM the number of lables in the code to make it possible to allocate 
-// room for all lables in one pass over the code.
-
-// After that there is the actual code for each function. The first
-// instruction gives us the function name, the arity and the entry point
-// as a label number.
- 
-// You can use the +'S'+ option with great effect to help you understand
-// how the BEAM works, and we will use it like that in later chapters. It
-// is also invaluable if you develop your own language that you compile 
-// to the BEAM through Core Erlang, to see the generated code.
-
-// === Compiler Passes
-
-// In the following sections we will go through most of the compiler
-// passes shown in xref:fig_compiler_passes[]. For a language designer
-// targeting the BEAM this is interesting since it will show you what you
-// can accomplish with the different approaches: macros, parse
-// transforms, core erlang, and BEAM code, and how they depend on each
-// other.
-
-// When tuning Erlang code, it is good to know what optimizations are
-// applied when, and how you can look at generated code before and
-// after optimizations.
-
-
-// ==== Compiler Pass: The Erlang Preprocessor (epp)
-
-// The compilation starts with a combined tokenizer (or scanner) and
-// preprocessor. That is, the preprosessor drives the tokenizer.
-// This means that macros are expanded as tokens, so
-// it is not a pure string replacement (as for example m4 or cpp).
-// You can not use Erlang macros to define your own syntax, a macro
-// will expand as a separate token from its surrounding characters.
-// You can not concatenate a macro and a character to a token:
-
-// ----
-// -define(plus,+).
-// t(A,B) -> A?plus+B.
-// ----
-// This will expand to
-// ----
-// t(A,B) -> A + + B.
-// ----
-// and not
-// ----
-// t(A,B) -> A ++ B.
-// ----
-
-// On the other hand since macro expansion is done on the token
-// level, you do not need to have a valid Erlang term in the
-// right hand side of the macro, as long as you use it in a way
-// that gives you a valid term. E.g.:
-
-// ----
-// -define(p,o, o]).
-// t() -> [f,?p.
-// ----
-
-// I do not know any real use for this other than to win the
-// obfuscated Erlang code contest. The main point to remember is that 
-// you can not really use the Erlang preprocessor to define a language 
-// with a syntax that differs from Erlang. Fortunately there are
-// other ways to do this, as you shall see later.
-
-
-
-
-// [[SEC-parse_transform]]
-// ==== Compiler Pass: Parse Transformations
-
-// The easiest way to tweak the Erlang language is through Parse
-// Transformations (or parse transforms). Parse Transformations comes
-// with all sorts of warnings, like this note in the OTP documentation:
-
-// ----
-// Programmers are strongly advised not to engage in parse
-// transformations and no support is offered for problems encountered.
-// ----
-
-// When you use a parse transform you are basically writing an extra pass
-// in the compiler and that can if you are not careful lead to very
-// unexpected results. But to use a parse transform you have to declare
-// the usage in the module using it, and it will be local to that module,
-// so as far as compiler tweaks goes this one is quite safe.
-
-// The biggest problem with parse transforms as I see it is that you
-// are inventing your own syntax, and it will make it more difficult
-// for anyone else reading your code. At least until your parse transform
-// has become as popular and widely used as e.g. QLC.
-
-// OK, so you know you shouldn't use it, but if you have to, here is what
-// you need to know. A parse transforms is a function that works on the
-// abstract syntax tree (AST) (see
-// link:http://www.erlang.org/doc/apps/erts/absform.html[] ).  The compiler
-// does preprocessing, tokenization and parsing and then it will call the
-// parse transform function with the AST and expects to get back a
-// new AST.
-
-// This means that you can't change the Erlang syntax fundamentally, but
-// you can change the semantics. Lets say for example that you for some
-// reason would like to write json code directly in your Erlang code,
-// then you are in luck since the tokens of json and of Erlang are
-// basically the same. Also, since the Erlang compiler does most of
-// its sanity checks in the linter pass which follows the parse transform
-// pass, you can allow an AST which does not represent valid Erlang.
-
-// To write a parse transform you need to write an Erlang module (lets
-// call it _p_) which exports the function +parse_transform/2+. This
-// function is called by the compiler during the parse transform pass if
-// the module being compiled (lets call it _m_) contains the compiler
-// option +{parse_transform, p}+. The arguments to the funciton is the
-// AST of the module m and the compiler options given to the call to the
-// compiler.
-
-// [NOTE]
-// ====
-// Note that you will not get any compiler options given in the file, this
-// is a bit of a nuisance since you can't give options to the parse transform
-// from the code.
-
-// The compiler does not expand compiler options until the _expand_ pass
-// which occures after the parse transform pass.
-// ====
-
-// The documenation of the abstract format is somewhat dense and it is
-// quite hard to get a grip on the abstract format by reading the
-// documentation.  I encourage you to use the _syntax_tools_ and
-// especially +erl_syntax_lib+ for any serious work on the AST.
-
-// Here we will develop a a simple parse transform just to get an
-// understanding of the AST. Therefore we will work directly on the AST
-// and use the old reliable +io:format+ approach instead of syntax_tools.
-
-// First we create an example of what we would like to be able to compile
-// json_test.erl:
-
-// [source,erlang]
-// ----
-// -module(json_test).
-// -compile({parse_transform, json_parser}).
-// -export([test/1]).
-
-// test(V) ->
-//     <<{{
-//       "name"  : "Jack (\"Bee\") Nimble",
-//       "format": {
-//                   "type"      : "rect",
-//                   "widths"     : [1920,1600],
-//                   "height"    : (-1080),
-//                   "interlace" : false,
-//                   "frame rate": V
-//                 }
-//      }}>>.
-// ----
-
-// Then we create a minimal parse transform module +json_parser.erl+:
-
-// [source,erlang]
-// ----
-// -module(json_parser).
-// -export([parse_transform/2]).
-
-// parse_transform(AST, _Options) ->
-//   io:format("~p~n", [AST]),
-//   AST.
-// ----
-
-// This identity parse transform returns an unchanged AST but it also prints
-// it out so that you can see what an AST looks like.
-
-// ----
-// > c(json_parser).
-// {ok,json_parser}
-// 2> c(json_test).
-// [{attribute,1,file,{"./json_test.erl",1}},
-//  {attribute,1,module,json_test},
-//  {attribute,3,export,[{test,1}]},
-//  {function,5,test,1,
-//   [{clause,5,
-//     [{var,5,'V'}],
-//     [],
-//     [{bin,6,
-//       [{bin_element,6,
-//         {tuple,6,
-//          [{tuple,6,
-//            [{remote,7,{string,7,"name"},{string,7,"Jack (\"Bee\") Nimble"}},
-//             {remote,8,
-//              {string,8,"format"},
-//              {tuple,8,
-//               [{remote,9,{string,9,"type"},{string,9,"rect"}},
-//                {remote,10,
-//                 {string,10,"widths"},
-//                 {cons,10,
-//                  {integer,10,1920},
-//                  {cons,10,{integer,10,1600},{nil,10}}}},
-//                {remote,11,{string,11,"height"},{op,11,'-',{integer,11,1080}}},
-//                {remote,12,{string,12,"interlace"},{atom,12,false}},
-//                {remote,13,{string,13,"frame rate"},{var,13,'V'}}]}}]}]},
-//         default,default}]}]}]},
-//  {eof,16}]
-// ./json_test.erl:7: illegal expression
-// ./json_test.erl:8: illegal expression
-// ./json_test.erl:5: Warning: variable 'V' is unused
-// error
-// ----
-
-// The compilation of +json_test+ fails since the module contains invalid
-// Erlang syntax, but you get to see what the AST looks like. Now we can
-// just write some functions to traverse the AST and rewrite the json
-// code into Erlang code.footnote:[The translation here is done in
-// accordance with EEP 18 (Erlang Enhancement Proposal 18: "JSON bifs")
-// link:http://www.erlang.org/eeps/eep-0018.html]
-
-// [source,erlang]
-// ----
-// -module(json_parser).
-// -export([parse_transform/2]).
-
-// parse_transform(AST, _Options) ->
-//     json(AST, []).
-
-// -define(FUNCTION(Clauses), {function, Label, Name, Arity, Clauses}).
-
-// %% We are only interested in code inside functions.
-// json([?FUNCTION(Clauses) | Elements], Res) ->
-//     json(Elements, [?FUNCTION(json_clauses(Clauses)) | Res]);
-// json([Other|Elements], Res) -> json(Elements, [Other | Res]);
-// json([], Res) -> lists:reverse(Res).
-
-// %% We are interested in the code in the body of a function.
-// json_clauses([{clause, CLine, A1, A2, Code} | Clauses]) ->
-//     [{clause, CLine, A1, A2, json_code(Code)} | json_clauses(Clauses)];
-// json_clauses([]) -> [].
-
-
-// -define(JSON(Json), {bin, _, [{bin_element
-//                                           , _
-//                                           , {tuple, _, [Json]}
-//                                           , _
-//                                           , _}]}).
-
-// %% We look for: <<"json">> = Json-Term
-// json_code([])                     -> [];
-// json_code([?JSON(Json)|MoreCode]) -> [parse_json(Json) | json_code(MoreCode)];
-// json_code(Code)                   -> Code.
-
-// %% Json Object -> [{}] | [{Lable, Term}]
-// parse_json({tuple,Line,[]})            -> {cons, Line, {tuple, Line, []}};
-// parse_json({tuple,Line,Fields})        -> parse_json_fields(Fields,Line);
-// %% Json Array -> List
-// parse_json({cons, Line, Head, Tail})   -> {cons, Line, parse_json(Head),
-//                                                        parse_json(Tail)};
-// parse_json({nil, Line})                -> {nil, Line};
-// %% Json String -> <<String>>
-// parse_json({string, Line, String})     -> str_to_bin(String, Line);
-// %% Json Integer -> Intger
-// parse_json({integer, Line, Integer})   -> {integer, Line, Integer};
-// %% Json Float -> Float
-// parse_json({float, Line, Float})       -> {float, Line, Float};
-// %% Json Constant -> true | false | null
-// parse_json({atom, Line, true})         -> {atom, Line, true};
-// parse_json({atom, Line, false})        -> {atom, Line, false};
-// parse_json({atom, Line, null})         -> {atom, Line, null};
-
-// %% Variables, should contain Erlang encoded Json
-// parse_json({var, Line, Var})         -> {var, Line, Var};
-// %% Json Negative Integer or Float
-// parse_json({op, Line, '-', {Type, _, N}}) when Type =:= integer
-//                                                ; Type =:= float ->
-//                                           {Type, Line, -N}.
-// %% parse_json(Code)                  -> io:format("Code: ~p~n",[Code]), Code.
-
-// -define(FIELD(Lable, Code), {remote, L, {string, _, Label}, Code}).
-
-// parse_json_fields([], L) -> {nil, L};
-// %% Label : Json-Term  --> [{<<Label>>, Term} | Rest]
-// parse_json_fields([?FIELD(Lable, Code) | Rest], _) ->
-//     cons(tuple(str_to_bin(Label, L), parse_json(Code), L)
-//          , parse_json_fields(Rest, L)
-//          , L).
-
-
-// tuple(E1, E2, Line)    -> {tuple, Line, [E1, E2]}.
-// cons(Head, Tail, Line) -> {cons, Line, Head, Tail}.
-
-// str_to_bin(String, Line) ->
-//     {bin
-//      , Line
-//      , [{bin_element
-//          , Line
-//          , {string, Line, String}
-//          , default
-//          , default
-//         }
-//        ]
-//     }.
-// ----
-
-// And now we can compile +json_test+ without errors:
-
-// [source,erlang]
-// ----
-// 1> c(json_parser).
-// {ok,json_parser}
-// 2> c(json_test).
-// {ok,json_test}
-// 3> json_test:test(42).
-// [{<<"name">>,<<"Jack (\"Bee\") Nimble">>},
-//  {<<"format">>,
-//   [{<<"type">>,<<"rect">>},
-//    {<<"widths">>,[1920,1600]},
-//    {<<"height">>,-1080},
-//    {<<"interlace">>,false},
-//    {<<"frame rate">>,42}]}]
-// ----
-
-// The AST generated by +parse_teansfom/2+ must correspond to valid
-// Erlang code. Unless you apply several parse transforms, which is
-// possible. The validity of the code is checked by the following
-// compiler pass.
-
-// ==== Compiler Pass: Linter
-
-// The linter (+erl_lint.erl+) generaters warnings for syntactically
-// correct but otherwise bad code, like "export_all flag enabled".
-
-
-// ==== Compiler Pass: Save AST
-
-// In order to enable debugging of a module, you can "debug compile" the
-// module, that is pass the option +debug_info+ to the compiler. The
-// abstract syntax tree will then be saved by the "Save AST" until the
-// end of the compilation, where it will be written to the .beam file.
-
-// It is important to note that the code is saved before any
-// optimisations are appilied, so if there is a bug in an optimisation
-// pass in the compiler and you run code in the debugger you will get a
-// diffferent behavior. If you are implementing your own compiler
-// optimisations this can trick you up badly.
-
-// ==== Compiler Pass: Expand
-
-// In the expand phase source erlang constructs, such as records, are
-// expanded to lower level erlang constructs. Compiler options,
-// "+-compile(...)+", are also _expanded_ to meta data.
-
-// ==== Compiler Pass: Core Erlang
-
-// Core Erlang is a strict functional language suitable for compiler
-// optimizations. It makes code transformations easier by reducing the
-// number of ways to express the same operation. One way it does this is
-// by introducing _let_ and _letrec_ expressions to make scoping more
-// explicit.
-
-// Core Erlang is the best target for a language you want to run in
-// ERTS. It changes very seldom and it contains all aspects of Erlang in
-// a clean way. If you target the beam instruction set directly you will
-// have to deal with much more detail, and that instruction set usually
-// changes slightly between each major release of ERTS. If you on the other
-// hand target Erlang directly you will be more restricted in what you
-// can describe, and you will also have to deal with more details, since
-// Core Erlang is a cleaner language.
-
-// To compile an Erlang file to core you can give the option "to_core",
-// note though that this writes the Erlang core program to a file with
-// the ".core" extension. To compile an Erlang core program from a ".core"
-// file you can give the option "from_core" to the compiler.
-
-// ----
-// 1> c(world, to_core).
-// ** Warning: No object file created - nothing loaded **
-// ok
-// 2> c(world, from_core).
-// {ok,world}
-// ----
-
-// Note that the +.core+ files are text files written in the human
-// readable core format. To get the core program as an Erlang term
-// you can add the +binary+ option to the compilation.
-
-
-// ==== Compiler Pass: Kernel Erlang
-
-// Kernel Erlang is a flat version of Core Erlang with a few differences.
-// For example, each variable is unique and the scope is a whole function. Pattern
-// matching is compiled to more primitive operations.
-
-
-// ==== Compiler Pass: BEAM Code
-
-// The last step of a normal compilation is the external beam code
-// format.  Some low level optimizations such as dead code elimination and
-// peep hole optimisations are done on this level.
-
-// %% Describe all optimizations?
-
-// The BEAM code is described in detail in
-// xref:CH-Instructions[] and xref:AP-Instructions[]
-
-// ==== Compiler Pass: Native Code
-
-// If you add the flag +native+ to the compilation, and you have a HiPE
-// enabled runtime system, then the compiler will generate native code
-// for your module and store the native code along with the beam code
-// in the +.beam.+ file.
-
-// == Other Compiler Tools
+This book will not cover the programming language Erlang, but since
+the goal of the ERTS is to run Erlang code you will need to know how
+to compile Erlang code. In this chapter we will cover the compiler
+options needed to generate readable beam code and how to add debug
+information to the generated beam file. At the end of the chapter
+there is also a section on the Elixir compiler.
+
+For those readers interested in compiling their own favorite language
+to ERTS this chapter will also contain detailed information about the
+different intermediate formats in the compiler and how to plug your
+compiler into the beam compiler backend. I will also present parse
+transforms and give examples of how to use them to tweak the Erlang
+language.
+
+=== Compiling Erlang
+
+Erlang is compiled from source code modules in +.erl+ files
+to fat binary +.beam+ files.
+
+The compiler can be run from the OS shell with the +erlc+ command:
+
+[source,bash]
+----
+> erlc foo.erl
+----
+
+Alternatively the compiler can be invoked from the Erlang shell with
+the default shell command +c+ or by calling +compile:file/{1,2}+
+
+[source,erlang]
+----
+1> c(foo).
+----
+
+or
+
+[source,erlang]
+----
+1> compile:file(foo).
+----
+
+The optional second argument to +compile:file+ is a list of compiler
+options. A full list of the options can be found in the documentation
+of the compile module: see link:http://www.erlang.org/doc/man/compile.html[].
+
+Normally the compiler will compile Erlang source code from a +.erl+
+file and write the resulting binary beam code to a +.beam+ file. You
+can also get the resulting binary back as an Erlang term
+by giving the option +binary+ to the compiler. This
+option has since been overloaded to mean: return any intermediate format
+as a term instead of writing it to a file. If you, for example, want the
+compiler to return Core Erlang code you can give the options +[to_core,
+ binary]+.
+
+
+The compiler is made up of a number of passes as illustrated in 
+xref:fig_compiler_passes[Compiler Passes].
+
+[[fig_compiler_passes]]
+.Compiler Passes.
+----
+[] = Compiler options, () = files, {} = erlang terms, boxes = passes
+         (.erl)
+            |
+            v
+    +---------------+
+    |    Scanner    |
+    | (Part of epp) |
+    +---------------+
+            |
+            v
+    +---------------+
+    | Pre-processor |
+    |      epp      |
+    +---------------+
+            |
+            v
+    +---------------+    +---------------+    
+    |     Parse     | -> | user defined  |
+    |   Transform   | <- | transformation|
+    +---------------+    +---------------+
+            |
+            +---------> (.Pbeam) [makedep]
+            +---------> {dep} [makedep, binary]
+            |
+            +---------> (.pp) [dpp]
+            +---------> {AST} [dpp, binary]
+            |
+            v
+    +---------------+
+    |    Linter     |
+    |               |
+    +---------------+
+            |
+            +---------> (.P) ['P']
+            +---------> {AST} ['P',binary]
+            |
+            v
+    +---------------+
+    |    Save AST   |
+    |               |
+    +---------------+
+            |
+            v
+    +---------------+
+    |     Expand    |
+    |               |
+    +---------------+
+            |
+            +---------> (.E) ['E']
+            +---------> {.E} ['E', binary]
+            |
+            v
+    +---------------+
+    |     Core      |
+    |    Erlang     |
+    +---------------+
+            |
+            +---------> (.core) [dcore|to_core0]
+            +---------> {core} [to_core0,binary]
+            |
+            v
+    +---------------+
+    |      Core     |+
+    |     Passes    ||+
+    +---------------+||
+     +---------------+|
+      +---------------+
+            |
+            +---------> (.core) [to_core]
+            +---------> {core} [to_core,binary]
+            |
+            v
+    +---------------+
+    |    Kernel     |
+    |    Erlang     |
+    +---------------+
+            |
+            v
+    +---------------+
+    |    Kernel     |+
+    |    Passes     ||+
+    +---------------+||
+     +---------------+|
+      +---------------+
+            |
+            v
+    +---------------+
+    |   BEAM Code   |
+    |               |
+    +---------------+
+            |
+            v
+    +---------------+
+    |      ASM      |+
+    |     Passes    ||+
+    +---------------+||
+     +---------------+|
+      +---------------+
+            |
+            +---------> (.S) ['S']
+            +---------> {.S} ['S', binary]
+            |
+            v
+    +---------------+
+    |  Native Code  |
+    |               |
+    +---------------+
+            |
+            v
+         (.beam)
+----
+
+If you want to see a complete and up to date list of compiler passes
+you can run the function +compile:options/0+ in an Erlang shell.
+The definitive source for information about the compiler is of course
+the source:
+  link:https://github.com/erlang/otp/blob/maint/lib/compiler/src/compile.erl[compile.erl]
+
+
+
+=== Generating Intermediate Output
+
+Looking at the code produced by the compiler is a great help in trying
+to understand how the virtual machine works. Fortunately, the compiler
+can show us the intermediate code after each compiler pass and the
+final beam code.
+
+Let us try out our newfound knowledge to look at the generated code.
+
+
+[source,erlang]
+----
+ 1> compile:options().
+ dpp - Generate .pp file
+ 'P' - Generate .P source listing file
+----
+ ...
+----
+ 'E' - Generate .E source listing file
+----
+ ...
+----
+ 'S' - Generate .S file
+----
+
+Let us try with a small example program "world.erl":
+[source,erlang]
+----
+include::code/compiler_chapter/src/world.erl[]
+----
+
+And the include file "world.hrl"
+[source,erlang]
+----
+include::code/compiler_chapter/src/world.hrl[]
+----
+
+If you now compile this with the 'P' option to get the parsed file you
+get a file "world.P":
+
+[source,erlang]
+----
+2> c(world, ['P']). 
+** Warning: No object file created - nothing loaded **
+ok
+----
+
+In the resulting +.P+ file you can see a pretty-printed version of
+the code after the preprocessor (and parse transformation) has been
+applied:
+
+[source,erlang]
+----
+include::code/compiler_chapter/src/world.P[]
+----
+
+To see how the code looks after all source code transformations are
+done, you can compile the code with the +'E'+-flag.
+
+[source,erlang]
+----
+3> c(world, ['E']). 
+** Warning: No object file created - nothing loaded **
+ok
+----
+
+This gives us an +.E+ file, in this case all compiler directives have
+been removed and the built-in functions +module_info/{1,2}+ have been
+added to the source:
+
+[source,erlang]
+----
+include::code/compiler_chapter/src/world.E[]
+----
+
+We will make use of the 'P' and 'E' options when we look at parse
+transforms in xref:SEC-parse_transform[], but first we will take a
+look at an "assembler" view of generated BEAM code. By giving the
+option +'S'+ to the compiler you get a +.S+ file with Erlang terms
+for each BEAM instruction in the code.
+
+[source,erlang]
+----
+4> c(world, ['S']).
+** Warning: No object file created - nothing loaded **
+ok
+----
+
+The file +world.S+ should look like this:
+
+[source,erlang]
+----
+include::code/compiler_chapter/src/world.S[]
+----
+
+Since this is a file with dot ("_._") separated Erlang terms, you can 
+read the file back into the Erlang shell with:
+----
+{ok, BEAM_Code} = file:consult("world.S").
+----
+
+The assembler code mostly follows the layout of the original source
+code.
+
+The first instruction defines the module name of the code. The version
+mentioned in the comment (+%% version = 0+) is the version of the beam
+opcode format (as given by +beam_opcodes:format_number/0+).
+
+Then comes a list of exports and any compiler attributes (none in this
+example) much like in any Erlang source module.
+
+The first real beam-like instruction is +{labels, 7}+ which tells the
+VM the number of labels in the code to make it possible to allocate
+room for all labels in one pass over the code.
+
+After that there is the actual code for each function. The first
+instruction gives us the function name, the arity and the entry point
+as a label number.
+
+You can use the +'S'+ option with great effect to help you understand
+how the BEAM works, and we will use it like that in later chapters. It
+is also invaluable if you develop your own language that you compile 
+to the BEAM through Core Erlang, to see the generated code.
+
+=== Compiler Passes
+
+In the following sections we will go through most of the compiler
+passes shown in xref:fig_compiler_passes[]. For a language designer
+targeting the BEAM this is interesting since it will show you what you
+can accomplish with the different approaches: macros, parse
+transforms, core erlang, and BEAM code, and how they depend on each
+other.
+
+When tuning Erlang code, it is good to know what optimizations are
+applied when, and how you can look at generated code before and
+after optimizations.
+
+
+==== Compiler Pass: The Erlang Preprocessor (epp)
+
+The compilation starts with a combined tokenizer (or scanner) and
+preprocessor. That is, the preprocessor drives the tokenizer.
+This means that macros are expanded as tokens, so
+it is not a pure string replacement (as for example m4 or cpp).
+You can not use Erlang macros to define your own syntax, a macro
+will expand as a separate token from its surrounding characters.
+You can not concatenate a macro and a character to a token:
+
+[source,erlang]
+----
+-define(plus,+).
+t(A,B) -> A?plus+B.
+----
+
+This will expand to
+
+----
+t(A,B) -> A + + B.
+----
+and not
+----
+t(A,B) -> A ++ B.
+----
+
+On the other hand since macro expansion is done on the token
+level, you do not need to have a valid Erlang term in the
+right hand side of the macro, as long as you use it in a way
+that gives you a valid term. E.g.:
+
+----
+-define(p,o, o]).
+ t() -> [f,?p.
+----
+
+There are few real uses for this other than to win the
+obfuscated Erlang code contest. The main point to remember is that 
+you can not really use the Erlang preprocessor to define a language 
+with a syntax that differs from Erlang. Fortunately there are
+other ways to do this, as you shall see later.
+
+
+
+
+[[SEC-parse_transform]]
+==== Compiler Pass: Parse Transformations
+
+The easiest way to tweak the Erlang language is through Parse
+Transformations (or parse transforms). Parse Transformations come
+with all sorts of warnings, like this note in the OTP documentation:
+
+WARNING: Programmers are strongly advised not to engage in parse
+transformations and no support is offered for problems encountered.
+
+When you use a parse transform you are basically writing an extra pass
+in the compiler and that can if you are not careful lead to very
+unexpected results. But to use a parse transform you have to declare
+the usage in the module using it, and it will be local to that module,
+so as far as compiler tweaks go this one is quite safe.
+
+The biggest problem with parse transforms as I see it is that you
+are inventing your own syntax, and it will make it more difficult
+for anyone else reading your code. At least until your parse transform
+has become as popular and widely used as e.g. QLC.
+
+OK, so you know you shouldn't use it, but if you have to, here is what
+you need to know. A parse transform is a function that works on the
+abstract syntax tree (AST) (see
+link:http://www.erlang.org/doc/apps/erts/absform.html[] ).  The compiler
+does preprocessing, tokenization and parsing and then it will call the
+parse transform function with the AST and expects to get back a
+new AST.
+
+This means that you can't change the Erlang syntax fundamentally, but
+you can change the semantics. Let's say for example that you for some
+reason would like to write json code directly in your Erlang code,
+then you are in luck since the tokens of json and of Erlang are
+basically the same. Also, since the Erlang compiler does most of
+its sanity checks in the linter pass which follows the parse transform
+pass, you can allow an AST which does not represent valid Erlang.
+
+To write a parse transform you need to write an Erlang module (let's
+call it _p_) which exports the function +parse_transform/2+. This
+function is called by the compiler during the parse transform pass if
+the module being compiled (let's call it _m_) contains the compiler
+option +{parse_transform, p}+. The arguments to the function are the
+AST of the module _m_ and the compiler options given to the call to the
+compiler.
+
+[NOTE]
+====
+Note that you will not get any compiler options given in the file. This
+is a bit of a nuisance since you can't give options to the parse transform
+from the code.
+
+The compiler does not expand compiler options until the _expand_ pass,
+which occurs after the parse transform pass.
+====
+
+The documentation of the abstract format is somewhat dense and it is
+quite hard to get a grip on the abstract format by reading the
+documentation.  I encourage you to use the _syntax_tools_ and
+especially +erl_syntax_lib+ for any serious work on the AST.
+
+Here we will develop a simple parse transform just to get an
+understanding of the AST. Therefore we will work directly on the AST
+and use the old reliable +io:format+ approach instead of syntax_tools.
+
+First we create an example of what we would like to be able to compile
+json_test.erl:
+
+[source,erlang]
+----
+-module(json_test).
+-compile({parse_transform, json_parser}).
+-export([test/1]).
+
+test(V) ->
+    <<{{
+      "name"  : "Jack (\"Bee\") Nimble",
+      "format": {
+                 "type"      : "rect",
+                 "widths"     : [1920,1600],
+                 "height"    : (-1080),
+                 "interlace" : false,
+                 "frame rate": V
+                }
+      }}>>.
+----
+
+Then we create a minimal parse transform module +json_parser.erl+:
+
+[source,erlang]
+----
+-module(json_parser).
+-export([parse_transform/2]).
+
+parse_transform(AST, _Options) ->
+  io:format("~p~n", [AST]),
+  AST.
+----
+
+This identity parse transform returns an unchanged AST but it also prints
+it out so that you can see what an AST looks like.
+
+----
+> c(json_parser).
+{ok,json_parser}
+2> c(json_test).
+[{attribute,1,file,{"./json_test.erl",1}},
+ {attribute,1,module,json_test},
+ {attribute,3,export,[{test,1}]},
+ {function,5,test,1,
+  [{clause,5,
+    [{var,5,'V'}],
+    [],
+    [{bin,6,
+      [{bin_element,6,
+        {tuple,6,
+         [{tuple,6,
+           [{remote,7,{string,7,"name"},{string,7,"Jack (\"Bee\") Nimble"}},
+            {remote,8,
+             {string,8,"format"},
+             {tuple,8,
+              [{remote,9,{string,9,"type"},{string,9,"rect"}},
+               {remote,10,
+                {string,10,"widths"},
+                {cons,10,
+                 {integer,10,1920},
+                 {cons,10,{integer,10,1600},{nil,10}}}},
+               {remote,11,{string,11,"height"},{op,11,'-',{integer,11,1080}}},
+               {remote,12,{string,12,"interlace"},{atom,12,false}},
+               {remote,13,{string,13,"frame rate"},{var,13,'V'}}]}}]}]},
+        default,default}]}]}]},
+ {eof,16}]
+./json_test.erl:7: illegal expression
+./json_test.erl:8: illegal expression
+./json_test.erl:5: Warning: variable 'V' is unused
+error
+----
+
+The compilation of +json_test+ fails since the module contains invalid
+Erlang code, but you get to see what the AST looks like. Now we can
+just write some functions to traverse the AST and rewrite the json
+code into Erlang code.footnote:[The translation here is done in
+accordance with EEP 18 (Erlang Enhancement Proposal 18: "JSON bifs")
+link:http://www.erlang.org/eeps/eep-0018.html]
+
+[source,erlang]
+----
+-module(json_parser).
+-export([parse_transform/2]).
+
+parse_transform(AST, _Options) ->
+    json(AST, []).
+
+-define(FUNCTION(Clauses), {function, Label, Name, Arity, Clauses}).
+
+%% We are only interested in code inside functions.
+json([?FUNCTION(Clauses) | Elements], Res) ->
+    json(Elements, [?FUNCTION(json_clauses(Clauses)) | Res]);
+json([Other|Elements], Res) -> json(Elements, [Other | Res]);
+json([], Res) -> lists:reverse(Res).
+
+%% We are interested in the code in the body of a function.
+json_clauses([{clause, CLine, A1, A2, Code} | Clauses]) ->
+    [{clause, CLine, A1, A2, json_code(Code)} | json_clauses(Clauses)];
+json_clauses([]) -> [].
+
+
+-define(JSON(Json), {bin, _, [{bin_element
+                                         , _
+                                         , {tuple, _, [Json]}
+                                         , _
+                                         , _}]}).
+
+%% We look for: <<"json">> = Json-Term
+json_code([])                     -> [];
+json_code([?JSON(Json)|MoreCode]) -> [parse_json(Json) | json_code(MoreCode)];
+json_code(Code)                   -> Code.
+
+%% Json Object -> [{}] | [{Lable, Term}]
+parse_json({tuple,Line,[]})            -> {cons, Line, {tuple, Line, []}};
+parse_json({tuple,Line,Fields})        -> parse_json_fields(Fields,Line);
+%% Json Array -> List
+parse_json({cons, Line, Head, Tail})   -> {cons, Line, parse_json(Head),
+                                                       parse_json(Tail)};
+parse_json({nil, Line})                -> {nil, Line};
+%% Json String -> <<String>>
+parse_json({string, Line, String})     -> str_to_bin(String, Line);
+%% Json Integer -> Intger
+parse_json({integer, Line, Integer})   -> {integer, Line, Integer};
+%% Json Float -> Float
+parse_json({float, Line, Float})       -> {float, Line, Float};
+%% Json Constant -> true | false | null
+parse_json({atom, Line, true})         -> {atom, Line, true};
+parse_json({atom, Line, false})        -> {atom, Line, false};
+parse_json({atom, Line, null})         -> {atom, Line, null};
+
+%% Variables, should contain Erlang encoded Json
+parse_json({var, Line, Var})         -> {var, Line, Var};
+%% Json Negative Integer or Float
+parse_json({op, Line, '-', {Type, _, N}}) when Type =:= integer
+                                             ; Type =:= float ->
+                                          {Type, Line, -N}.
+%% parse_json(Code)                  -> io:format("Code: ~p~n",[Code]), Code.
+
+-define(FIELD(Lable, Code), {remote, L, {string, _, Label}, Code}).
+
+parse_json_fields([], L) -> {nil, L};
+%% Label : Json-Term  --> [{<<Label>>, Term} | Rest]
+parse_json_fields([?FIELD(Lable, Code) | Rest], _) ->
+    cons(tuple(str_to_bin(Label, L), parse_json(Code), L)
+         , parse_json_fields(Rest, L)
+         , L).
+
+
+tuple(E1, E2, Line)    -> {tuple, Line, [E1, E2]}.
+cons(Head, Tail, Line) -> {cons, Line, Head, Tail}.
+
+str_to_bin(String, Line) ->
+    {bin
+     , Line
+     , [{bin_element
+         , Line
+         , {string, Line, String}
+         , default
+         , default
+        }
+       ]
+    }.
+----
+
+And now we can compile +json_test+ without errors:
+
+[source,erlang]
+----
+1> c(json_parser).
+{ok,json_parser}
+2> c(json_test).
+{ok,json_test}
+3> json_test:test(42).
+[{<<"name">>,<<"Jack (\"Bee\") Nimble">>},
+{<<"format">>,
+  [{<<"type">>,<<"rect">>},
+   {<<"widths">>,[1920,1600]},
+   {<<"height">>,-1080},
+   {<<"interlace">>,false},
+   {<<"frame rate">>,42}]}]
+----
+
+The AST generated by +parse_transform/2+ must correspond to valid
+Erlang code, unless you apply several parse transforms, which is
+possible. The validity of the code is checked by the following
+compiler pass.
+
+==== Compiler Pass: Linter
+
+The linter (+erl_lint.erl+) generates warnings for syntactically
+correct but otherwise bad code, like "export_all flag enabled".
+
+
+==== Compiler Pass: Save AST
+
+In order to enable debugging of a module, you can "debug compile" the
+module, that is, pass the option +debug_info+ to the compiler. The
+abstract syntax tree will then be saved by the "Save AST" pass until the
+end of the compilation, where it will be written to the .beam file.
+
+It is important to note that the code is saved before any
+optimisations are applied, so if there is a bug in an optimisation
+pass in the compiler and you run code in the debugger you will get a
+different behavior. If you are implementing your own compiler
+optimisations this can trip you up badly.
+
+==== Compiler Pass: Expand
+
+In the expand phase source Erlang constructs, such as records, are
+expanded to lower level Erlang constructs. Compiler options,
+"+-compile(...)+", are also _expanded_ to meta data.
+
+==== Compiler Pass: Core Erlang
+
+Core Erlang is a strict functional language suitable for compiler
+optimizations. It makes code transformations easier by reducing the
+number of ways to express the same operation. One way it does this is
+by introducing _let_ and _letrec_ expressions to make scoping more
+explicit.
+
+Core Erlang is the best target for a language you want to run in
+ERTS. It changes very seldom and it contains all aspects of Erlang in
+a clean way. If you target the beam instruction set directly you will
+have to deal with much more detail, and that instruction set usually
+changes slightly between each major release of ERTS. If you on the other
+hand target Erlang directly you will be more restricted in what you
+can describe, and you will also have to deal with more details, since
+ Core Erlang is a cleaner language.
+
+To compile an Erlang file to core you can give the option "to_core",
+note though that this writes the Erlang core program to a file with
+the ".core" extension. To compile an Erlang core program from a ".core"
+file you can give the option "from_core" to the compiler.
+
+----
+1> c(world, to_core).
+** Warning: No object file created - nothing loaded **
+ok
+2> c(world, from_core).
+{ok,world}
+----
+
+Note that the +.core+ files are text files written in the human
+readable core format. To get the core program as an Erlang term
+you can add the +binary+ option to the compilation.
+
+==== Compiler Pass: Kernel Erlang
+Kernel Erlang is a flat version of Core Erlang with a few differences.
+For example, each variable is unique and the scope is a whole function.
+Pattern matching is compiled to more primitive operations.
+
+
+==== Compiler Pass: BEAM Code
+The last step of a normal compilation is the external beam code
+format.  Some low level optimizations such as dead code elimination and
+peep hole optimisations are done on this level.
+
+The BEAM code is described in detail in
+xref:CH-Instructions[] and xref:AP-Instructions[].
+
+==== Compiler Pass: Native Code
+If you add the flag +native+ to the compilation, and you have a HiPE
+enabled runtime system, then the compiler will generate native code
+for your module and store the native code along with the beam code
+in the +.beam+ file.
+
+== Other Compiler Tools
+
+There are a number of tools available to help you work with code
+generation and code manipulation. These tools are written in Erlang
+and not really part of the runtime system but they are very nice to
+know about if you are implementing another language on top of the
+BEAM.
+
+In this section we will cover three of the most useful code tools:
+the lexer -- Leex, the parser generator -- Yecc, and a general set
+of functions to manipulate abstract forms -- Syntax Tools.
+
+=== Leex
 
-// There are a number of tools available to help you work with code
-// generation and code manipulation. These tools are written in Erlang
-// and not really part of the runtime system but they are very nice to
-// know about if you are implementing another language on top of the
-// BEAM.
-
-// In this section we will cover three of the most useful code tools:
-// the lexer -- Leex, the parser generator -- Yecc, and a general set
-// of functions to manipulate abstract forms -- Syntax Tools.
-
-// === Leex
-
-// Leex is the Erlang lexer generator.
-// The lexer generator takes a description of a DFA from a definitions
-// file (<fileextension>xrl</fileextension>) and produces an Erlang
-// program that matches tokens described by the DFA.
-
-// The details of how to write a DFA definition for a tokenizer
-// is beyond the scope of this book. For a thorough explanation
-// I recommend the "Dragon book" (Compiler ... by Aho, Sethi and Ullman).
-// Other good resources are the man and info entry for "flex" the lexer program that
-// inspired leex, and the leex documentation itself.
-// If you have info and flex installed you can read the full manual by typing:
-
-// ~~~
-// > info flex
-// ~~~
-
-// The online Erlang documentation also has the leex manual
-// (see [yecc.html](http://erlang.org/doc/man/yecc.html)).
-
-// We can use the lexer generator to create an Erlang program which
-// recognizes JSON tokens. By looking at the JSON definition
-// http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
-// we can see that there are only a handful of tokens that we need to handle.
-
-// <embed file="code/json_tokens.xrl" verbatim="yes"/>
-
-// By using the Leex compiler we can compile this DFA to Erlang code,
-// and by giving the option dfa_graph we also produce a dot-file
-// which can be viewed with e.g. Graphviz.
-
-// {:language="erlang"}
-// ~~~
-// 1> leex:file(json_tokens, [dfa_graph]).
-// {ok, "./json_tokens.erl"}
-// 2>
-// ~~~
-
-// You can view the DFA graph using for example dotty.
-
-// {:language="sh"}
-// ~~~
-// > dotty json_tokens.dot
-// ~~~
+Leex is the Erlang lexer generator.
+The lexer generator takes a description of a DFA from a definitions
+file (+.xrl+) and produces an Erlang
+program that matches tokens described by the DFA.
 
-// ![](code/json_tokens.png)
+The details of how to write a DFA definition for a tokenizer
+are beyond the scope of this book. For a thorough explanation
+I recommend the "Dragon book" (Compilers: Principles, Techniques, and Tools by Aho, Sethi and Ullman).
+Other good resources are the man and info entry for "flex", the lexer program that
+inspired leex, and the leex documentation itself.
+If you have info and flex installed you can read the full manual by typing:
 
-// We can try our tokenizer on an example json file (test.json).
+----
+> info flex
+----
+
+The online Erlang documentation also has the leex manual
+(see link:http://erlang.org/doc/man/leex.html[leex.html]).
+
+We can use the lexer generator to create an Erlang program which
+recognizes JSON tokens. By looking at the JSON definition
+http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+we can see that there are only a handful of tokens that we need to handle.
+
+include::code/compiler_chapter/src/json_tokens.xrl[]
+
+By using the Leex compiler we can compile this DFA to Erlang code,
+and by giving the option dfa_graph we also produce a dot-file
+which can be viewed with e.g. Graphviz.
+
+[source,erlang]
+----
+1> leex:file(json_tokens, [dfa_graph]).
+{ok, "./json_tokens.erl"}
+2>
+----
+
+You can view the DFA graph using for example dotty.
+
+[source, sh]
+----
+> dotty json_tokens.dot
+----
+
+image::code/compiler_chapter/json_tokens.png[]
+
+We can try our tokenizer on an example json file (test.json).
 
-// <embed file="code/test.json" verbatim="yes"/>
+include::code/compiler_chapter/src/test.json[]
 
 // First we need to compile our tokenizer, then we read the file
 // and convert it to a string. Finally we can use
diff --git a/genop.tab b/genop.tab
new file mode 100755
index 0000000..8124729
--- /dev/null
+++ b/genop.tab
@@ -0,0 +1,539 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1998-2011. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# %CopyrightEnd%
+#
+BEAM_FORMAT_NUMBER=0
+
+#
+# Generic instructions, generated by the compiler.  If any of them change number,
+# arity or semantics, the format number above must be bumped.
+#
+
+## @spec label Lbl
+## @doc Specify a module local label.
+##      Label gives this code address a name (Lbl) and marks the start of
+##      a basic block.
+1: label/1
+
+## @spec func_info M F A
+## @doc Define a function M:F/A
+2: func_info/3
+
+3: int_code_end/0
+
+#
+# Function and BIF calls.
+#
+
+## @spec call Arity Label
+## @doc Call the function at Label.
+##      Save the next instruction as the return address in the CP register.
+4: call/2
+
+## @spec call_last Arity Label Deallocate
+## @doc Deallocate and do a tail recursive call to the function at Label.
+##      Do not update the CP register.
+##      Before the call deallocate Deallocate words of stack.
+5: call_last/3
+
+## @spec call_only Arity Label
+## @doc Do a tail recursive call to the function at Label.
+##      Do not update the CP register.
+6: call_only/2
+
+## @spec call_ext Arity Destination
+## @doc Call the function of arity Arity pointed to by Destination.
+##      Save the next instruction as the return address in the CP register.
+7: call_ext/2
+
+## @spec call_ext_last Arity Destination Deallocate
+## @doc Deallocate and do a tail call to function of arity Arity
+##      pointed to by Destination.
+##      Do not update the CP register.
+##      Deallocate Deallocate words from the stack before the call.
+8: call_ext_last/3
+
+## @spec bif0 Bif Reg
+## @doc Call the bif Bif and store the result in Reg.
+9: bif0/2
+
+## @spec bif1 Lbl Bif Arg Reg
+## @doc Call the bif Bif with the argument Arg, and store the result in Reg.
+##      On failure jump to Lbl.
+10: bif1/4
+
+## @spec bif2 Lbl Bif Arg1 Arg2 Reg
+## @doc Call the bif Bif with the arguments Arg1 and Arg2,
+##      and store the result in Reg.
+##      On failure jump to Lbl.
+11: bif2/5
+
+#
+# Allocating, deallocating and returning.
+#
+
+## @spec allocate StackNeed Live
+## @doc Allocate space for StackNeed words on the stack. If a GC is needed
+##      during allocation there are Live number of live X registers.
+##      Also save the continuation pointer (CP) on the stack.
+12: allocate/2
+
+## @spec allocate_heap StackNeed HeapNeed Live
+## @doc Allocate space for StackNeed words on the stack and ensure there is
+##      space for HeapNeed words on the heap. If a GC is needed
+##      save Live number of X registers.
+##      Also save the continuation pointer (CP) on the stack.
+13: allocate_heap/3
+
+## @spec allocate_zero StackNeed Live
+## @doc Allocate space for StackNeed words on the stack. If a GC is needed
+##      during allocation there are Live number of live X registers.
+##      Clear the new stack words. (By writing NIL.)
+##      Also save the continuation pointer (CP) on the stack.
+14: allocate_zero/2
+
+## @spec allocate_heap_zero StackNeed HeapNeed Live
+## @doc Allocate space for StackNeed words on the stack and HeapNeed words
+##      on the heap. If a GC is needed
+##      during allocation there are Live number of live X registers.
+##      Clear the new stack words. (By writing NIL.)
+##      Also save the continuation pointer (CP) on the stack.
+15: allocate_heap_zero/3
+
+## @spec test_heap HeapNeed Live
+## @doc Ensure there is space for HeapNeed words on the heap. If a GC is needed
+##      save Live number of X registers.
+16: test_heap/2
+
+## @spec init N
+## @doc  Clear the Nth stack word. (By writing NIL.)
+17: init/1
+
+## @spec deallocate N
+## @doc  Restore the continuation pointer (CP) from the stack and deallocate
+##       N+1 words from the stack (the + 1 is for the CP).
+18: deallocate/1
+
+## @spec return
+## @doc  Return to the address in the continuation pointer (CP).
+19: return/0
+
+#
+# Sending & receiving.
+#
+## @spec send
+## @doc  Send argument in x(1) as a message to the destination process in x(0).
+##       The message in x(1) ends up as the result of the send in x(0).
+20: send/0
+
+## @spec remove_message
+## @doc  Unlink the current message from the message queue and store a
+##       pointer to the message in x(0). Remove any timeout.
+21: remove_message/0
+
+## @spec timeout
+## @doc  Reset the save point of the mailbox and clear the timeout flag.
+22: timeout/0
+
+## @spec loop_rec Label Source
+## @doc  Loop over the message queue, if it is empty jump to Label.
+23: loop_rec/2
+
+## @spec loop_rec_end Label
+## @doc  Advance the save pointer to the next message and jump back to Label.
+24: loop_rec_end/1
+
+## @spec wait Label
+## @doc  Suspend the process and set the entry point to the beginning of the
+##       receive loop at Label.
+25: wait/1
+
+## @spec wait_timeout Label Time
+## @doc  Sets up a timeout of Time milliseconds and saves the address of the
+##       following instruction as the entry point if the timeout triggers.
+26: wait_timeout/2
+
+#
+# Arithmetic opcodes.
+#
+27: -m_plus/4
+28: -m_minus/4
+29: -m_times/4
+30: -m_div/4
+31: -int_div/4
+32: -int_rem/4
+33: -int_band/4
+34: -int_bor/4
+35: -int_bxor/4
+36: -int_bsl/4
+37: -int_bsr/4
+38: -int_bnot/3
+
+#
+# Comparison operators.
+#
+
+## @spec is_lt Lbl Arg1 Arg2
+## @doc Compare two terms and jump to Lbl if Arg1 is not less than Arg2.
+39: is_lt/3
+
+## @spec is_ge Lbl Arg1 Arg2
+## @doc Compare two terms and jump to Lbl if Arg1 is less than Arg2.
+40: is_ge/3
+
+## @spec is_eq Lbl Arg1 Arg2
+## @doc Compare two terms and jump to Lbl if Arg1 is not (numerically) equal to Arg2.
+41: is_eq/3
+
+## @spec is_ne Lbl Arg1 Arg2
+## @doc Compare two terms and jump to Lbl if Arg1 is (numerically) equal to Arg2.
+42: is_ne/3
+
+## @spec is_eq_exact Lbl Arg1 Arg2
+## @doc Compare two terms and jump to Lbl if Arg1 is not exactly equal to Arg2.
+43: is_eq_exact/3
+
+## @spec is_ne_exact Lbl Arg1 Arg2
+## @doc Compare two terms and jump to Lbl if Arg1 is exactly equal to Arg2.
+44: is_ne_exact/3
+
+#
+# Type tests.
+#
+
+## @spec is_integer Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not an integer.
+45: is_integer/2
+
+## @spec is_float Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a float.
+46: is_float/2
+
+## @spec is_number Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a number.
+47: is_number/2
+
+## @spec is_atom Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not an atom.
+48: is_atom/2
+
+## @spec is_pid Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a pid.
+49: is_pid/2
+
+## @spec is_reference Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a reference.
+50: is_reference/2
+
+## @spec is_port Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a port.
+51: is_port/2
+
+## @spec is_nil Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not nil.
+52: is_nil/2
+
+## @spec is_binary Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a binary.
+53: is_binary/2
+
+54: -is_constant/2
+
+## @spec is_list Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a cons or nil.
+55: is_list/2
+
+## @spec is_nonempty_list Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a cons.
+56: is_nonempty_list/2
+
+## @spec is_tuple Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a tuple.
+57: is_tuple/2
+
+## @spec test_arity Lbl Arg1 Arity
+## @doc Test the arity of (the tuple in) Arg1 and jump
+## to Lbl if it is not equal to Arity.
+58: test_arity/3
+
+#
+# Indexing & jumping.
+#
+
+## @spec select_val Arg FailLabel Destinations
+## @doc Jump to the destination label corresponding to Arg
+##      in the Destinations list, if no value matches, jump to FailLabel.
+59: select_val/3
+
+## @spec select_tuple_arity Tuple FailLabel Destinations
+## @doc Check the arity of the tuple Tuple and jump to the corresponding
+##      destination label, if no arity matches, jump to FailLabel.
+60: select_tuple_arity/3
+
+## @spec jump Label
+## @doc Jump to Label.
+61: jump/1
+
+#
+# Catch.
+#
+62: catch/2
+63: catch_end/1
+
+#
+# Moving, extracting, modifying.
+#
+
+## @spec move Source Destination
+## @doc Move the source Source (a literal or a register) to
+##      the destination register Destination.
+64: move/2
+
+## @spec get_list  Source Head Tail
+## @doc  Get the head and tail (or car and cdr) parts of a list
+##       (a cons cell) from Source and put them into the registers
+##       Head and Tail.
+65: get_list/3
+
+## @spec get_tuple_element Source Element Destination
+## @doc  Get element number Element from the tuple in Source and put
+##       it in the destination register Destination.
+66: get_tuple_element/3
+
+## @spec set_tuple_element NewElement Tuple Position
+## @doc  Update the element at position Position of the tuple Tuple
+##       with the new element NewElement.
+67: set_tuple_element/3
+
+#
+# Building terms.
+#
+68: -put_string/3
+69: put_list/3
+70: put_tuple/2
+71: put/1
+
+#
+# Raising errors.
+#
+72: badmatch/1
+73: if_end/0
+74: case_end/1
+
+#
+# 'fun' support.
+#
+## @spec call_fun Arity
+## @doc Call a fun of arity Arity. Assume arguments in
+##      registers x(0) to x(Arity-1) and that the fun is in x(Arity).
+##      Save the next instruction as the return address in the CP register.
+75: call_fun/1
+
+76: -make_fun/3
+
+## @spec is_function Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a
+##      function (i.e. fun or closure).
+77: is_function/2
+
+#
+# Late additions to R5.
+#
+
+## @spec call_ext_only Arity Label
+## @doc Do a tail recursive call to the function at Label.
+##      Do not update the CP register.
+78: call_ext_only/2
+
+#
+# Binary matching (R7).
+#
+79: -bs_start_match/2
+80: -bs_get_integer/5
+81: -bs_get_float/5
+82: -bs_get_binary/5
+83: -bs_skip_bits/4
+84: -bs_test_tail/2
+85: -bs_save/1
+86: -bs_restore/1
+
+#
+# Binary construction (R7A).
+#
+87: -bs_init/2
+88: -bs_final/2
+89: bs_put_integer/5
+90: bs_put_binary/5
+91: bs_put_float/5
+92: bs_put_string/2
+
+#
+# Binary construction (R7B).
+#
+93: -bs_need_buf/1
+
+#
+# Floating point arithmetic (R8).
+#
+94: fclearerror/0
+95: fcheckerror/1
+96: fmove/2
+97: fconv/2
+98: fadd/4
+99: fsub/4
+100: fmul/4
+101: fdiv/4
+102: fnegate/3
+
+# New fun construction (R8).
+103: make_fun2/1
+
+# Try/catch/raise (R10B).
+104: try/2
+105: try_end/1
+106: try_case/1
+107: try_case_end/1
+108: raise/2
+
+# New instructions in R10B.
+109: bs_init2/6
+110: -bs_bits_to_bytes/3
+111: bs_add/5
+112: apply/1
+113: apply_last/2
+## @spec is_boolean Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a Boolean.
+114: is_boolean/2
+
+# New instructions in R10B-6.
+## @spec is_function2 Lbl Arg1 Arity
+## @doc Test the type of Arg1 and jump to Lbl if it is not a
+##      function of arity Arity.
+115: is_function2/3
+
+# New bit syntax matching in R11B.
+
+116: bs_start_match2/5
+117: bs_get_integer2/7
+118: bs_get_float2/7
+119: bs_get_binary2/7
+120: bs_skip_bits2/5
+121: bs_test_tail2/3
+122: bs_save2/2
+123: bs_restore2/2
+
+# New GC bifs introduced in R11B.
+
+## @spec gc_bif1 Lbl Live Bif Arg Reg
+## @doc Call the bif Bif with the argument Arg, and store the result in Reg.
+##      On failure jump to Lbl.
+##      Do a garbage collection if necessary to allocate space on the heap
+##      for the result (saving Live number of X registers).
+124: gc_bif1/5
+
+## @spec gc_bif2 Lbl Live Bif Arg1 Arg2 Reg
+## @doc Call the bif Bif with the arguments Arg1 and Arg2,
+##      and store the result in Reg.
+##      On failure jump to Lbl.
+##      Do a garbage collection if necessary to allocate space on the heap
+##      for the result (saving Live number of X registers).
+125: gc_bif2/6
+
+# Experimental new bit_level bifs introduced in R11B.
+# NOT used in R12B.
+126: -bs_final2/2
+127: -bs_bits_to_bytes2/2
+
+# R11B-4
+128: -put_literal/2
+
+# R11B-5
+## @spec is_bitstr Lbl Arg1
+## @doc Test the type of Arg1 and jump to Lbl if it is not a bit string.
+129: is_bitstr/2
+
+# R12B
+130: bs_context_to_binary/1
+131: bs_test_unit/3
+132: bs_match_string/4
+133: bs_init_writable/0
+134: bs_append/8
+135: bs_private_append/6
+
+## @spec trim N Remaining
+## @doc Reduce the stack usage by N words,
+##      keeping the CP on the top of the stack.
+136: trim/2
+
+137: bs_init_bits/6
+
+# R12B-5
+138: bs_get_utf8/5
+139: bs_skip_utf8/4
+
+140: bs_get_utf16/5
+141: bs_skip_utf16/4
+
+142: bs_get_utf32/5
+143: bs_skip_utf32/4
+
+144: bs_utf8_size/3
+145: bs_put_utf8/3
+
+146: bs_utf16_size/3
+147: bs_put_utf16/3
+
+148: bs_put_utf32/3
+
+# R13B03
+
+149: on_load/0
+
+# R14A
+
+## @spec recv_mark Label
+## @doc  Save the end of the message queue and the address of
+##       the label Label so that a recv_set instruction can start
+##       scanning the inbox from this position.
+150: recv_mark/1
+
+## @spec recv_set Label
+## @doc Check that the saved mark points to Label and set the
+##      save pointer in the message queue to the last position
+##      of the message queue saved by the recv_mark instruction.
+151: recv_set/1
+
+## @spec gc_bif3 Lbl Live Bif Arg1 Arg2 Arg3 Reg
+## @doc Call the bif Bif with the arguments Arg1, Arg2 and Arg3,
+##      and store the result in Reg.
+##      On failure jump to Lbl.
+##      Do a garbage collection if necessary to allocate space on the heap
+##      for the result (saving Live number of X registers).
+152: gc_bif3/7
+
+# R15A
+
+153: line/1
+
+# R17
+
+154: put_map_assoc/5
+155: put_map_exact/5
+156: is_map/2
+157: has_map_fields/3
+158: get_map_elements/3
diff --git a/memory.asciidoc b/memory.asciidoc
new file mode 100644
index 0000000..3ce637e
--- /dev/null
+++ b/memory.asciidoc
@@ -0,0 +1,1410 @@
+[[CH-Memory]]
+== The Memory Subsystem: Stacks, Heaps and Garbage Collection (7p)
+
+// Cover these fields:
+// high_water, old_hend, old_htop, old_heap,
+// gen_gcs, max_gen_gcs, off_heap,  mbuf, mbuf_sz, psd, bin_vheap_sz,
+// bin_vheap_mature, bin_old_vheap_sz, bin_old_vheap
+
+
+Before we dive into the memory subsystem of ERTS, we need to have some
+basic vocabulary and understanding of the general memory layout of a
+program in a modern operating system. In this review section I will
+assume the program is compiled to an ELF executable and running on
+Linux on something like an IA-32/AMD64 architecture. The layout and
+terminology are basically the same for all operating systems that ERTS
+compiles on.
+
+A program's memory layout looks something like this:
+
+++++
+<pre data-type="programlisting">
+ high
+ addresses
+        +--------------+
+        |   Arguments  |
+        |     ENV      |
+        +--------------+
+        |    Stack     | --+
+        |      |       |   | Can grow
+        |      v       |   | dynamically
+        |              | --+
+        +--------------+
+        |              | -------------------------------+
+        +--------------+                                |
+        |    memory    |                                |
+        |      map     | -- files or anonymous          |
+        |    segment   |                                |
+        +--------------+                                |
+        |              |                                | Memory
+        +--------------+                                | Mapping
+        | Thread Stack | --+                            | Region
+        |      |       |   | Statically allocated       |
+        |      v       |   | on thread start.           |
+        |              | --+                            |
+        +--------------+                                |
+        |              |                                |
+        +--------------+                                |
+        | Thread Stack | --+                            |
+        |      |       |   | Statically allocated       |
+        |      v       |   | on thread start.           |
+        |              | --+                            |
+        +--------------+                                |
+        |              | -------------------------------+
+        +--------------+ brk
+        |              | --+
+        |      ^       |   | Can grow
+        |      |       |   | dynamically
+        |     Heap     | --+
+        +--------------+ start_brk
+        |     BSS      | --  Static variables initialized to zero
+        +--------------+
+        |     Data     | --+
+        +--------------+   | Binary (disk image)
+        |     Code     | --+
+        +--------------+
+ low
+ addresses
+</pre>
+++++
+
+Even though this picture might look daunting it is still a
+simplification. (For a full understanding of the memory subsystem read
+a book like "Understanding the Linux Kernel" or "Linux System
+Programming".) What I want you to take away from this is that there are
+two types of dynamically allocatable memory: the heap and memory
+mapped segments. I will try to call this heap the _C-heap_ from now
+on, to distinguish it from an Erlang process heap. I will call a
+memory mapped segment just a _segment_, and any of the stacks in
+this picture the _C-stack_.
+
+The C-heap is allocated through malloc and a segment is allocated with
+mmap.
+
+// TODO: should this be in the preface?
+
+.A note on pictures of memory
+****
+
+*Note* When drawing overview pictures of system memory and stacks we
+will follow the convention that memory addresses grow upward. That is,
+low memory addresses on the bottom of the page and high memory
+addresses on the top of the page. (Stacks most often grow downward
+starting at high addresses, so that new elements are pushed at the
+lowest address.)
+
+However when we draw a c-structure we will draw the fields from the
+top and down, even though the first field of the structure will be
+at the lowest address and the following fields at higher addresses.
+So pictures of structures have low address at the top of the page
+and high address at the bottom of the page.
+
+This means that a picture of a c-structure and a picture of a memory
+area will have their address positions on the page mirrored. This becomes
+somewhat confusing when we try to picture structures and heaps in the
+same picture.
+
+****
+
+
+
+=== The memory subsystem
+
+Now that we dive into the memory subsystem it will once again
+be apparent that ERTS is more like an operating system than just a
+programming language environment. Not only does ERTS provide a garbage
+collector for Erlang terms on the Erlang process level, but it also
+provides a plethora of low level memory allocators and memory
+allocation strategies.
+
+For an overview of memory allocators see the erts_alloc documentation
+at: http://www.erlang.org/doc/man/erts_alloc.html
+
+All these allocators also come with a number of parameters that
+can be used to tweak their behavior, and this is probably one
+of the most important areas from an operational point of view.
+This is where we can configure the system behavior to fit anything
+from a small embedded control system (like a Raspberry Pi) to an
+Internet scale 2TB database server.
+
+There are currently eleven different allocators, six different
+allocation strategies, and more than 18 other different settings,
+some of which take arbitrary numerical values. This
+means that there basically is an infinite number of possible
+configurations. (OK, strictly speaking it is not infinite, since
+each number is bounded, but there are more configurations
+than you can shake a stick at.)
+
+In order to be able to use these settings in any meaningful way
+we will have to understand how these allocators work and
+how each setting impacts the performance of the allocator.
+
+The erts_alloc manual goes as far as to give the following warning:
+
+.Warning
+[quote, Ericsson AB, http://www.erlang.org/doc/man/erts_alloc.html]
+____
+*Warning*
+
+Only use these flags if you are absolutely sure what you are
+doing. Unsuitable settings may cause serious performance degradation
+and even a system crash at any time during operation.
+____
+
+Making you absolutely sure that you know what you are doing, that is
+what this chapter is about.
+
+Oh yes, we will also go into details of how the garbage collector
+works.
+
+
+[[SS-Memory_Allocators]]
+=== Different types of memory allocators
+The Erlang run-time system is trying its best to handle memory
+in all situations and under all types of loads, but there are
+always corner cases. In this chapter we will look at the details
+of how memory is allocated and how the different allocators work.
+With this knowledge and some tools that we will look at later
+you should be able to detect and fix problems if your system
+ends up in one of these corner cases.
+
+For a nice story about the troubles the system might get into
+and how to analyze and correct the behavior read
+Fred H&eacute;bert's essay https://blog.heroku.com/archives/2013/11/7/logplex-down-the-rabbit-hole["Troubleshooting Down the Logplex Rabbit Hole"].
+
+
+When we are talking about a memory allocator in this book we
+have a specific meaning in mind. Each memory allocator manages
+allocations and deallocations of memory of a certain type.
+Each allocator is intended for a specific type of data and is
+often specialized for one size of data.
+
+Each memory allocator implements the allocator interface but
+can use different algorithms and settings for the actual
+memory allocation.
+
+The goal with having different allocators is to reduce
+fragmentation, by grouping allocations of the same size,
+and to increase performance, by making frequent allocations
+cheap.
+
+There are two special, fundamental or generic, memory allocator types
+_sys_alloc_ and _mseg_alloc_, and nine specific allocators implemented
+through the _alloc_util_ framework.
+
+In the following sections we will go through the different allocators,
+with a little detour into the general framework for allocators
+(alloc_util).
+
+Each allocator has several names used in the documentation and in the
+C code. See xref:table-allocators[] for a short list of all allocators
+and their names. The C-name is used in the C-code to refer to the
+allocator. The Type-name is used in erl_alloc.types to bind allocation
+types to an allocator. The Flag is the letter used for setting
+parameters of that allocator when starting Erlang.
+
+
+.List of memory allocators.
+[[table-allocators]]
+[options="header"]
+|===============================================================================
+|Name                    | Description           | C-name     | Type-name | Flag
+| Basic allocator        | malloc interface      | sys_alloc  | SYSTEM    | Y
+|Memory segment allocator| mmap interface        | mseg_alloc | -         | M
+| Temporary allocator    | Temporary allocations | temp_alloc | TEMPORARY | T
+| Heap allocator         | Erlang heap data      | eheap_alloc| EHEAP     | H
+| Binary allocator       | Binary data           |binary_alloc| BINARY    | B
+| ETS allocator          | ETS data              | ets_alloc  | ETS       | E
+| Driver allocator       | Driver data           |driver_alloc| DRIVER    | R
+| Short lived allocator  | Short lived memory    | sl_alloc   |SHORT_LIVED| S
+| Long lived allocator   | Long lived memory     | ll_alloc   |LONG_LIVED | L
+| Fixed allocator        | Fixed size data       | fix_alloc  |FIXED_SIZE | F
+| Standard allocator     | For most other data   | std_alloc  | STANDARD  | D
+|===============================================================================
+
+
+
+==== The basic allocator: sys_alloc
+
+The allocator sys_alloc can not be disabled, and is basically a
+straight mapping to the underlying OS malloc implementation in
+libc.
+
+If a specific allocator is disabled then sys_alloc is used instead.
+
+All specific allocators use either sys_alloc or mseg_alloc to
+allocate memory from the operating system as needed.
+
+When memory is allocated from the OS sys_alloc can add (pad) a fixed
+number of kilobytes to the requested number. This can reduce the
+number of system calls by over allocating memory. The default padding
+is zero.
+
+When memory is freed, sys_alloc will keep some free memory allocated
+in the process. The size of this free memory is called the trim
+threshold, and the default is 128 kilobytes. This also reduces the
+number of system calls at the cost of a higher memory footprint.
+This means that if you are running the system with the default
+settings you can experience that the Beam process does not give
+memory back to the OS directly as memory is freed up.
+
+Memory areas allocated by sys_alloc are stored in the C-heap of the
+beam process which will grow as needed through system calls to brk.
+
+==== The memory segment allocator: mseg_alloc
+
+If the underlying operating system supports mmap a specific memory
+allocator can use mseg_alloc instead of sys_alloc to allocate
+memory from the operating system.
+
+Memory areas allocated through mseg_alloc are called segments.  When a
+segment is freed it is not immediately returned to the OS, instead it
+is kept in a segment cache.
+
+When a new segment is allocated a cached segment is reused if
+possible, i.e. if it is the same size or larger than the requested
+size but not too large. The value of _absolute max cache bad fit_
+determines the number of kilobytes of extra size which is considered
+not too large. The default is 4096 kilobytes.
+
+In order not to reuse a 4096 kilobyte segment for really small
+allocations there is also a _relative max cache bad fit_ value which
+states that a cached segment may not be used if it is more than
+that many percent larger. The default value is 20 percent. That
+is a 12 KB segment may be used when asked for a 10 KB segment.
+
+The number of entries in the cache defaults to 10 but can be
+set to any value from zero to thirty.
+
+==== The memory allocator framework: alloc_util
+
+Building on top of the two generic allocators (sys_alloc and mseg_alloc)
+is a framework called _alloc_util_ which is used to implement specific
+memory allocators for different types of usage and data.
+
+The framework is implemented in _erl_alloc_util.[ch]_ and the different
+allocators used by ERTS are defined in erl_alloc.types in
+the directory "erts/emulator/beam/".
+
+In a smp system there is usually one allocator of each type per
+scheduler thread.
+
+The smallest unit of memory that an allocator works with is called a
+_block_.  When you call an allocator to allocate a certain amount of
+memory what you get back is a block. It is also blocks that you give
+as an argument to the allocator when you want to deallocate memory.
+
+The allocator does not allocate blocks from the operating system
+directly though. Instead the allocator allocates a _carrier_ from the
+operating system, either through sys_alloc or through mseg_alloc,
+which in turn uses malloc or mmap. If sys_alloc is used the carrier
+is placed on the C-heap and if mseg_alloc is used the carrier
+is placed in a segment.
+
+Small blocks are placed in a multiblock carrier. A multiblock carrier
+can as the name suggests contain many blocks. Larger blocks are placed
+in a singleblock carrier, which as the name implies only contains one
+block.
+
+What's considered a small and a large block is determined by the
+parameter _singleblock carrier threshold_ (+sbct+), see the list
+of system flags below.
+
+Most allocators also have one "main multiblock carrier" which is never
+deallocated.
+
+++++
+<pre data-type="programlisting">
+ high
+ addresses
+           |FREE OS MEMORY |
+           +---------------+ brk
+           |   FREE HEAP   |       | less than MYtt kb
+           +---------------+
+     /     |  Unused PAD   |  | multiple of Muycs
+    |      |---------------|  |
+    S      |               |  |    |
+singleblock|               |  |    |
+ carrier 1 |     Block     |  |    | larger than MSsbct kb
+    |      |               |  |    |
+     \     |               |  |    |
+           +---------------+
+     /     |Free in Carrier|       |
+    |      |---------------|       |
+    S      |               |       |
+  main     |               |       |
+multiblock |     Block 2   |       | MSmmbcs kb
+ carrier   |---------------|       |
+    |      |               |       |
+     \     |     Block 1   |       |
+           +---------------+
+           |               |
+           |    U S E D    |
+           |               |
+           +---------------+ start_brk
+               C-Heap
+ low
+ addresses
+</pre>
+++++
+
+
+// Want most data in multiblock carriers mbc
+// increase sbct, then increase smbcs and lmbcs
+
+===== Memory allocation strategies
+
+++++
+<!--
+Are you intending for readers to take advantage of these allocation strategies in their code? If so, this section needs to be much more prominent, and not a subheading in a reference section.  - bmacdonald
+-->
+++++
+
+
+To find a free block of memory in a multi block carrier an
+allocation strategy is used. Each type of allocator has
+a default allocation strategy, but you can also set the
+allocation strategy with the +as+ flag.
+
+The Erlang Run-Time System Application Reference Manual lists
+the following allocation strategies:
+
+[quote,'http://www.erlang.org/doc/man/erts_alloc.html[erts_alloc]']
+__________________________
+
+_Best fit_: Find the smallest block that satisfies the requested block size.
+(bf)
+
+_Address order best fit_: Find the smallest block that satisfies the
+requested block size. If multiple blocks are found, choose the one
+with the lowest address.
+(aobf)
+
+_Address order first fit_: Find the block with the lowest address that
+satisfies the requested block size.
+(aoff)
+
+_Address order first fit carrier best fit_:
+Find the carrier with the lowest address that can satisfy the
+requested block size, then find a block within that carrier using the
+"best fit" strategy.  (aoffcbf)
+
+_Address order first fit carrier address order best fit_: Find the
+carrier with the lowest address that can satisfy the requested block
+size, then find a block within that carrier using the "address order
+best fit" strategy.
+(aoffcaobf)
+
+
+_Good fit_: Try to find the best fit, but settle for the best fit found
+during a limited search.
+(gf)
+
+_A fit_: Do not search for a fit, inspect only one free block to see if
+it satisfies the request. This strategy is only intended to be used
+for temporary allocations.
+(af)
+
+__________________________
+
+
+
+
+
+==== The temporary allocator: temp_alloc
+
+The allocator _temp_alloc_, is used for temporary
+allocations. That is very short lived allocations.  Memory allocated
+by temp_alloc may not be allocated over an Erlang process context
+switch.
+
+You can use temp_alloc as a small scratch or working area while doing
+some work within a function. Look at it as an extension of the C-stack
+and free it in the same way. That is, to be on the safe side, free
+memory allocated by temp_alloc before returning from the function that
+did the allocation. There is a note in erl_alloc.types saying that
+you should free a temp_alloc block before the emulator starts
+executing Erlang code.
+
+Note that no Erlang process running on the same scheduler as the
+allocator may start executing Erlang code before the block is freed.
+This means that you can not use a temporary allocation over a bif
+or nif trap (yield).
+
+In a default R16 smp system there are N+1 temp_alloc allocators where N
+is the number of schedulers. The temp_alloc uses the "A fit" (+af+)
+strategy. Since the allocation pattern of the temp_alloc basically is
+that of a stack (mostly of size 0 or 1), this strategy works fine.
+
+The temporary allocator is, in R16, used by the following types of
+data: TMP_HEAP, MSG_ROOTS, ROOTSET, LOADER_TEMP, NC_TMP, TMP,
+DCTRL_BUF, TMP_DIST_BUF, ESTACK, DB_TMP, DB_MC_STK, DB_MS_CMPL_HEAP,
+LOGGER_DSBUF, TMP_DSBUF, DDLL_TMP_BUF, TEMP_TERM, SYS_READ_BUF,
+ENVIRONMENT, CON_VPRINT_BUF.
+
+For an up to date list of allocation types allocated with each
+allocator, see erl_alloc.types
+(e.g. +grep TEMPORARY erts/emulator/beam/erl_alloc.types+).
+
+I will not go through each of these different types, but in
+general as you can guess by their names, they are temporary
+buffers or work stacks.
+
+
+==== The heap allocator: eheap_alloc
+
+The heap allocator is used for allocating memory blocks
+where tagged Erlang terms are stored, such as Erlang process heaps
+(all generations), heap fragments, and the beam_registers.
+
+These are probably the memory areas you are most interested in as an
+Erlang developer or when tuning an Erlang system.  We will talk more
+about how these areas are managed in the upcoming sections on garbage
+collection and process memory. There we will also cover what a heap
+fragment is.
+
+
+==== The binary allocator: binary_alloc
+
+The binary allocator is used for, yes you guessed it, binaries.
+Binaries can be of quite varying sizes and have varying life spans.
+This allocator uses the _best fit_ allocation strategy by default.
+
+==== The ETS allocator: ets_alloc
+
+The ETS allocator is used for most ets related data,
+except for some short lived or temporary data used by ets tables.
+
+==== The driver allocator: driver_alloc
+
+The driver allocator is used for ports, linked in drivers and nifs.
+
+==== The short lived allocator: sl_alloc
+
+The short lived allocator is used for lists and buffers that are
+expected to be short lived. Short lived data can live longer than
+temporary data.
+
+// alloc_info_request async bif_timer_sl binary_buffer busy_caller
+// busy_caller_table code_ix_lock_q db_fixation db_match_spec_run_heap
+// db_proc_cleanup_state ethread_short_lived external_term_data
+// extra_port_list extra_root fd_list fixed_del gc_info_request
+// misc_aux_work misc_op_list pending_suspend pollset_update_req
+// port_names port_task port_task_handle_list prepared_code proc_list
+// ptab_list_chunk_info ptab_list_deleted_el ptab_list_pids ptimer_sl
+// re_stack re_subject sched_wall_time_request short_lived_thr_queue
+// sl_migration_paths ssb system_messages_queue temp_thr_prgr_data
+// tmp_cpu_ids unicode_buffer
+
+
+==== The long lived allocator: ll_alloc
+
+The long lived allocator is used for long lived data, such
+as atoms, modules, funs and long lived tables.
+
+// atom_entry atom_tab atom_text aux_work_timeouts bif_timer_table code
+// code cpu_data cpu_groups_map cs_prog_path db_match_pseudo_proc
+// db_match_pseudo_proc db_tabs ddll_errcodes driver_event_state drv_tab
+// ethread_long_lived export_entry export_entry export_tab fd_status
+// fd_tab fp_exception fun_entry instr_info internal_async_data
+// ll_migration_paths ll_temp_term ll_temp_term long_lived_thr_queue
+// misc_aux_work_q module_entry module_tab poll_fds poll_result_events
+// pollset port_tab pre_alloc_data preloaded proc_lock_waiter proc_tab
+// process_interval run_queue_balancing run_queues scheduler_data
+// scheduler_data scheduler_sleep_info select_fds taint_list
+// thr_prgr_data thr_prgr_internal_data timer_wheel waiter_object
+
+
+
+==== The fixed size allocator: fix_alloc
+
+The fixed allocator is used for objects of a fixed size, such as PCBs,
+message refs and a few others. The fixed size allocator uses the
+_address order best fit_ allocation strategy by default.
+
+// driver_event_data_state driver_select_data_state monitor_sh msg_ref
+// nlink_sh proc sl_thr_q_element
+
+
+==== The standard allocator: std_alloc
+
+The standard allocator is used by the other types of data.
+(active_procs alloc_info_request arg_reg bif_timer_ll bits_buf bpd
+calls_buf db_heir_data db_heir_data db_named_table_entry dcache
+ddll_handle ddll_processes ddll_processes dist_entry dist_tab
+driver_lock ethread_standard fd_entry_buf fun_tab gc_info_request
+io_queue line_buf link_lh module_refs monitor_lh monitor_lh monitor_sh
+nlink_lh nlink_lh nlink_sh node_entry node_tab nodes_monitor
+port_data_heap port_lock port_report_exit port_specific_data proc_dict
+process_specific_data ptimer_ll re_heap reg_proc reg_tab
+sched_wall_time_request stack suspend_monitor thr_q_element thr_queue
+zlib )
+
+
+=== TODO: system flags for memory
+
+TODO
+
+=== Process Memory
+
+As we saw in xref:CH-Processes[] a process is really just a number
+of memory areas. In this chapter we will look a bit closer at how
+the stack, the heap and the mailbox are managed.
+
+The default size of the stack and heap is 233 words. This default
+size can be changed globally when starting Erlang through the
+`+h` flag. You can also set the minimum heap size when starting
+a process with +spawn_opt+ by setting +min_heap_size+. More
+on this in xref:CH-Tweak[].
+
+Erlang terms are tagged as we saw in xref:CH-TypeSystem[], and when
+they are stored on the heap they are either cons cells or boxed
+objects.
+
+
+==== Term sharing
+
+Objects on the heap are passed by references within the context of one
+process.  If you call one function with a tuple as an argument, then
+only a tagged reference to that tuple is passed to the called
+function. When you build new terms you will also only use references
+to sub terms.
+
+For example if you have the string "hello" (which is the same as this
+list of integers: [104,101,108,108,111]) you would get a heap layout
+similar to xref:fig-list_layout[]
+
+
+[[fig-list_layout]]
+++++
+<pre data-type="programlisting">
+         ADR                               BINARY  VALUE  +  DESCRIPTION
+ hend -&gt;     +-------- -------- -------- --------+
+             |              ...                  |
+             |              ...                  |
+             |00000000 00000000 00000000 10000001| 128 + list tag  ---------------+
+ stop -&gt;     |                                   |                                |
+                                                                                  |
+ htop -&gt;     |                                   |                                |
+         132 |00000000 00000000 00000000 01111001| 120 + list tag  -------------- | -+
+         128 |00000000 00000000 00000110 10001111| (H) 104 bsl 4 + small int tag &lt;+  |
+         124 |00000000 00000000 00000000 01110001| 112 + list tag  ----------------- | -+
+         120 |00000000 00000000 00000110 01011111| (e) 101 bsl 4 + small int tag &lt;---+  |
+         116 |00000000 00000000 00000000 01101001| 104 + list tag  -------------------- | -+
+         112 |00000000 00000000 00000110 11001111| (l) 108 bsl 4 + small int tag &lt;------+  |
+         108 |00000000 00000000 00000000 01100001|  96 + list tag  ----------------------- | -+
+         104 |00000000 00000000 00000110 11001111| (l) 108 bsl 4 + small int tag &lt;---------+  |
+         100 |11111111 11111111 11111111 11111011| NIL                                        |
+          96 |00000000 00000000 00000110 11111111| (o) 111 bsl 4 + small int tag &lt;------------+
+             |                ...                |
+ heap -&gt;     +-----------------------------------+
+</pre>
+++++
+
+If you then create a tuple with two instances of the list, all that is repeated is
+the tagged pointer to the list: 00000000000000000000000010000001. The code
+
+----
+L = [104, 101, 108, 108, 111],
+T = {L, L}.
+----
+
+would result in a memory layout as in xref:fig-sharing[]. That is,
+a boxed header saying that this is a tuple of size 2 and then two
+pointers to the same list.
+
+[[fig-sharing]]
+----
+ADR VALUE                            DESCRIPTION
+144 00000000000000000000000010000001 128+CONS
+140 00000000000000000000000010000001 128+CONS
+136 00000000000000000000000010000000 2+ARITYVAL
+----
+
+This is nice, since it is cheap to do and uses very little space. But if
+you send the tuple to another process or do any other type of IO, or any
+operations which results in something called a _deep copy_, then the
+data structure is expanded. So if we send out tuple +T+ to another process
+P2 (+P2 ! T+) then the heap of P2 will look like in xref:fig-sharing-expanded[].
+
+[[fig-sharing-expanded]]
+----
+ ..
+----
+
+You can quickly bring down your Erlang node by expanding a highly shared term,
+see xref:listing-share[].
+
+----
+-module(share).
+
+-export([share/2, size/0]).
+
+share(0, Y) -> {Y,Y};
+share(N, Y) -> [share(N-1, [N|Y]) || _ <- Y].
+
+size() ->
+    T = share:share(5,[a,b,c]),
+    {{size, erts_debug:size(T)},
+     {flat_size, erts_debug:flat_size(T)}}.
+
+
+
+ 1> timer:tc(fun() -> share:share(10,[a,b,c]), ok end).
+ {1131,ok}
+
+ 2> share:share(10,[a,b,c]), ok.
+ ok
+
+ 3> byte_size(list_to_binary(share:share(10,[a,b,c]))), ok.
+ HUGE size (13695500364)
+ Abort trap: 6
+
+----
+
+You can calculate the memory size of a shared term and the size of the
+expanded term with the functions +erts_debug:size/1+ and
++erts_debug:flat_size/1+.
+
+----
+> share:size().
+{{size,19386},{flat_size,94110}}
+
+----
+
+For most applications this is not a problem, but you should be aware
+of the problem, which can come up in many situations. A deep copy is
+used for IO, ETS tables, binary_to_term, and message passing.
+
+Let us look in more detail at how message passing works.
+
+==== Message passing
+
+When a process P1 sends a message M to another (local) process P2, the
+process P1 first calculates the flat size of M. Then it allocates a
+new message buffer of that size by doing a heap_alloc of a heap_frag in
+the local scheduler context.
+
+Given the code in xref:listing-send[] the state of the system could
+look like in xref:fig-pre_send[] just before the send in p1/1.
+
+
+[[fig-pre_send]]
+++++
+<pre data-type="programlisting">P1
+
+    x0       |00000000 00000000 00000000 00100011| Pid 2
+    x1       |00000000 00000000 00000000 10001010| 136 + boxed tag -----------+
+                                                                              |
+                                                                              |
+         ADR                               BINARY  VALUE  +  DESCRIPTION      |
+ hend -&gt;     +-------- -------- -------- --------+                            |
+             |              ...                  |                            |
+             |              ...                  |                            |
+ stop -&gt;     |                                   |                            |
+                                                                              |
+ htop -&gt;     |                                   |                            |
+         144 |00000000 00000000 00000000 10000001| 128+CONS        ---------------+
+         140 |00000000 00000000 00000000 10000001| 128+CONS        ---------------+
+         136 |00000000 00000000 00000000 10000000| 2+ARITYVAL             &lt;---+   |
+         132 |00000000 00000000 00000000 01111001| 120+CONS        -------------- | -+
+         128 |00000000 00000000 00000110 10001111| (H) 104 bsl 4 + small int tag &lt;+  |
+         124 |00000000 00000000 00000000 01110001| 112+CONS        ----------------- | -+
+         120 |00000000 00000000 00000110 01011111| (e) 101 bsl 4 + small int tag &lt;---+  |
+         116 |00000000 00000000 00000000 01101001| 104+CONS        -------------------- | -+
+         112 |00000000 00000000 00000110 11001111| (l) 108 bsl 4 + small int tag &lt;------+  |
+         108 |00000000 00000000 00000000 01100001|  96+CONS        ----------------------- | -+
+         104 |00000000 00000000 00000110 11001111| (l) 108 bsl 4 + small int tag &lt;---------+  |
+         100 |11111111 11111111 11111111 11111011| NIL                                        |
+          96 |00000000 00000000 00000110 11111111| (o) 111 bsl 4 + small int tag &lt;------------+
+             |                ...                |
+ heap -&gt;     +-----------------------------------+
+
+
+P2</pre>
+++++
+
+When P1 starts sending the message M to P2, it (through the code in
+erl_message.c) first calculates the flat size of M (which in our example is
+23 words)footnote:[We ignore tracing here which will add a trace token
+to the size of the message, and always use a heap fragment.]. 
+Then (in a SMP system) if it can take a lock on P2 and there is enough
+room on the heap of P2 it will copy the message to the heap of P2.
+
+If P2 is running (or exiting) or there isn't enough space on the heap,
+then a new heap fragment is allocated
+(of sizeof(ErlHeapFragment) - sizeof(Eterm) + 23*sizeof(Eterm))
+footnote:[The -sizeof(Eterm) comes from mem in ErlHeapFragment already
+having the size of 1 Eterm] which after initialization will look like:
+
+----
+erl_heap_fragment:
+    ErlHeapFragment* next;	  NULL
+    ErlOffHeap off_heap:
+      erl_off_heap_header* first; NULL
+      Uint64 overhead;               0
+    unsigned alloc_size;	    23
+    unsigned used_size;             23
+    Eterm mem[1];		     ?
+      ... 22 free words
+----
+
+Then the message is copied into the heap fragment:
+
+
+++++
+<pre data-type="programlisting">erl_heap_fragment:
+    ErlHeapFragment* next;	  NULL
+    ErlOffHeap off_heap:
+      erl_off_heap_header* first; Boxed tag+&amp;mem+2*WS-+
+      Uint64 overhead;               0                |
+    unsigned alloc_size;	    23                |
+    unsigned used_size;             23                |
+    Eterm mem:                    2+ARITYVAL   &lt;------+
+                                  &amp;mem+3*WS+1  ---+
+                                  &amp;mem+13*WS+1 ------+
+                                  (H*16)+15    &lt;--+  |
+                                  &amp;mem+5*WS+1  --+   |
+                                  (e*16)+15    &lt;-+   |
+                                  &amp;mem+7*WS+1  ----| |
+                                  (l*16)+15    &lt;---+ |
+                                  &amp;mem+9*WS+1  ---+  |
+                                  (l*16)+15    &lt;--+  |
+                                  &amp;mem+11*WS+1 ----+ |
+                                  (o*16)+15    &lt;---+ |
+                                  NIL                |
+                                  (H*16)+15    &lt;-----+
+                                  &amp;mem+15*WS+1 --+
+                                  (e*16)+15    &lt;-+
+                                  &amp;mem+17*WS+1 ----|
+                                  (l*16)+15    &lt;---+
+                                  &amp;mem+19*WS+1 ---+
+                                  (l*16)+15    &lt;--+
+                                  &amp;mem+21*WS+1 ----+
+                                  (o*16)+15    &lt;---+
+                                  NIL</pre>
+++++
+
+In either case a new mbox (+ErlMessage+) is allocated, a lock
+ (+ERTS_PROC_LOCK_MSGQ+) is taken on the receiver and the message
+ on the heap or in the new heap fragment is linked into the mbox.
+
+----
+ erl_mesg {
+    struct erl_mesg* next = NULL;
+    data:  ErlHeapFragment *heap_frag = bp;
+    Eterm m[0]            = message;
+ } ErlMessage;
+
+----
+
+Then the mbox is linked into the in message queue (+msg_inq+) of the
+receiver, and the lock is released. Note that +msg_inq.last+ points to
+the +next+ field of the last message in the queue. When a new mbox is
+linked in this next pointer is updated to point to the new mbox, and
+the last pointer is updated to point to the next field of the new
+mbox.
+
+[[SS-Binaries]]
+==== Binaries
+
+As we saw in xref:CH-TypeSystem[] there are four types of binaries
+internally. Three of these types, _heap binaries_, _sub binaries_ and
+_match contexts_ are stored on the local heap and handled by the
+garbage collector and message passing as any other object, copied as
+needed.
+
+
+===== Reference Counting
+
+The fourth type, large binaries or _refc binaries_, on the other hand
+are partially stored outside of the process heap and they are
+reference counted.
+
+The payload of a refc binary is stored in memory allocated by the
+binary allocator. There is also a small reference to the payload called
+a ProcBin which is stored on the process heap. This reference is
+copied by message passing and by the GC, but the payload is
+untouched. This makes it relatively cheap to send large binaries to
+other processes since the whole binary doesn't need to be copied.
+
+Each reference through a ProcBin to a refc binary increases the
+reference count of the binary by one. All ProcBin objects on a
+process heap are linked together in a linked list. After a
+GC pass this linked list is traversed and the reference count
+of the binary is decreased by one for each ProcBin that
+has died. If the reference count of the refc binary
+reaches zero that binary is deallocated.
+
+Having large binaries reference counted and not copied by send or
+garbage collection is a big win, but there is one problem
+with having a mixed environment of garbage collection and
+reference counting. In a pure reference counted implementation
+the reference count would be reduced as soon as a reference to
+the object dies, and when the reference count reaches zero the
+object is freed. In the ERTS mixed environment a reference to a
+reference counted object does not die until a garbage collection
+detects that the reference is dead.
+
+This means that binaries, which have a tendency to be large or even
+huge, can hang around for a long time after all references to the
+binary are dead. Note that since binaries are allocated globally,
+all references from all processes need to be dead, that is, all
+processes that have seen a binary need to do a GC.
+
+Unfortunately it is not always easy, as a developer, to see which
+processes have seen a binary in the GC sense of the word seen. Imagine
+for example that you have a load balancer that receives work items
+and dispatches them to workers.
+
+In xref:load_balancer[] there is an example of a loop which
+doesn't need to do GC. (See xref:listing-lb[] for a full example.)
+
+[[load_balancer]]
+----
+loop(Workers, N) ->
+  receive
+    WorkItem ->
+       Worker = lists:nth(N+1, Workers),
+       Worker ! WorkItem,
+       loop(Workers, (N+1) rem length(Workers)) 
+  end.
+----
+
+This server will just keep on grabbing references to binaries and
+never free them, eventually using up all system memory.
+
+When one is aware of the problem it is easy to fix: one can either call
+garbage_collect() on each iteration of _loop_ or one could do it every
+five seconds or so by adding an after clause to the receive. (_after
+5000 -> garbage_collect(), loop(Workers, N)_ ).
+
+===== Sub Binaries and Matching
+
+When you match out a part of a binary you get a sub binary.
+This sub binary will be a small structure just containing
+pointers into the real binary. This increases the reference
+count for the binary but uses very little extra space.
+
+If a match would create a new copy of the matched part of the binary
+it would cost both space and time. So in most cases just doing a
+pattern match on a binary and getting a sub binary to work on is just
+what you want.
+
+There are some degenerate cases. Imagine, for example, that you load a
+huge file like a book into memory and then you match out a small part
+like a chapter to work on. The problem is then that the whole of the
+rest of the book is still kept in memory until you are done with
+processing the chapter. If you do this for many books, perhaps you
+want to get the introduction of every book in your file system, then
+you will keep the whole of each book in memory and not just the
+introductory chapter. This might lead to huge memory usage.
+
+The solution in this case, when you know you only want one small
+part of a large binary and you want to have the small part hanging
+around for some time, is to use +binary:copy/1+. This function
+is only used for its side effect, which is to actually copy
+the sub binary out of the real binary removing the reference to
+the larger binary and therefore hopefully letting it be garbage
+collected.
+
+There is a pretty thorough explanation of how binary construction
+and matching is done in the Erlang documentation:
+link:http://www.erlang.org/doc/efficiency_guide/binaryhandling.html[].
+
+
+==== Garbage Collection
+
+++++
+<!--
+This part of the content seems to be good, and probably worthy of being a top-level heading. It might be a bit long, though. - bmacdonald
+-->
+++++
+
+
+When a process runs out of space on the stack and heap the process
+will try to reclaim space by doing a minor garbage collection.  The
+code for this can be found in
+link:https://github.com/erlang/otp/blob/maint/erts/emulator/beam/erl_gc.c[erl_gc.c].
+
+
+ERTS uses a generational copying garbage collector. A copying
+collector means that during garbage collection all live young terms
+are copied from the old heap to a new heap. Then the old heap is
+discarded. A generational collector works on the principle that
+most terms die young, they are temporary terms created, used,
+and thrown away. Older terms are promoted to the old generation
+which is collected more seldom, with the rationale that once
+a term has become old it will probably live for a long time.
+
+Conceptually a garbage collection cycle works as follows:
+
+* First you collect all roots (e.g. the stack).
+* Then for each root, if the root points to a heap allocated object
+which doesn't have a forwarding pointer you copy the object to the new
+heap. For each copied object update the original with a forwarding
+pointer to the new copy.
+* Now go through the new heap and do the same as for the roots.
+
+We will go through an example to see how this is done in
+detail. We will go through a minor collection without an
+old generation, and we will only use the stack as the root set.
+In reality the process dictionary, trace data and probe data
+among other things are also included in the rootset.
+
+Let us look at how the call to garbage_collect in the gc_example
+behaves. The code will generate a string which is shared by two
+elements of a cons and a tuple, the tuple will then be eliminated
+resulting in garbage. After the GC there should only be one string on
+the heap. That is, first we generate the term
++{["Hello","Hello"], "Hello"}+ (sharing the same string "Hello" in
+all instances). Then we just keep the term +["Hello","Hello"]+ when
+triggering a gc.
+
+NOTE: We will take the opportunity to go through how you, on a
+Linux system, can use gdb to examine the behavior of ERTS.
+You can of course use the debugger of your choice. If you already know
+how to use gdb or if you have no interest in going into the debugger
+you can just ignore the meta text about how to inspect the system and
+just look at the diagrams and the explanations of how the GC works.
+
+
+[source,erlang]
+----
+include::code/memory_chapter/src/gc_example.erl[]
+----
+
+After compiling the example I start an erlang shell, test the call
+and prepare for a new call to the example (without hitting return):
+
+----
+1> gc_example:example().
+["Hello","Hello"]
+2> spawn(gc_example,example,[]).
+----
+
+Then I use gdb to attach to my erlang node (OS PID 2955 in this case):
+----
+$ gdb /home/happi/otp/lib/erlang/erts-6.0/bin/beam.smp 2955
+----
+
+
+NOTE: Depending on your settings for ptrace_scope you might have to
+precede the gdb invocation with 'sudo'.
+
+Then in gdb I set a breakpoint at the start of the main GC function and
+let the node continue:
+
+----
+(gdb) break garbage_collect_0
+(gdb) cont
+Continuing.
+----
+
+Now I hit enter in the Erlang shell and execution stops at the breakpoint:
+
+----
+Breakpoint 1, garbage_collect_0 (A__p=0x7f673d085f88, BIF__ARGS=0x7f673da90340) at beam/bif.c:3771
+3771	    FLAGS(BIF_P) |= F_NEED_FULLSWEEP;
+----
+
+Now we can inspect the PCB of the process:
+
+----
+(gdb) p *(Process *) A__p
+$1 = {common = {id = 1408749273747, refc = {counter = 1}, tracer_proc = 18446744073709551611, trace_flags = 0, u = {alive = {
+        started_interval = 0, reg = 0x0, links = 0x0, monitors = 0x0, ptimer = 0x0}, release = {later = 0, func = 0x0, data = 0x0, 
+        next = 0x0}}}, htop = 0x7f6737145950, stop = 0x7f6737146000, heap = 0x7f67371458c8, hend = 0x7f6737146010, heap_sz = 233, 
+  min_heap_size = 233, min_vheap_size = 46422, fp_exception = 0, hipe = {nsp = 0x0, nstack = 0x0, nstend = 0x0, ncallee = 0x7f673d080000, 
+    closure = 0, nstgraylim = 0x0, nstblacklim = 0x0, ngra = 0x0, ncsp = 0x7f673d0863e8, narity = 0, float_result = 0}, arity = 0, 
+  arg_reg = 0x7f673d086080, max_arg_reg = 6, def_arg_reg = {393227, 457419, 18446744073709551611, 233, 46422, 2000}, cp = 0x7f673686ac40, 
+  i = 0x7f673be17748, catches = 0, fcalls = 1994, rcount = 0, schedule_count = 0, reds = 0, group_leader = 893353197987, flags = 0, 
+  fvalue = 18446744073709551611, freason = 0, ftrace = 18446744073709551611, next = 0x7f673d084cc0, nodes_monitors = 0x0, 
+  suspend_monitors = 0x0, msg = {first = 0x0, last = 0x7f673d086120, save = 0x7f673d086120, len = 0, mark = 0x0, saved_last = 0x7d0}, u = {
+    bif_timers = 0x0, terminate = 0x0}, dictionary = 0x0, seq_trace_clock = 0, seq_trace_lastcnt = 0, 
+  seq_trace_token = 18446744073709551611, initial = {393227, 457419, 0}, current = 0x7f673be17730, parent = 1133871366675, 
+  approx_started = 1407857804, high_water = 0x7f67371458c8, old_hend = 0x0, old_htop = 0x0, old_heap = 0x0, gen_gcs = 0, 
+  max_gen_gcs = 65535, off_heap = {first = 0x0, overhead = 0}, mbuf = 0x0, mbuf_sz = 0, psd = 0x0, bin_vheap_sz = 46422, 
+  bin_vheap_mature = 0, bin_old_vheap_sz = 46422, bin_old_vheap = 0, sys_task_qs = 0x0, state = {counter = 41002}, msg_inq = {first = 0x0, 
+    last = 0x7f673d086228, len = 0}, pending_exit = {reason = 0, bp = 0x0}, lock = {flags = {counter = 1}, queue = {0x0, 0x0, 0x0, 0x0}, 
+    refc = {counter = 1}}, scheduler_data = 0x7f673bd6c080, suspendee = 18446744073709551611, pending_suspenders = 0x0, run_queue = {
+    counter = 140081362118912}, hipe_smp = {have_receive_locks = 0}}
+----
+
+Wow, that was a lot of information. The interesting part is about the stack and the heap:
+
+----
+hend = 0x7f6737146010,
+stop = 0x7f6737146000,
+htop = 0x7f6737145950,
+heap = 0x7f67371458c8,
+----
+
+By using some helper scripts we can inspect the stack and the heap in a meaningful
+way. (See xref:AP-listings[] for the definitions of the scripts in gdb_scripts.)
+
+----
+(gdb) source gdb_scripts 
+(gdb) print_p_stack A__p
+0x00007f6737146008 [0x00007f6737145929] cons -> 0x00007f6737145928
+(gdb) print_p_heap A__p
+0x00007f6737145948 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145940 [0x00007f6737145929] cons -> 0x00007f6737145928
+0x00007f6737145938 [0x0000000000000080] Tuple size 2
+0x00007f6737145930 [0x00007f6737145919] cons -> 0x00007f6737145918
+0x00007f6737145928 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145920 [0xfffffffffffffffb] NIL
+0x00007f6737145918 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145910 [0x00007f67371458f9] cons -> 0x00007f67371458f8
+0x00007f6737145908 [0x000000000000048f] 72
+0x00007f6737145900 [0x00007f67371458e9] cons -> 0x00007f67371458e8
+0x00007f67371458f8 [0x000000000000065f] 101
+0x00007f67371458f0 [0x00007f67371458d9] cons -> 0x00007f67371458d8
+0x00007f67371458e8 [0x00000000000006cf] 108
+0x00007f67371458e0 [0x00007f67371458c9] cons -> 0x00007f67371458c8
+0x00007f67371458d8 [0x00000000000006cf] 108
+0x00007f67371458d0 [0xfffffffffffffffb] NIL
+0x00007f67371458c8 [0x00000000000006ff] 111
+----
+
+Here we can see the heap of the process after it has allocated the
+list "Hello" on the heap and the cons containing that list twice, and
+the tuple containing the cons and the list.  The _root set_, in this
+case the stack, contains a pointer to the cons containing two copies
+of the list.  The tuple is dead, that is, there are no references to
+it.
+
+The garbage collection starts by calculating the root set and by
+allocating a new heap (_to space_). By stepping into the gc code in the
+debugger you can see how this is done. I will not go through the
+details here.  After a number of steps the execution will reach the
+point where all terms in the root set are copied to the new heap. This
+starts around (depending on version) line 1272 with a +while+ loop in
+erl_gc.c.
+
+In our case the root is a cons pointing to address 0x00007f6737145928,
+whose head points to the string starting with the letter (integer) 'H'. When a
+cons cell is moved from the current heap, called _from space_, to _to space_,
+the value in the head (or car) is overwritten with a _moved cons_ tag (the value 0).
+
+After the first step where the root set is moved, the from space
+and the to space look like this:
+
+from space:
+
+----
+(gdb) print_p_heap p
+0x00007f6737145948 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145940 [0x00007f6737145929] cons -> 0x00007f6737145928
+0x00007f6737145938 [0x0000000000000080] Tuple size 2
+0x00007f6737145930 [0x00007f67371445b1] cons -> 0x00007f67371445b0
+0x00007f6737145928 [0x0000000000000000] Tuple size 0
+0x00007f6737145920 [0xfffffffffffffffb] NIL
+0x00007f6737145918 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145910 [0x00007f67371458f9] cons -> 0x00007f67371458f8
+0x00007f6737145908 [0x000000000000048f] 72
+0x00007f6737145900 [0x00007f67371458e9] cons -> 0x00007f67371458e8
+0x00007f67371458f8 [0x000000000000065f] 101
+0x00007f67371458f0 [0x00007f67371458d9] cons -> 0x00007f67371458d8
+0x00007f67371458e8 [0x00000000000006cf] 108
+0x00007f67371458e0 [0x00007f67371458c9] cons -> 0x00007f67371458c8
+0x00007f67371458d8 [0x00000000000006cf] 108
+0x00007f67371458d0 [0xfffffffffffffffb] NIL
+0x00007f67371458c8 [0x00000000000006ff] 111
+----
+
+to space:
+
+----
+(gdb) print_heap n_htop-1 n_htop-2
+0x00007f67371445b8 [0x00007f6737145919] cons -> 0x00007f6737145918
+0x00007f67371445b0 [0x00007f6737145909] cons -> 0x00007f6737145908
+
+----
+
+In from space the head of the first cons cell has been overwritten
+with 0 (looks like a tuple of size 0) and the tail has been overwritten
+with a forwarding pointer pointing to the new cons cell in the to space.
+In to space we now have the first cons cell with two
+backward pointers to the head and the tail of the cons in the from space.
+
+
+When the collector is done with the root set the to space contains
+backward pointers to all still live terms. At this point the collector
+starts sweeping the to space. It uses two pointers +n_hp+ pointing to
+the bottom of the unseen heap and +n_htop+ pointing to the top of the heap.
+
+----
+n_htop:
+        0x00007f67371445b8 [0x00007f6737145919] cons -> 0x00007f6737145918
+n_hp    0x00007f67371445b0 [0x00007f6737145909] cons -> 0x00007f6737145908
+----
+
+
+The GC will then look at the value pointed to by +n_hp+, in this case a
+cons pointing back to the from space. So it moves that cons to the to
+space, incrementing n_htop to make room for the new cons, and
+incrementing +n_hp+ to indicate that the first cons is seen.
+
+----
+from space:
+
+0x00007f6737145948 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145940 [0x00007f6737145929] cons -> 0x00007f6737145928
+0x00007f6737145938 [0x0000000000000080] Tuple size 2
+0x00007f6737145930 [0x00007f67371445b1] cons -> 0x00007f67371445b0
+0x00007f6737145928 [0x0000000000000000] Tuple size 0
+0x00007f6737145920 [0xfffffffffffffffb] NIL
+0x00007f6737145918 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145910 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+0x00007f6737145908 [0x0000000000000000] Tuple size 0
+0x00007f6737145900 [0x00007f67371458e9] cons -> 0x00007f67371458e8
+0x00007f67371458f8 [0x000000000000065f] 101
+0x00007f67371458f0 [0x00007f67371458d9] cons -> 0x00007f67371458d8
+0x00007f67371458e8 [0x00000000000006cf] 108
+0x00007f67371458e0 [0x00007f67371458c9] cons -> 0x00007f67371458c8
+0x00007f67371458d8 [0x00000000000006cf] 108
+0x00007f67371458d0 [0xfffffffffffffffb] NIL
+0x00007f67371458c8 [0x00000000000006ff] 111
+
+to space:
+
+n_htop:
+        0x00007f67371445c8 [0x00007f67371458f9] cons -> 0x00007f67371458f8
+        0x00007f67371445c0 [0x000000000000048f] 72
+n_hp    0x00007f67371445b8 [0x00007f6737145919] cons -> 0x00007f6737145918
+SEEN    0x00007f67371445b0 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+----
+
+The same thing then happens with the second cons.
+
+----
+from space:
+
+0x00007f6737145948 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145940 [0x00007f6737145929] cons -> 0x00007f6737145928
+0x00007f6737145938 [0x0000000000000080] Tuple size 2
+0x00007f6737145930 [0x00007f67371445b1] cons -> 0x00007f67371445b0
+0x00007f6737145928 [0x0000000000000000] Tuple size 0
+0x00007f6737145920 [0x00007f67371445d1] cons -> 0x00007f67371445d0
+0x00007f6737145918 [0x0000000000000000] Tuple size 0
+0x00007f6737145910 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+0x00007f6737145908 [0x0000000000000000] Tuple size 0
+0x00007f6737145900 [0x00007f67371458e9] cons -> 0x00007f67371458e8
+0x00007f67371458f8 [0x000000000000065f] 101
+0x00007f67371458f0 [0x00007f67371458d9] cons -> 0x00007f67371458d8
+0x00007f67371458e8 [0x00000000000006cf] 108
+0x00007f67371458e0 [0x00007f67371458c9] cons -> 0x00007f67371458c8
+0x00007f67371458d8 [0x00000000000006cf] 108
+0x00007f67371458d0 [0xfffffffffffffffb] NIL
+0x00007f67371458c8 [0x00000000000006ff] 111
+
+to space:
+
+n_htop:
+        0x00007f67371445d8 [0xfffffffffffffffb] NIL
+        0x00007f67371445d0 [0x00007f6737145909] cons -> 0x00007f6737145908
+        0x00007f67371445c8 [0x00007f67371458f9] cons -> 0x00007f67371458f8
+n_hp    0x00007f67371445c0 [0x000000000000048f] 72
+SEEN    0x00007f67371445b8 [0x00007f6737145919] cons -> 0x00007f67371445d0
+SEEN    0x00007f67371445b0 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+----
+
+The next element in to space is the immediate 72, which is only
+stepped over (with +n_hp+++). Then there is another cons which is moved.
+
+After that cons has been moved the two spaces look like this:
+
+----
+from space:
+
+0x00007f6737145948 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145940 [0x00007f6737145929] cons -> 0x00007f6737145928
+0x00007f6737145938 [0x0000000000000080] Tuple size 2
+0x00007f6737145930 [0x00007f67371445b1] cons -> 0x00007f67371445b0
+0x00007f6737145928 [0x0000000000000000] Tuple size 0
+0x00007f6737145920 [0x00007f67371445d1] cons -> 0x00007f67371445d0
+0x00007f6737145918 [0x0000000000000000] Tuple size 0
+0x00007f6737145910 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+0x00007f6737145908 [0x0000000000000000] Tuple size 0
+0x00007f6737145900 [0x00007f67371445e1] cons -> 0x00007f67371445e0
+0x00007f67371458f8 [0x0000000000000000] Tuple size 0
+0x00007f67371458f0 [0x00007f67371458d9] cons -> 0x00007f67371458d8
+0x00007f67371458e8 [0x00000000000006cf] 108
+0x00007f67371458e0 [0x00007f67371458c9] cons -> 0x00007f67371458c8
+0x00007f67371458d8 [0x00000000000006cf] 108
+0x00007f67371458d0 [0xfffffffffffffffb] NIL
+0x00007f67371458c8 [0x00000000000006ff] 111
+
+to space:
+
+n_htop:
+        0x00007f67371445e8 [0x00007f67371458e9] cons -> 0x00007f67371458e8
+        0x00007f67371445e0 [0x000000000000065f] 101
+        0x00007f67371445d8 [0xfffffffffffffffb] NIL
+n_hp    0x00007f67371445d0 [0x00007f6737145909] cons -> 0x00007f6737145908
+SEEN    0x00007f67371445c8 [0x00007f67371458f9] cons -> 0x00007f67371445e0
+SEEN    0x00007f67371445c0 [0x000000000000048f] 72
+SEEN    0x00007f67371445b8 [0x00007f6737145919] cons -> 0x00007f67371445d0
+SEEN    0x00007f67371445b0 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+----
+
+Now we come to a cons that points to a cell that has already been moved.
+The GC sees the IS_MOVED_CONS tag at 0x00007f6737145908 and copies the
+destination of the moved cell from the tail (+*n_hp++ = ptr[1];+). This
+way sharing is preserved during GC. This step does not affect from space,
+but the backward pointer in to space is rewritten.
+
+----
+to space:
+
+n_htop:
+        0x00007f67371445e8 [0x00007f67371458e9] cons -> 0x00007f67371458e8
+        0x00007f67371445e0 [0x000000000000065f] 101
+n_hp    0x00007f67371445d8 [0xfffffffffffffffb] NIL
+SEEN    0x00007f67371445d0 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+SEEN    0x00007f67371445c8 [0x00007f67371458f9] cons -> 0x00007f67371445e0
+SEEN    0x00007f67371445c0 [0x000000000000048f] 72
+SEEN    0x00007f67371445b8 [0x00007f6737145919] cons -> 0x00007f67371445d0
+SEEN    0x00007f67371445b0 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+----
+
+Then the rest of the list (the string) is moved.
+
+----
+from space:
+
+0x00007f6737145948 [0x00007f6737145909] cons -> 0x00007f6737145908
+0x00007f6737145940 [0x00007f6737145929] cons -> 0x00007f6737145928
+0x00007f6737145938 [0x0000000000000080] Tuple size 2
+0x00007f6737145930 [0x00007f67371445b1] cons -> 0x00007f67371445b0
+0x00007f6737145928 [0x0000000000000000] Tuple size 0
+0x00007f6737145920 [0x00007f67371445d1] cons -> 0x00007f67371445d0
+0x00007f6737145918 [0x0000000000000000] Tuple size 0
+0x00007f6737145910 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+0x00007f6737145908 [0x0000000000000000] Tuple size 0
+0x00007f6737145900 [0x00007f67371445e1] cons -> 0x00007f67371445e0
+0x00007f67371458f8 [0x0000000000000000] Tuple size 0
+0x00007f67371458f0 [0x00007f67371445f1] cons -> 0x00007f67371445f0
+0x00007f67371458e8 [0x0000000000000000] Tuple size 0
+0x00007f67371458e0 [0x00007f6737144601] cons -> 0x00007f6737144600
+0x00007f67371458d8 [0x0000000000000000] Tuple size 0
+0x00007f67371458d0 [0x00007f6737144611] cons -> 0x00007f6737144610
+0x00007f67371458c8 [0x0000000000000000] Tuple size 0
+
+to space:
+
+n_htop:
+n_hp
+SEEN    0x00007f6737144618 [0xfffffffffffffffb] NIL
+SEEN    0x00007f6737144610 [0x00000000000006ff] 111
+SEEN    0x00007f6737144608 [0x00007f6737144611] cons -> 0x00007f6737144610
+SEEN    0x00007f6737144600 [0x00000000000006cf] 108
+SEEN    0x00007f67371445f8 [0x00007f6737144601] cons -> 0x00007f6737144600
+SEEN    0x00007f67371445f0 [0x00000000000006cf] 108
+SEEN    0x00007f67371445e8 [0x00007f67371445f1] cons -> 0x00007f67371445f0
+SEEN    0x00007f67371445e0 [0x000000000000065f] 101
+SEEN    0x00007f67371445d8 [0xfffffffffffffffb] NIL
+SEEN    0x00007f67371445d0 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+SEEN    0x00007f67371445c8 [0x00007f67371445e1] cons -> 0x00007f67371445e0
+SEEN    0x00007f67371445c0 [0x000000000000048f] 72
+SEEN    0x00007f67371445b8 [0x00007f67371445d1] cons -> 0x00007f67371445d0
+SEEN    0x00007f67371445b0 [0x00007f67371445c1] cons -> 0x00007f67371445c0
+----
+
+There are some things to note from this example. When terms are
+created in Erlang they are created bottom up, starting with the
+elements. The garbage collector works top down, starting with the
+top level structure and then copying the elements. This means that
+the direction of the pointers change after the first GC. This has
+no real implications but it is good to know when looking at actual
+heaps. You can not assume that structures should be bottom up.
+
+Also note that the GC does a breadth-first traversal. This means that
+locality for one term most often is worse after a GC. With the size of
+modern caches this should not be a problem. You could of course create
+a pathological example where it becomes a problem, but you can also
+create a pathological example where a depth-first approach would cause
+problems.
+
+The third thing to note is that sharing is preserved which is really
+important otherwise we might end up using more space after a GC than
+before.
+
+
+
+
+
+
+Generations...
+
+
+----
+  hend ->  +----+
+           |....|
+  stop ->  |    |
+           |    |    +----+ old_hend
+           |    |    |    |
+  htop ->  |    |    |    |
+           |....|    |    | old_htop
+           |....|    |....|
+  heap ->  +----+    +----+ old_heap
+          The Heap   Old Heap
+----
+
+
+ +high_water, old_hend, old_htop, old_heap,
+ gen_gcs, max_gen_gcs, off_heap,  mbuf, mbuf_sz, psd, bin_vheap_sz,
+ bin_vheap_mature, bin_old_vheap_sz, bin_old_vheap+.
+
+
+
+//  ==== TODO
+//
+// Growth of the stack and heap, Shrinking. No Stack overflow? 
+//
+// Random thoughts:
+//
+// Erlang has no updates - there can be no cycles: use reference count.
+//
+// Erlang terms are small.
+//
+// The HiPE group did some measures:
+// 75% cons cells
+// 24% !cons but smaller than 8 words
+//  1% >= 8 words
+//
+// Less fragmentation & better locality with copying collector
+//
+// Advantages with 1 heap/process:
+// + Free reclamation when a process dies
+// + Small root set
+// + Improved cache locality
+// + Cheap stack/heap test
+//
+// Disadvantages with 1 heap/process:
+// - Message passing is expensive
+// - Uses more space (fragmentation)
+//
+
+//  The garbage collector, generations, full sweep. 
+
+
+// Put in part II:
+//  Getting information about stacks,
+//  heaps and the GC. 
+//  Tweaking stack and heap
+//  parameters. 
+
+// Hibernation. 
+
+=== Other interesting memory areas
+
+==== The atom table.
+TODO
+==== Code
+TODO
+==== Constants
+TODO
+
+
diff --git a/preface.asciidoc b/preface.asciidoc
index 73c92d7..0670115 100644
--- a/preface.asciidoc
+++ b/preface.asciidoc
@@ -4,7 +4,7 @@ Preface
 
 
 
-This book is unfortunately not about how to write correct and
+This book is not about how to write correct and
 beautiful code, I am assuming that you already know how to do
 that. This book isn’t really about profiling and performance tuning
 either. Although, there is a chapter in this book on tracing and
@@ -31,7 +31,7 @@ If you want to debug the VM If you want to extend the VM If you want
 to do performance tweaking--jump to the last chapter … but to really
 understand that chapter you need to read the book.
 
-## How to read this book
+=== How to read this book
 
 The Erlang RunTime System (ERTS) is a complex system with many
 interdependent components. It is written in a very portable way so
diff --git a/type_system.asciidoc b/type_system.asciidoc
new file mode 100755
index 0000000..6e7c922
--- /dev/null
+++ b/type_system.asciidoc
@@ -0,0 +1,354 @@
+[[CH-TypeSystem]]
+== The Erlang Type System and Tags
+
+One of the most important aspects of ERTS to understand is how ERTS
+stores data, that is, how Erlang terms are stored in memory. This
+gives you the basis for understanding how garbage collection works,
+how message passing works, and gives you an insight into how much
+memory is needed.
+
+In this chapter you will learn the basic data types of Erlang and
+how they are implemented in ERTS. This knowledge will be essential
+in understanding the chapter on memory allocation and garbage
+collection, see xref:CH-Memory[].
+
+=== The Erlang Type System
+
+Erlang is _strongly typed_. That is, there is no way to coerce one
+type into another type, you can only convert from one type to another.
+Compare this to e.g. C where you can coerce a _char_ to an _int_ or
+any type pointed to by a pointer to (_void *_).
+
+The Erlang type lattice is quite flat, there are only a few real sub
+types, numbers have the sub types integer and float, and list has the
+subtypes nil and cons. (One could also argue that tuple has one
+subtype for each size.)
+
+The Erlang Type Lattice 
+----
+
+                                                  any()
+            /               /         /       /    |     \     \       \         \         \
+           /               /         /       /     |      \     \       \         \         \
+       number()         atom() reference() fun() port() pid() tuple()    map()   list()   binary()
+       /    \             \         \        \     |     /      /       /        /  \       /
+  integer() float()        \         \        \    |    /      /       /      nil() cons() /
+      \      \              \         \        \   |   /      /       /        /     /    /
+                                                  none()
+
+----
+
+There is a partial order (< and >) on all terms in Erlang where the
+types are ordered from left to right in the above lattice.
+
+The order is partial and not total since integers and floats
+are converted before comparison. Both (1 < 1.0) and (1.0 < 1) are
+false, and (1 =< 1.0 and 1 >= 1.0) and (1 =/= 1.0). The number with
+the lesser precision is converted to the number with higher precision.
+Usually integers are converted to floats. For very large or small
+floats the float is converted to an integer. This happens if all
+significant digits are to the left of the decimal point.
+
+Since Erlang 18, when two maps are compared for order they are
+compared as follows: If one map has fewer elements than the other it
+is considered smaller.  Otherwise the keys are compared in term order,
+where all integers are considered smaller than all floats. If all the
+keys are the same then each value pair (in key order) is compared
+arithmetically, i.e. by first converting them to the same precision.
+
+The same is true when comparing for equality, thus #{1 =&gt; 1.0} == #{1 =&gt; 1} but #{1.0 =&gt; 1} /= #{1 =&gt; 1}.
+
+In Erlang versions prior to 18 keys were also compared
+arithmetically.
+
+Erlang is dynamically typed. That is, types will be checked at
+runtime and if a type error occurs an exception is thrown. The
+compiler does not check the types at compile time, unlike in a
+statically typed language like C or Java where you can get a
+type error during compilation.
+
+These aspects of the Erlang type system, strongly dynamically typed
+with an order on the types, put some constraints on the implementation
+of the language. In order to be able to check and compare types at
+runtime each Erlang term has to carry its type with it.
+
+This is solved by _tagging_ the terms.
+
+=== The Tagging Scheme
+
+In the memory representation of an Erlang term a few bits are reserved
+for a type tag. For performance reasons the terms are divided into
+_immediates_ and _boxed_ terms.  An immediate term can fit into a
+machine word, that is, in a register or on a stack slot. A boxed term
+consists of two parts: a tagged pointer and a number of words stored
+on the process heap. The _boxes_ stored on the heap have a header and
+a body, unless it is a list.
+
+Currently ERTS uses a staged tag scheme, the history and reasoning
+behind this scheme is explained in a technical report from the
+HiPE group. (See
+link:http://www.it.uu.se/research/publications/reports/2000-029/[].)
+The tagging scheme is implemented in +erl_term.h+.
+
+The basic idea is to use the least significant bits for tags. Since
+most modern CPU architectures align 32- and 64-bit words, there are at
+least two bits that are "unused" for pointers. These bits can be
+used as tags instead. Unfortunately those two bits are not enough
+for all the types in Erlang, more bits are therefore used as needed.
+
+==== Tags for Immediates
+
+The first two bits (the primary tag) are used as follows:
+
+----
+  00 Header (on heap) CP (on stack)
+  01 List (cons)
+  10 Boxed
+  11 Immediate
+----
+
+The header tag is only used on the heap for header words, more on that later.
+On the stack 00 indicates a return address.
+The list tag is used for cons cells, and the boxed tag is used for all other
+pointers to the heap. The immediate tag is further divided like this:
+
+----
+ 00 11 Pid
+ 01 11 Port
+ 10 11 Immediate 2
+ 11 11 Small integer
+----
+
+Pids and ports are immediates and can be compared for equality
+efficiently. They are of course in reality just references, a pid
+is a process identifier and it points to a process. The process does
+not reside on the heap of any process but is handled by the PCB.
+A port works in much the same way.
+
+//  (MORE ON THIS REF!)
+
+There are two types of integers in ERTS, small integers and
+bignums. Small integers fit in one machine word minus four tag bits,
+i.e. in 28 or 60 bits for 32- and 64-bit systems respectively. Bignums
+on the other hand can be as large as needed (only limited by the heap
+space) and are stored on the heap, as boxed objects.
+
+By having all four tag bits as ones for small integers the emulator
+can make an efficient test when doing integer arithmetic to see if
+both arguments are immediates. (+is_both_small(x,y)+ is defined as 
++(x & y & 1111) == 1111+).
+
+The Immediate 2 tag is further divided like this:
+
+----
+ 00 10 11 Atom
+ 01 10 11 Catch
+ 10 10 11   [UNUSED]
+ 11 10 11 Nil
+----
+
+Atoms are made up of an index in the _atom table_ and the atom tag.
+Two atom immediates can be compared for equality by just comparing
+their immediate representation.
+
+In the atom table atoms are stored as C structs like this:
+
+----
+typedef struct atom {
+    IndexSlot slot;  /* MUST BE LOCATED AT TOP OF STRUCT!!! */
+    int len;         /* length of atom name */
+    int ord0;        /* ordinal value of first 3 bytes + 7 bits */
+    byte* name;      /* name of atom */
+} Atom;
+----
+
+Thanks to the +len+ and the +ord0+ fields the order of two atoms can
+be compared efficiently as long as they don't start with the same four
+letters.
+
+****
+
+NOTE: If you for some reason generate atoms with a pattern like name
+followed by a number and then store them in an ordered list or ordered
+tree the atom comparison will be more expensive if they all have the
+same first letters (e.g. foo_1, foo_2, etc.).
+
+Not that you should ever generate atom names, since the atom table is
+limited. I'm just saying, there is an evil micro optimization to be
+found here.
+
+You would of course never do this, but if you find code that generates
+atoms with a number followed by a postfix name, now you know what the
+author of that code might have been thinking.
+
+****
+
+The Catch immediate is only used on the stack. It contains an indirect
+pointer to the continuation point in the code where execution should
+continue after an exception. More on this in xref:CH-Calls[].
+
+The Nil tag is used for the empty list (nil or +[]+). The rest of the
+word is filled with ones.
+
+
+==== Tags for Boxed Terms
+
+Erlang terms stored on the heap use several machine words. Lists, or
+cons cells, are just two consecutive words on the heap: the head and
+the tail (or car and cdr as they are called in lisp and some places in
+the ERTS code).
+
+A string in Erlang is just a list of integers representing
+characters. In releases prior to Erlang OTP R14 strings have been
+encoded as ISO-latin-1 (ISO8859-1). Since R14 strings are encoded as
+lists of Unicode code points. For strings in latin-1 there is no
+difference since latin-1 is a subset of Unicode.
+// Describe Unicode code points better. Is the subset thing true?
+
+The string "Hello" might look like this in memory:
+
+.Representation of the string "Hello" on a 32 bit machine.
+----
+
+ hend ->     +-------- -------- -------- --------+
+             |              ...                  |
+             |              ...                  |
+             |00000000 00000000 00000000 10000001| 128 + list tag  ---------------+
+ stop ->     |                                   |                                |
+                                                                                  |
+ htop ->     |                                   |                                |
+         132 |00000000 00000000 00000000 01111001| 120 + list tag  -------------- | -+
+         128 |00000000 00000000 00000100 10001111| (H)  72 bsl 4 + small int tag <+  |
+         124 |00000000 00000000 00000000 01110001| 112 + list tag  ----------------- | -+
+         120 |00000000 00000000 00000110 01011111| (e) 101 bsl 4 + small int tag <---+  | 
+         116 |00000000 00000000 00000000 01101001| 104 + list tag  -------------------- | -+
+         112 |00000000 00000000 00000110 11001111| (l) 108 bsl 4 + small int tag <------+  |
+         108 |00000000 00000000 00000000 01100001|  96 + list tag  ----------------------- | -+
+         104 |00000000 00000000 00000110 11001111| (l) 108 bsl 4 + small int tag <---------+  |
+         100 |11111111 11111111 11111111 11111011| NIL                                        |
+          96 |00000000 00000000 00000110 11111111| (o) 111 bsl 4 + small int tag <------------+
+             |                ...                |
+ heap ->     +-----------------------------------+
+
+----
+
+All other boxed terms start with a header word. The header word uses a
+four bit header tag and the primary header tag (00), it also has an
+arity which says how many words the boxed term uses.  On a 32-bit
+machine it looks like this: +aaaaaaaaaaaaaaaaaaaaaaaaaatttt00+.
+
+The tags are:
+
+----
+
+ 0000	ARITYVAL (Tuples)
+ 0001   BINARY_AGGREGATE                |
+ 001s	BIGNUM with sign bit		|
+ 0100	REF                             |
+ 0101	FUN                             | THINGS
+ 0110	FLONUM                          |
+ 0111   EXPORT                          |
+ 1000	REFC_BINARY     |               |
+ 1001	HEAP_BINARY     | BINARIES      |
+ 1010	SUB_BINARY      |               |
+ 1011     [UNUSED]
+ 1100   EXTERNAL_PID  |                 |
+ 1101   EXTERNAL_PORT | EXTERNAL THINGS |
+ 1110   EXTERNAL_REF  |                 |
+ 1111   MAP
+
+----
+
+Tuples are stored on the heap with just the arity and then each
+element in the following words. The empty tuple +{}+ is stored just as
+the word 0 (header tag 0, tuple tag 0000, and arity 0).
+
+.Representation of the tuple {104,101,108,108,111} on a 32 bit machine.
+----
+
+ hend ->     +-------- -------- -------- --------+
+             |              ...                  |
+             |              ...                  |
+             |00000000 00000000 00000000 10000010| 128 + boxed tag ---------------+
+ stop ->     |                                   |                                |
+                                                                                  |
+ htop ->     |                                   |                                |
+         148 |00000000 00000000 00000110 11111111| (o) 111 bsl 4 + small int tag  |
+         144 |00000000 00000000 00000110 11001111| (l) 108 bsl 4 + small int tag  |
+         140 |00000000 00000000 00000110 11001111| (l) 108 bsl 4 + small int tag  |
+         136 |00000000 00000000 00000110 01011111| (e) 101 bsl 4 + small int tag  | 
+         132 |00000000 00000000 00000110 10001111| (h) 104 bsl 4 + small int tag  |
+         128 |00000000 00000000 00000001 01000000| 5 bsl 6 + tuple & header tag <-+
+             |                ...                |
+ heap ->     +-----------------------------------+
+
+----
+
+A _binary_ is an immutable array of bytes.  There are four types of
+internal representations of _binaries_. The two types _heap binaries_
+and _refc binaries_ contain binary data.  The other two types, _sub
+binaries_ and _match contexts_ (the BINARY_AGGREGATE tag) are smaller
+references into one of the other two types.
+
+Binaries that are 64 bytes or less can be stored directly on the
+process heap as _heap binaries_. Larger binaries are reference
+counted and the payload is stored outside of the process heap, a
+reference to the payload is stored on the process heap in an object
+called a _ProcBin_.
+
+// Todo: draw a picture of binaries and their tags.
+
+We will talk more about binaries in the xref:CH-Memory[].
+
+Integers that do not fit in a small integer (word size - 4 bits) are
+stored on the heap as "bignums" (or arbitrary precision integers). A
+bignum has a header word followed by a number of words encoding the
+bignum.  The sign part of the bignum tag (s) in the header encodes the
+sign of the number (s=0 for positive numbers, and s=1 for negative
+numbers).
+
+TODO: Describe bignum encoding. (And arithmetic ?)
+
+A reference is a _"unique"_ term often used to tag messages in order
+to basically implement a channel over a process mailbox. A reference
+is implemented as an 82 bit counter. After 4835703278458516698824704
+calls to +make_ref+ the counter will wrap and start over with ref 0
+again. You need a really fast machine to do that many calls to
++make_ref+ within your lifetime. Unless you restart the node, in which
+case it also will start from 0 again, but then all the old local refs
+are gone. If you send the ref to another node it becomes an external
+ref, see below.
+
+On a 32-bit system a local ref takes up four 32-bit words on the
+heap. On a 64-bit system a ref takes up three 64-bit words on the
+heap.
+
+.Representation of a ref in a 32-bit (or half-word) system.
+----
+
+    |00000000 00000000 00000000 11010000| Arity 3 + ref tag 
+    |00000000 000000rr rrrrrrrr rrrrrrrr| Data0
+    |rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr| Data1
+    |rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr| Data2
+
+----
+
+The reference number is (Data2 bsl 50) + (Data1 bsl 18) + Data0.
+
+.Outline
+****
+
+*TODO*
+
+ The implementation of floats,  ports, pids. Strings as lists, IO lists,
+ lists on 64-bit machines. Binaries, sub binaries, and copying. Records.
+
+ Possibly: The half-word machine. Sharing and deep copy. (or this will be in GC)
+
+ Outro/conclusion
+****
+
+
+
+
+