diff --git a/.dir-locals.el b/.dir-locals.el index c4e1e5c7..74fde018 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -1,4 +1,6 @@ ;;; Directory Local Variables ;;; For more information see (info "(emacs) Directory Variables") - -((dired-mode . ((c-basic-offset . 4)))) +( + (dired-mode . ((c-basic-offset . 4))) + (java-mode . ((c-basic-offset . 4))) +) diff --git a/README.md b/README.md index 59a342b1..3424c5d9 100644 --- a/README.md +++ b/README.md @@ -34,16 +34,17 @@ The simple language will be a small subset of C. * [Chapter 4](chapter04/README.md): A non-constant external variable input named `arg`. Binary and Comparison operators involving constants and `arg`. Non-zero values will be truthy. Peephole optimizations involving algebraic simplifications. * [Chapter 5](chapter05/README.md): `if` statement. CFG construction. * [Chapter 6](chapter06/README.md): Peephole optimization around dead control flow. -* Chapter 7: `while` statement. -* Chapter 8: Global Value Numbering. -* Chapter 9: Functions and calls. -* Chapter 10: Boolean operators `&&`, `||` and `!` including short circuit. -* Chapter 11: Memory effects: general memory edges in SSA. Peephole optimization around +* [Chapter 7](chapter07/README.md): `while` statement. Looping construct - eager phi approach. +* Chapter 8: Looping construct continued, lazy phi creation, `break` and `continue` statements. +* Chapter 9: Global Value Numbering. +* Chapter 10: Functions and calls. +* Chapter 11: Boolean operators `&&`, `||` and `!` including short circuit. +* Chapter 12: Memory effects: general memory edges in SSA. Peephole optimization around load-after-store/store-after-store. -* Chapter 12: Equivalence class aliasing, fine grained peephole optimizations. Free ptr-to analysis in SoN; but does not +* Chapter 13: Equivalence class aliasing, fine grained peephole optimizations. Free ptr-to analysis in SoN; but does not handle aliasing in arrays. -* Chapter 13: One dimensional static length array type. 
Array load/store. -* Chapter 14: Global Code Motion - unwind SoN to CFG. Scheduling. -* Chapter 15: Instruction selection, BURS. -* Chapter 16: Backend register allocation. -* Chapter 17: Garbage Collection. \ No newline at end of file +* Chapter 14: One dimensional static length array type. Array load/store. +* Chapter 15: Global Code Motion - unwind SoN to CFG. Scheduling. +* Chapter 16: Instruction selection, BURS. +* Chapter 17: Backend register allocation. +* Chapter 18: Garbage Collection. \ No newline at end of file diff --git a/chapter07/Makefile b/chapter07/Makefile new file mode 100644 index 00000000..56f69e12 --- /dev/null +++ b/chapter07/Makefile @@ -0,0 +1,92 @@ +SHELL := /bin/bash +.DELETE_ON_ERROR: + +# for printing variable values +# usage: make print-VARIABLE +# > VARIABLE = value_of_variable +print-% : ; @echo $* = $($*) + +# literal space +space := $() $() + +# Decide OS-specific questions +# jar-file separator +ifeq ($(OS),Windows_NT) + SEP = ; +else + SEP = : +endif +# Find a reasonable ctags. +CTAGS = $(shell which ctags) +# Hack for MacOS: /usr/bin/ctags is unfriendly, so look for ctags from brew (NOTE: UNAME is never assigned in this Makefile, so this only triggers when UNAME is supplied by the environment or on the command line) +ifeq ($(UNAME),Darwin) + CTAGS = $(shell brew list ctags 2> /dev/null | grep bin/ctags) +endif + +# Fun Args to javac. Mostly limit to java8 source definitions, and fairly +# aggressive lint warnings. 
+JAVAC_ARGS = -g + +# Source code +SIMPLE := com/seaofnodes/simple +SRC := src/main/java +TST := src/test/java +CLZDIR:= build/classes +main_javas := $(wildcard $(SRC)/$(SIMPLE)/*java $(SRC)/$(SIMPLE)/*/*java) +test_javas := $(wildcard $(TST)/$(SIMPLE)/*java $(TST)/$(SIMPLE)/*/*java) +main_classes := $(patsubst $(SRC)/%java,$(CLZDIR)/main/%class,$(main_javas)) +test_classes := $(patsubst $(TST)/%java,$(CLZDIR)/test/%class,$(test_javas)) +test_cp := $(patsubst $(TST)/$(SIMPLE)/%.java,com.seaofnodes.simple.%,$(test_javas)) +classes = $(main_classes) $(test_classes) +# All the libraries +libs = $(wildcard lib/*jar) +jars = $(subst $(space),$(SEP),$(libs)) + + +default_targets := $(main_classes) $(test_classes) +# Optionally add ctags to the default target if a reasonable one was found. +ifneq ($(CTAGS),) +default_targets += tags +endif + +default: $(default_targets) + +# Compile just the out-of-date files +$(main_classes): build/classes/main/%class: $(SRC)/%java + @echo "compiling " $@ " because " $? + @[ -d $(CLZDIR)/main ] || mkdir -p $(CLZDIR)/main + @javac $(JAVAC_ARGS) -cp "$(CLZDIR)/main$(SEP)$(jars)" -sourcepath $(SRC) -d $(CLZDIR)/main $(main_javas) + +$(test_classes): $(CLZDIR)/test/%class: $(TST)/%java $(main_classes) + @echo "compiling " $@ " because " $? + @[ -d $(CLZDIR)/test ] || mkdir -p $(CLZDIR)/test + @javac $(JAVAC_ARGS) -cp "$(CLZDIR)/test$(SEP)$(CLZDIR)/main$(SEP)$(jars)" -sourcepath $(TST) -d $(CLZDIR)/test $(test_javas) + +# Base launch line for JVM tests +JVM=nice java -ea -cp "build/classes/main${SEP}${jars}${SEP}$(CLZDIR)/test" + +tests: $(default_targets) + $(JVM) org.junit.runner.JUnitCore $(test_cp) + +.PHONY: clean +clean: + rm -rf build + rm -f TAGS + (find . 
-name "*~" -exec rm {} \; 2>/dev/null; exit 0) + +# Download libs from maven +lib: lib/junit-4.12.jar lib/hamcrest-core-1.3.jar + +# Jars +lib/junit-4.12.jar: + @[ -d lib ] || mkdir -p lib + @(cd lib; wget https://repo1.maven.org/maven2/junit/junit/4.12/junit-4.12.jar) + +lib/hamcrest-core-1.3.jar: + @[ -d lib ] || mkdir -p lib + @(cd lib; wget https://repo1.maven.org/maven2/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar) + +# Build emacs tags (part of a tasty emacs ide experience) +tags: $(main_javas) $(test_javas) + @rm -f TAGS + @$(CTAGS) -e --recurse=yes --extra=+q --fields=+fksaiS $(SRC) $(TST) diff --git a/chapter07/README.md b/chapter07/README.md new file mode 100644 index 00000000..c6bcdee4 --- /dev/null +++ b/chapter07/README.md @@ -0,0 +1,227 @@ +# Chapter 7 + +In this chapter we introduce the `while` statement. + +Here is the [complete language grammar](docs/07-grammar.md) for this chapter. + +## Dealing With Back Edges + +The complication introduced by looping constructs is that variables flow back into the body of the loop on iteration. +For example: + +```java +while(arg < 10) { + arg = arg + 1; +} +return arg; +``` + +The variable `arg` is assigned a new value inside the body of the loop, and this then flows back into the body of the loop. + +In general, we will rewrite the looping construct as follows: + +```java +loop_head: +if( !(arg < 10) ) + goto loop_exit; +arg = arg + 1; +goto loop_head; + +loop_exit: +``` + +Above is for illustration only, we do not have labels and `goto` statements in the language. + +From an SSA[^1] point of view, since `arg` flows back, it requires a `phi` node at the head. Conceptually we would like the outcome to be: + +```java +// arg1 represents incoming arg +// +loop_head: +arg2 = phi(arg1, arg3); +if( !(arg2 < 10) ) + goto loop_exit; +arg3 = arg2 + 1; +goto loop_head; + +loop_exit: +``` + +Notice that the phi for `arg2` refers to `arg3`, which is not known at the time we parse the `while` loop predicate. 
This is the crux of the problem that we need +to solve in order to successfully construct the Sea of Nodes graph, which is always in SSA form. + +Recall from [Chapter 5](../chapter05/README.md) that when parsing `if` statements, we clone the symbol tables as we go past the `if` predicate. +Later we merge the two sets of symbol tables at a `Region` - creating phis for all names that encountered a change in definition within the two +branches of the `if` statement. In the case of the `if` statement, the phis are created at the merge point when we already know the definitions +that are being merged. + +The essential idea for loop constructs is to eagerly create phis for all names in the symbol tables *before* we enter the loop's `if` condition, +since we do not know which names will be redefined inside the body of the loop. When we finish parsing the loop, we go back and remove unnecessary +phis. We call this approach the "eager phi" approach.[^2] + +In [Chapter 8](../chapter08) we will implement a "lazy phi" approach that creates phis only when we encounter redefinition of a name. + +## New Node Types + +Our list of nodes remains the same as in [Chapter 5](../chapter05/README.md); however, we create a subtype of `Region` named `Loop` to better +encapsulate some of the additional logic required. A key aspect of this is to temporarily disable peepholes of the `Region` and any phis +created until we complete parsing the loop body. This is because our phis are not fully constructed until the loop ends. + +## Detailed Steps + +1. We start by creating a new subclass of `Region`, the `Loop`. The `Loop` gets two control inputs: + the first one is the entry point, i.e. the current binding to `$ctrl`, and the second one (`null`) is a placeholder for the back edge that is + set after the loop is parsed. The absence of a back edge is used as an indicator to switch off peepholes of the region and + associated phis. 
+ + ```java + ctrl(new LoopNode(ctrl(),null).peephole()); + ``` + + The newly created region becomes the current control. + +2. We duplicate the current Scope node. This involves duplicating all the symbols at + every level within the scope, and creating phis for every symbol except the `$ctrl` binding. + + ```java + // Make a new Scope for the body. + _scope = _scope.dup(true); + ``` + + Note that the `dup` call is given an argument `true`. This triggers creating phis. The code + that creates the phis in the `dup()` method is shown below. + + ```java + // boolean loop=true if this is a loop region + + String[] reverse = reverse_names(); + dup.add_def(ctrl()); // Control input is just copied + for( int i=1; ikeep().peephole(); + // Setup projection nodes + Node ifT = new ProjNode(ifNode, 0, "True" ).peephole(); + ifNode.unkeep(); + Node ifF = new ProjNode(ifNode, 1, "False").peephole(); + ``` + +4. We set the control token to the `True` projection node `ifT`, but before that we make another clone of + the current scope. + + ```java + // The exit scope, accounting for any side effects in the predicate + var exit = _scope.dup(); + exit.ctrl(ifF); + ``` + + The new scope is saved as the exit scope that will live after the loop ends, therefore `$ctrl` in the exit scope is + set to the `False` projection. The exit scope captures any side effects of the loop's predicate. + +5. We now set the control to the `True` projection and parse the loop body. + + ```java + // Parse the true side, which corresponds to loop body + ctrl(ifT); // set ctrl token to ifTrue projection + parseStatement(); // Parse loop body + ``` + +6. After the loop body is parsed, we go back and process all the phis we created earlier. + + ```java + // The true branch loops back, so whatever is current control gets + // added to head loop as input + head.endLoop(_scope, exit); + ``` + + The `endLoop` method sets the second control of the loop region to the control from the back edge. 
+ It then goes through all the phis and sets the second data input on the phi to the corresponding entry + from the loop body; phis that were not used are peepholed and get replaced by the original input. + + ```java + Node ctrl = ctrl(); + ctrl.set_def(2,back.ctrl()); + for( int i=1; i'| '<'| '>='| '<=') additiveExpression)* + ; + +additiveExpression + : multiplicativeExpression (('+' | '-') multiplicativeExpression)* + ; + +multiplicativeExpression + : unaryExpression (('*' | '/') unaryExpression)* + ; + +unaryExpression + : ('-') unaryExpression + | primaryExpression + ; + +primaryExpression + : IDENTIFIER + | INTEGER_LITERAL + | 'true' + | 'false' + | '(' expression ')' + ; + +INTEGER_LITERAL + : [1-9][0-9]* + | [0] + ; + +IDENTIFIER + : NON_DIGIT (NON_DIGIT | DEC_DIGIT)* + ; + +NON_DIGIT: [a-zA-Z_]; +DEC_DIGIT: [0-9]; +``` \ No newline at end of file diff --git a/chapter07/docs/07-graph1.gv b/chapter07/docs/07-graph1.gv new file mode 100644 index 00000000..ba806a28 --- /dev/null +++ b/chapter07/docs/07-graph1.gv @@ -0,0 +1,107 @@ +digraph chapter07 { +/* +while(arg < 10) { + arg = arg + 1; + #showGraph; +} +return arg; + +*/ + rankdir=BT; + ordering="in"; + concentrate="true"; + compound="true"; + subgraph cluster_Nodes { + Start2 [ shape=plaintext label=< + + + +
Start
+ + +
$ctrlarg
+
> + ]; + Loop6 [ shape=box style=filled fillcolor=yellow label="Loop" ]; + Phi_arg8 [ style=filled fillcolor=lightyellow label="φ_arg" ]; + Con_9 [ label="10" ]; + LT10 [ label="<" ]; + If11 [ shape=plaintext label=< + + + +
If
+ + +
TrueFalse
+
> + ]; + Con_15 [ label="1" ]; + Add16 [ label="+" ]; + { rank=same; Loop6;Phi_arg8;} + } + node [shape=plaintext]; + subgraph cluster_Scope7_1 { + Scope7_1 [label=< + + +
2
>]; + subgraph cluster_Scope7_2 { + Scope7_2 [label=< + + +
1
>]; + subgraph cluster_Scope7_3 { + Scope7_3 [label=< + + +
0$ctrlarg
>]; + } + } + } + node [shape=plaintext]; + subgraph cluster_Scope1_1 { + Scope1_1 [label=< + + +
1
>]; + subgraph cluster_Scope1_2 { + Scope1_2 [label=< + + +
0$ctrlarg
>]; + } + } + node [shape=plaintext]; + subgraph cluster_Scope14_1 { + Scope14_1 [label=< + + +
1
>]; + subgraph cluster_Scope14_2 { + Scope14_2 [label=< + + +
0$ctrlarg
>]; + } + } + edge [ fontname=Helvetica, fontsize=8 ]; + Loop6 -> Start2:p0[taillabel=1 color=red]; + Phi_arg8 -> Loop6 [style=dotted taillabel=0]; + Phi_arg8 -> Start2:p1[taillabel=1]; + LT10 -> Phi_arg8[taillabel=1]; + LT10 -> Con_9[taillabel=2]; + If11 -> Loop6[taillabel=0 color=red]; + If11 -> LT10[taillabel=1]; + Add16 -> Phi_arg8[taillabel=1]; + Add16 -> Con_15[taillabel=2]; + edge [style=dashed color=cornflowerblue]; + Scope7_3:"Scope7_3_$ctrl" -> If11:p0; + Scope7_3:"Scope7_3_arg" -> Add16; + edge [style=dashed color=cornflowerblue]; + Scope1_2:"Scope1_2_$ctrl" -> Loop6; + Scope1_2:"Scope1_2_arg" -> Phi_arg8; + edge [style=dashed color=cornflowerblue]; + Scope14_2:"Scope14_2_$ctrl" -> If11:p1; + Scope14_2:"Scope14_2_arg" -> Phi_arg8; +} diff --git a/chapter07/docs/07-graph1.svg b/chapter07/docs/07-graph1.svg new file mode 100644 index 00000000..94248cfb --- /dev/null +++ b/chapter07/docs/07-graph1.svg @@ -0,0 +1,268 @@ + + + + + + +chapter07 + + +cluster_Nodes + + + +cluster_Scope7_1 + + + +cluster_Scope7_2 + + + +cluster_Scope7_3 + + + +cluster_Scope1_1 + + + +cluster_Scope1_2 + + + +cluster_Scope14_1 + + + +cluster_Scope14_2 + + + + +Start2 + + +Start + + + +$ctrl + +arg + + + +Loop6 + +Loop + + + +Loop6->Start2:p0 + + +1 + + + +Phi_arg8 + +φ_arg + + + +Phi_arg8->Start2:p1 + + +1 + + + +Phi_arg8->Loop6 + + +0 + + + +Con_9 + +10 + + + +LT10 + +< + + + +LT10->Phi_arg8 + + +1 + + + +LT10->Con_9 + + +2 + + + +If11 + + +If + + + +True + + +False + + + +If11->Loop6 + + +0 + + + +If11->LT10 + + +1 + + + +Con_15 + +1 + + + +Add16 + ++ + + + +Add16->Phi_arg8 + + +1 + + + +Add16->Con_15 + + +2 + + + +Scope7_1 + + +2 + + + +Scope7_2 + + +1 + + + +Scope7_3 + + +0 + +$ctrl + +arg + + + +Scope7_3:Scope7_3_$ctrl->If11:p0 + + + + + +Scope7_3:Scope7_3_arg->Add16 + + + + + +Scope1_1 + + +1 + + + +Scope1_2 + + +0 + +$ctrl + +arg + + + +Scope1_2:Scope1_2_$ctrl->Loop6 + + + + + +Scope1_2:Scope1_2_arg->Phi_arg8 + + + + + +Scope14_1 + + +1 + + + +Scope14_2 + + +0 + +$ctrl + 
+arg + + + +Scope14_2:Scope14_2_arg->Phi_arg8 + + + + + +Scope14_2:Scope14_2_$ctrl->If11:p1 + + + + + diff --git a/chapter07/docs/07-graph2.gv b/chapter07/docs/07-graph2.gv new file mode 100644 index 00000000..f4cfcb45 --- /dev/null +++ b/chapter07/docs/07-graph2.gv @@ -0,0 +1,62 @@ +digraph chapter07 { +/* +while(arg < 10) { + arg = arg + 1; + #showGraph; +} +return arg; + +*/ + rankdir=BT; + ordering="in"; + concentrate="true"; + compound="true"; + subgraph cluster_Nodes { + Start2 [ shape=plaintext label=< + + + +
Start
+ + +
$ctrlarg
+
> + ]; + Stop3 [ shape=box style=filled fillcolor=yellow label="Stop" ]; + Loop6 [ shape=box style=filled fillcolor=yellow label="Loop" ]; + Phi_arg8 [ style=filled fillcolor=lightyellow label="φ_arg" ]; + Con_9 [ label="10" ]; + LT10 [ label="<" ]; + If11 [ shape=plaintext label=< + + + +
If
+ + +
TrueFalse
+
> + ]; + Con_15 [ label="1" ]; + Add16 [ label="+" ]; + Return17 [ shape=box style=filled fillcolor=yellow label="Return" ]; + { rank=same; Loop6;Phi_arg8;} + } + node [shape=plaintext]; + edge [ fontname=Helvetica, fontsize=8 ]; + Stop3 -> Return17[taillabel=0 color=red]; + Loop6 -> Start2:p0[taillabel=1 color=red]; + Loop6 -> If11:p0[taillabel=2 color=red constraint=false]; + Phi_arg8 -> Loop6 [style=dotted taillabel=0]; + Phi_arg8 -> Start2:p1[taillabel=1]; + Phi_arg8 -> Add16[taillabel=2 constraint=false]; + LT10 -> Phi_arg8[taillabel=1]; + LT10 -> Con_9[taillabel=2]; + If11 -> Loop6[taillabel=0 color=red]; + If11 -> LT10[taillabel=1]; + Add16 -> Phi_arg8[taillabel=1]; + Add16 -> Con_15[taillabel=2]; + Return17 -> If11:p1[taillabel=0 color=red]; + Return17 -> Phi_arg8[taillabel=1]; + edge [style=dashed color=cornflowerblue]; +} \ No newline at end of file diff --git a/chapter07/docs/07-graph2.svg b/chapter07/docs/07-graph2.svg new file mode 100644 index 00000000..1fec9556 --- /dev/null +++ b/chapter07/docs/07-graph2.svg @@ -0,0 +1,184 @@ + + + + + + +chapter07 + + +cluster_Nodes + + + + +Start2 + + +Start + + + +$ctrl + +arg + + + +Stop3 + +Stop + + + +Return17 + +Return + + + +Stop3->Return17 + + +0 + + + +Loop6 + +Loop + + + +Loop6->Start2:p0 + + +1 + + + +If11 + + +If + + + +True + + +False + + + +Loop6->If11:p0 + + +2 + + + +Phi_arg8 + +φ_arg + + + +Phi_arg8->Start2:p1 + + +1 + + + +Phi_arg8->Loop6 + + +0 + + + +Add16 + ++ + + + +Con_9 + +10 + + + +LT10 + +< + + + +LT10->Phi_arg8 + + +1 + + + +LT10->Con_9 + + +2 + + + +If11->Loop6 + + +0 + + + +If11->LT10 + + +1 + + + +Con_15 + +1 + + + +Add16->Phi_arg8 + + + +1 + + + +Add16->Con_15 + + +2 + + + +Return17->Phi_arg8 + + +1 + + + +Return17->If11:p1 + + +0 + + + diff --git a/chapter07/docs/07-graph3.gv b/chapter07/docs/07-graph3.gv new file mode 100644 index 00000000..4482100b --- /dev/null +++ b/chapter07/docs/07-graph3.gv @@ -0,0 +1,108 @@ +digraph chapter07 { +/* +int sum = 0; +int i = 0; +while(i < arg) { 
+ i = i + 1; + int j = 0; + while( j < arg ) { + sum = sum + j; + j = j + 1; + } +} +return sum; + +*/ + rankdir=BT; + ordering="in"; + concentrate="true"; + compound="true"; + subgraph cluster_Nodes { + Start2 [ shape=plaintext label=< + + + +
Start
+ + +
$ctrlarg
+
> + ]; + Stop3 [ shape=box style=filled fillcolor=yellow label="Stop" ]; + Con_6 [ label="0" ]; + Con_7 [ label="0" ]; + Loop8 [ shape=box style=filled fillcolor=yellow label="Loop" ]; + Phi_sum11 [ style=filled fillcolor=lightyellow label="φ_sum" ]; + Phi_i12 [ style=filled fillcolor=lightyellow label="φ_i" ]; + LT13 [ label="<" ]; + If14 [ shape=plaintext label=< + + + +
If
+ + +
TrueFalse
+
> + ]; + Con_18 [ label="1" ]; + Add19 [ label="+" ]; + Con_20 [ label="0" ]; + Loop21 [ shape=box style=filled fillcolor=yellow label="Loop" ]; + Phi_sum24 [ style=filled fillcolor=lightyellow label="φ_sum" ]; + Phi_j26 [ style=filled fillcolor=lightyellow label="φ_j" ]; + LT27 [ label="<" ]; + If28 [ shape=plaintext label=< + + + +
If
+ + +
TrueFalse
+
> + ]; + Add32 [ label="+" ]; + Con_33 [ label="1" ]; + Add34 [ label="+" ]; + Return35 [ shape=box style=filled fillcolor=yellow label="Return" ]; + { rank=same; Loop8;Phi_sum11;Phi_i12;} + { rank=same; Loop21;Phi_sum24;Phi_j26;} + } + node [shape=plaintext]; + edge [ fontname=Helvetica, fontsize=8 ]; + Stop3 -> Return35[taillabel=0 color=red]; + Loop8 -> Start2:p0[taillabel=1 color=red]; + Loop8 -> If28:p1[taillabel=2 color=red constraint=false]; + Phi_sum11 -> Loop8 [style=dotted taillabel=0]; + Phi_sum11 -> Con_6[taillabel=1]; + Phi_sum11 -> Phi_sum24[taillabel=2 constraint=false]; + Phi_i12 -> Loop8 [style=dotted taillabel=0]; + Phi_i12 -> Con_7[taillabel=1]; + Phi_i12 -> Add19[taillabel=2 constraint=false]; + LT13 -> Phi_i12[taillabel=1]; + LT13 -> Start2:p1[taillabel=2]; + If14 -> Loop8[taillabel=0 color=red]; + If14 -> LT13[taillabel=1]; + Add19 -> Phi_i12[taillabel=1]; + Add19 -> Con_18[taillabel=2]; + Loop21 -> If14:p0[taillabel=1 color=red]; + Loop21 -> If28:p0[taillabel=2 color=red constraint=false]; + Phi_sum24 -> Loop21 [style=dotted taillabel=0]; + Phi_sum24 -> Phi_sum11[taillabel=1]; + Phi_sum24 -> Add32[taillabel=2 constraint=false]; + Phi_j26 -> Loop21 [style=dotted taillabel=0]; + Phi_j26 -> Con_20[taillabel=1]; + Phi_j26 -> Add34[taillabel=2 constraint=false]; + LT27 -> Phi_j26[taillabel=1]; + LT27 -> Start2:p1[taillabel=2]; + If28 -> Loop21[taillabel=0 color=red]; + If28 -> LT27[taillabel=1]; + Add32 -> Phi_j26[taillabel=1]; + Add32 -> Phi_sum24[taillabel=2]; + Add34 -> Phi_j26[taillabel=1]; + Add34 -> Con_33[taillabel=2]; + Return35 -> If14:p1[taillabel=0 color=red]; + Return35 -> Phi_sum11[taillabel=1]; + edge [style=dashed color=cornflowerblue]; +} \ No newline at end of file diff --git a/chapter07/docs/07-graph3.svg b/chapter07/docs/07-graph3.svg new file mode 100644 index 00000000..a9a0e8d0 --- /dev/null +++ b/chapter07/docs/07-graph3.svg @@ -0,0 +1,373 @@ + + + + + + +chapter07 + + +cluster_Nodes + + + + +Start2 + + +Start + + + +$ctrl + 
+arg + + + +Stop3 + +Stop + + + +Return35 + +Return + + + +Stop3->Return35 + + +0 + + + +Con_6 + +0 + + + +Con_7 + +0 + + + +Loop8 + +Loop + + + +Loop8->Start2:p0 + + +1 + + + +If28 + + +If + + + +True + + +False + + + +Loop8->If28:p1 + + +2 + + + +Phi_sum11 + +φ_sum + + + +Phi_sum11->Con_6 + + +1 + + + +Phi_sum11->Loop8 + + +0 + + + +Phi_sum24 + +φ_sum + + + +Phi_i12 + +φ_i + + + +Phi_i12->Con_7 + + +1 + + + +Phi_i12->Loop8 + + +0 + + + +Add19 + ++ + + + +LT13 + +< + + + +LT13->Start2:p1 + + +2 + + + +LT13->Phi_i12 + + +1 + + + +If14 + + +If + + + +True + + +False + + + +If14->Loop8 + + +0 + + + +If14->LT13 + + +1 + + + +Con_18 + +1 + + + +Add19->Phi_i12 + + + +1 + + + +Add19->Con_18 + + +2 + + + +Con_20 + +0 + + + +Loop21 + +Loop + + + +Loop21->If14:p0 + + +1 + + + +Loop21->If28:p0 + + +2 + + + +Phi_sum24->Phi_sum11 + + + +1 + + + +Phi_sum24->Loop21 + + +0 + + + +Add32 + ++ + + + +Phi_j26 + +φ_j + + + +Phi_j26->Con_20 + + +1 + + + +Phi_j26->Loop21 + + +0 + + + +Add34 + ++ + + + +LT27 + +< + + + +LT27->Start2:p1 + + +2 + + + +LT27->Phi_j26 + + +1 + + + +If28->Loop21 + + +0 + + + +If28->LT27 + + +1 + + + +Add32->Phi_sum24 + + + +2 + + + +Add32->Phi_j26 + + +1 + + + +Con_33 + +1 + + + +Add34->Phi_j26 + + + +1 + + + +Add34->Con_33 + + +2 + + + +Return35->Phi_sum11 + + +1 + + + +Return35->If14:p1 + + +0 + + + diff --git a/chapter07/docs/07-graph4.gv b/chapter07/docs/07-graph4.gv new file mode 100644 index 00000000..24ace3da --- /dev/null +++ b/chapter07/docs/07-graph4.gv @@ -0,0 +1,96 @@ +digraph chapter07 { +/* +int a = 1; +int b = 2; +while(a < 10) { + if (a == 2) a = 3; + else b = 4; +} +return b; + +*/ + rankdir=BT; + ordering="in"; + concentrate="true"; + compound="true"; + subgraph cluster_Nodes { + Start2 [ shape=plaintext label=< + + + +
Start
+ + +
$ctrl
+
> + ]; + Stop3 [ shape=box style=filled fillcolor=yellow label="Stop" ]; + Con_6 [ label="1" ]; + Con_7 [ label="2" ]; + Loop8 [ shape=box style=filled fillcolor=yellow label="Loop" ]; + Phi_a11 [ style=filled fillcolor=lightyellow label="φ_a" ]; + Phi_b12 [ style=filled fillcolor=lightyellow label="φ_b" ]; + Con_13 [ label="10" ]; + LT14 [ label="<" ]; + If15 [ shape=plaintext label=< + + + +
If
+ + +
TrueFalse
+
> + ]; + Con_19 [ label="2" ]; + EQ20 [ label="==" ]; + If21 [ shape=plaintext label=< + + + +
If
+ + +
TrueFalse
+
> + ]; + Con_25 [ label="3" ]; + Con_26 [ label="4" ]; + Region27 [ shape=box style=filled fillcolor=yellow label="Region" ]; + Phi_a28 [ style=filled fillcolor=lightyellow label="φ_a" ]; + Phi_b29 [ style=filled fillcolor=lightyellow label="φ_b" ]; + Return30 [ shape=box style=filled fillcolor=yellow label="Return" ]; + { rank=same; Loop8;Phi_a11;Phi_b12;} + { rank=same; Region27;Phi_b29;Phi_a28;} + } + node [shape=plaintext]; + edge [ fontname=Helvetica, fontsize=8 ]; + Stop3 -> Return30[taillabel=0 color=red]; + Loop8 -> Start2:p0[taillabel=1 color=red]; + Loop8 -> Region27[taillabel=2 color=red constraint=false]; + Phi_a11 -> Loop8 [style=dotted taillabel=0]; + Phi_a11 -> Con_6[taillabel=1]; + Phi_a11 -> Phi_a28[taillabel=2 constraint=false]; + Phi_b12 -> Loop8 [style=dotted taillabel=0]; + Phi_b12 -> Con_7[taillabel=1]; + Phi_b12 -> Phi_b29[taillabel=2 constraint=false]; + LT14 -> Phi_a11[taillabel=1]; + LT14 -> Con_13[taillabel=2]; + If15 -> Loop8[taillabel=0 color=red]; + If15 -> LT14[taillabel=1]; + EQ20 -> Phi_a11[taillabel=1]; + EQ20 -> Con_19[taillabel=2]; + If21 -> If15:p0[taillabel=0 color=red]; + If21 -> EQ20[taillabel=1]; + Region27 -> If21:p0[taillabel=1 color=red]; + Region27 -> If21:p1[taillabel=2 color=red]; + Phi_a28 -> Region27 [style=dotted taillabel=0]; + Phi_a28 -> Con_25[taillabel=1]; + Phi_a28 -> Phi_a11[taillabel=2 constraint=false]; + Phi_b29 -> Region27 [style=dotted taillabel=0]; + Phi_b29 -> Phi_b12[taillabel=1]; + Phi_b29 -> Con_26[taillabel=2 constraint=false]; + Return30 -> If15:p1[taillabel=0 color=red]; + Return30 -> Phi_b12[taillabel=1]; + edge [style=dashed color=cornflowerblue]; +} diff --git a/chapter07/docs/07-graph4.svg b/chapter07/docs/07-graph4.svg new file mode 100644 index 00000000..cf42453d --- /dev/null +++ b/chapter07/docs/07-graph4.svg @@ -0,0 +1,329 @@ + + + + + + +chapter07 + + +cluster_Nodes + + + + +Start2 + + +Start + + + +$ctrl + + + +Stop3 + +Stop + + + +Return30 + +Return + + + +Stop3->Return30 + + +0 + + + 
+Con_6 + +1 + + + +Con_7 + +2 + + + +Loop8 + +Loop + + + +Loop8->Start2:p0 + + +1 + + + +Region27 + +Region + + + +Loop8->Region27 + + +2 + + + +Phi_a11 + +φ_a + + + +Phi_a11->Con_6 + + +1 + + + +Phi_a11->Loop8 + + +0 + + + +Phi_a28 + +φ_a + + + +Phi_b12 + +φ_b + + + +Phi_b12->Con_7 + + +1 + + + +Phi_b12->Loop8 + + +0 + + + +Phi_b29 + +φ_b + + + +Con_13 + +10 + + + +LT14 + +< + + + +LT14->Phi_a11 + + +1 + + + +LT14->Con_13 + + +2 + + + +If15 + + +If + + + +True + + +False + + + +If15->Loop8 + + +0 + + + +If15->LT14 + + +1 + + + +Con_19 + +2 + + + +EQ20 + +== + + + +EQ20->Phi_a11 + + +1 + + + +EQ20->Con_19 + + +2 + + + +If21 + + +If + + + +True + + +False + + + +If21->If15:p0 + + +0 + + + +If21->EQ20 + + +1 + + + +Con_25 + +3 + + + +Con_26 + +4 + + + +Region27->If21:p0 + + +1 + + + +Region27->If21:p1 + + +2 + + + +Phi_a28->Phi_a11 + + + +2 + + + +Phi_a28->Con_25 + + +1 + + + +Phi_a28->Region27 + + +0 + + + +Phi_b29->Phi_b12 + + + +1 + + + +Phi_b29->Con_26 + + +2 + + + +Phi_b29->Region27 + + +0 + + + +Return30->Phi_b12 + + +1 + + + +Return30->If15:p1 + + +0 + + + diff --git a/chapter07/docs/build-graphs.sh b/chapter07/docs/build-graphs.sh new file mode 100644 index 00000000..27b2b72f --- /dev/null +++ b/chapter07/docs/build-graphs.sh @@ -0,0 +1,6 @@ +dot -Tsvg 07-graph1.gv > 07-graph1.svg +dot -Tsvg 07-graph2.gv > 07-graph2.svg +dot -Tsvg 07-graph3.gv > 07-graph3.svg +dot -Tsvg 07-graph4.gv > 07-graph4.svg + + diff --git a/chapter07/pom.xml b/chapter07/pom.xml new file mode 100644 index 00000000..28d119b5 --- /dev/null +++ b/chapter07/pom.xml @@ -0,0 +1,14 @@ + + + 4.0.0 + + com.seaofnodes + simple + 1.0 + + chapter07 + jar + Chapter 7 + \ No newline at end of file diff --git a/chapter07/src/main/java/com/seaofnodes/simple/GraphVisualizer.java b/chapter07/src/main/java/com/seaofnodes/simple/GraphVisualizer.java new file mode 100644 index 00000000..ce3522bc --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/GraphVisualizer.java @@ -0,0 +1,256 @@ 
+package com.seaofnodes.simple; + +import com.seaofnodes.simple.node.*; + +import java.util.*; + +/** + * Simple visualizer that outputs GraphViz dot format. + * The dot output must be saved to a file and run manually via dot to generate the SVG output. + * Currently, this is done manually. + */ +public class GraphVisualizer { + + /** + * If set to true we put the control nodes in a separate cluster from + * data nodes. + */ + boolean _separateControlCluster = false; + + public GraphVisualizer(boolean separateControlCluster) { this._separateControlCluster = separateControlCluster; } + public GraphVisualizer() { this(false); } + + public String generateDotOutput(Parser parser) { + + // Since the graph has cycles, we need to create a flat list of all the + // nodes in the graph. + Collection all = findAll(parser); + StringBuilder sb = new StringBuilder(); + sb.append("digraph chapter07 {\n"); + sb.append("/*\n"); + sb.append(parser.src()); + sb.append("\n*/\n"); + + // To keep the Scopes below the graph and pointing up into the graph we + // need to group the Nodes in a subgraph cluster, and the scopes into a + // different subgraph cluster. THEN we can draw edges between the + // scopes and nodes. If we try to cross subgraph cluster borders while + // still making the subgraphs DOT gets confused. + sb.append("\trankdir=BT;\n"); // Force Nodes before Scopes + + // Preserve node input order + sb.append("\tordering=\"in\";\n"); + + // Merge multiple edges hitting the same node. Makes common shared + // nodes much prettier to look at. 
+ sb.append("\tconcentrate=\"true\";\n"); + + // Force nested scopes to order + sb.append("\tcompound=\"true\";\n"); + + // Just the Nodes first, in a cluster no edges + nodes(sb, all); + + // Now the scopes, in a cluster no edges + scope(sb,parser._scope ); + for( ScopeNode scope : parser._xScopes ) + scope( sb, scope ); + + // Walk the Node edges + nodeEdges(sb, all); + + // Walk the active Scope edges + scopeEdges( sb, parser._scope ); + for( ScopeNode scope : parser._xScopes ) + scopeEdges( sb, scope ); + + sb.append("}\n"); + return sb.toString(); + } + + private void nodesByCluster(StringBuilder sb, boolean doCtrl, Collection all) { + if (!_separateControlCluster && doCtrl) // all nodes in 1 cluster + return; + // Just the Nodes first, in a cluster no edges + sb.append(doCtrl ? "\tsubgraph cluster_Controls {\n" : "\tsubgraph cluster_Nodes {\n"); // Magic "cluster_" in the subgraph name + for (Node n : all) { + if (n instanceof ProjNode || n instanceof ScopeNode) + continue; // Do not emit, rolled into MultiNode or Scope cluster already + if (_separateControlCluster && doCtrl && !n.isCFG()) continue; + if (_separateControlCluster && !doCtrl && n.isCFG()) continue; + sb.append("\t\t").append(n.uniqueName()).append(" [ "); + String lab = n.glabel(); + if (n instanceof MultiNode) { + // Make a box with the MultiNode on top, and all the projections on the bottom + sb.append("shape=plaintext label=<\n"); + sb.append("\t\t\t\n"); + sb.append("\t\t\t\n"); + sb.append("\t\t\t"); + boolean doProjTable = false; + for (Node use : n._outputs) { + if (use instanceof ProjNode proj) { + if (!doProjTable) { + doProjTable = true; + sb.append(""); + } + sb.append("\n"); + sb.append("\t\t\t
").append(lab).append("
").append("\n"); + sb.append("\t\t\t\t").append("\n"); + sb.append("\t\t\t\t"); + } + sb.append(""); + } + } + if (doProjTable) { + sb.append("").append("\n"); + sb.append("\t\t\t\t
").append(proj.glabel()).append("
").append("\n"); + sb.append("\t\t\t
>\n\t\t"); + + } else { + // control nodes have box shape + // other nodes are ellipses, i.e. default shape + if (n.isCFG()) sb.append("shape=box style=filled fillcolor=yellow "); + else if (n instanceof PhiNode) sb.append("style=filled fillcolor=lightyellow "); + sb.append("label=\"").append(lab).append("\" "); + } + sb.append("];\n"); + } + if (!_separateControlCluster) { + // Force Region & Phis to line up + for (Node n : all) { + if (n instanceof RegionNode region) { + sb.append("\t\t{ rank=same; "); + sb.append(region).append(";"); + for (Node phi : region._outputs) + if (phi instanceof PhiNode) sb.append(phi.uniqueName()).append(";"); + sb.append("}\n"); + } + } + } + sb.append("\t}\n"); // End Node cluster + } + + private void nodes(StringBuilder sb, Collection all) { + nodesByCluster(sb, true, all); + nodesByCluster(sb, false, all); + } + + // Build a nested scope display, walking the _prev edge + private void scope( StringBuilder sb, ScopeNode scope ) { + sb.append("\tnode [shape=plaintext];\n"); + int level=1; + for( int idx = scope._scopes.size()-1; idx>=0; idx-- ) { + var syms = scope._scopes.get(idx); + String scopeName = makeScopeName(scope, level); + sb.append("\tsubgraph cluster_").append(scopeName).append(" {\n"); // Magic "cluster_" in the subgraph name + sb.append("\t\t").append(scopeName).append(" [label=<\n"); + sb.append("\t\t\t\n"); + // Add the scope level + int scopeLevel = scope._scopes.size()-level; + sb.append("\t\t\t"); + for(String name: syms.keySet()) + sb.append(""); + sb.append("\n"); + sb.append("\t\t\t
").append(scopeLevel).append("").append(name).append("
>];\n"); + level++; + } + // Scope clusters nest, so the graphics shows the nested scopes, so + // they are not closed as they are printed; so they just keep nesting. + // We close them all at once here. + sb.append( "\t}\n".repeat( level-1 ) ); // End all Scope clusters + } + + private String makeScopeName(ScopeNode sn, int level) { return sn.uniqueName() + "_" + level; } + private String makePortName(String scopeName, String varName) { return scopeName + "_" + varName; } + + // Walk the node edges + private void nodeEdges(StringBuilder sb, Collection all) { + // All them edge labels + sb.append("\tedge [ fontname=Helvetica, fontsize=8 ];\n"); + for( Node n : all ) { + // Do not display the Constant->Start edge; + // ProjNodes handled by Multi; + // ScopeNodes are done separately + if( n instanceof ConstantNode || n instanceof ProjNode || n instanceof ScopeNode ) + continue; + for( int i=0; idef edge from Phi to Region. + sb.append('\t').append(n.uniqueName()); + sb.append(" -> "); + sb.append(def.uniqueName()); + sb.append(" [style=dotted taillabel=").append(i).append("];\n"); + } else if( def != null ) { + // Most edges land here use->def + sb.append('\t').append(n.uniqueName()).append(" -> "); + if( def instanceof ProjNode proj ) { + String mname = proj.ctrl().uniqueName(); + sb.append(mname).append(":p").append(proj._idx); + } else sb.append(def.uniqueName()); + // Number edges, so we can see how they track + sb.append("[taillabel=").append(i); + // control edges are colored red + if( def.isCFG() ) + sb.append(" color=red"); + // Backedges do not add a ranking constraint + if( i==2 && (n instanceof PhiNode || n instanceof LoopNode) ) + sb.append(" constraint=false"); + sb.append("];\n"); + } + } + } + } + + // Walk the scope edges + private void scopeEdges( StringBuilder sb, ScopeNode scope ) { + sb.append("\tedge [style=dashed color=cornflowerblue];\n"); + int level=1; + for( int i = scope._scopes.size()-1; i>=0; i-- ) { + var syms = scope._scopes.get(i); + 
String scopeName = makeScopeName(scope, level); + for( String name : syms.keySet() ) { + int idx = syms.get(name); + Node def = scope.in(idx); + if( def==null ) continue; + sb.append("\t") + .append(scopeName).append(":") + .append('"').append(makePortName(scopeName, name)).append('"') // wrap port name with quotes because $ctrl is not valid unquoted + .append(" -> "); + if( def instanceof ProjNode proj ) { + String mname = proj.ctrl().uniqueName(); + sb.append(mname).append(":p").append(proj._idx); + } else sb.append(def.uniqueName()); + sb.append(";\n"); + } + level++; + } + } + + /** + * Finds all nodes in the graph. + */ + private Collection findAll(Parser parser) { + final HashMap all = new HashMap<>(); + for( Node n : Parser.START._outputs ) + walk(all, n); + for( Node n : parser._scope._inputs ) + walk(all, n); + return all.values(); + } + + /** + * Walk a subgraph and populate distinct nodes in the all list. + */ + private void walk(HashMap all, Node n) { + if(n == null ) return; + if (all.get(n._nid) != null) return; // Been there, done that + all.put(n._nid, n); + for (Node c : n._inputs) + walk(all, c); + for( Node c : n._outputs ) + walk(all, c); + } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/IRPrinter.java b/chapter07/src/main/java/com/seaofnodes/simple/IRPrinter.java new file mode 100644 index 00000000..dc6d9f4f --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/IRPrinter.java @@ -0,0 +1,124 @@ +package com.seaofnodes.simple; + +import com.seaofnodes.simple.node.LoopNode; +import com.seaofnodes.simple.node.Node; + +import java.util.ArrayList; +import java.util.BitSet; + +public class IRPrinter { + + // Another bulk pretty-printer. Makes more effort at basic-block grouping. + public static String prettyPrint(Node node, int depth) { + // First, a Breadth First Search at a fixed depth. 
+ BFS bfs = new BFS(node,depth); + // Convert just that set to a post-order + ArrayList rpos = new ArrayList<>(); + BitSet visit = new BitSet(); + for( int i=bfs._lim; i< bfs._bfs.size(); i++ ) + postOrd( bfs._bfs.get(i), rpos, visit, bfs._bs); + // Reverse the post-order walk + StringBuilder sb = new StringBuilder(); + boolean gap=false; + for( int i=rpos.size()-1; i>=0; i-- ) { + Node n = rpos.get(i); + if( n.isCFG() || n.isMultiHead() ) { + if( !gap ) sb.append("\n"); // Blank before multihead + n._print_line(sb); // Print head + while( --i >= 0 ) { + Node t = rpos.get(i); + if( !t.isMultiTail() ) { i++; break; } + t._print_line(sb); + } + sb.append("\n"); // Blank after multitail + gap = true; + } else { + n._print_line( sb ); + gap = false; + } + } + return sb.toString(); + } + + private static void postOrd(Node n, ArrayList rpos, BitSet visit, BitSet bfs) { + if( !bfs.get(n._nid) ) + return; // Not in the BFS visit + if( visit.get(n._nid) ) return; // Already post-order walked + visit.set(n._nid); + // First walk the CFG, then everything + if( n.isCFG() ) { + for( Node use : n._outputs ) + if( use.isCFG() && use.nOuts()>=1 && !(use._outputs.get(0) instanceof LoopNode) ) + postOrd(use, rpos,visit,bfs); + for( Node use : n._outputs ) + if( use.isCFG() ) + postOrd(use,rpos,visit,bfs); + } + for( Node use : n._outputs ) + postOrd(use, rpos,visit,bfs); + // Post-order + rpos.add(n); + } + + // Breadth-first search, broken out in a class to keep in more independent. + // Maintains a root-set of Nodes at the limit (or past by 1 if MultiHead). 
+ public static class BFS { + // A breadth first search, plus MultiHeads for any MultiTails + public final ArrayList _bfs; + public final BitSet _bs; // Visited members by node id + public final int _depth; // Depth limit + public final int _lim; // From here to _bfs._len can be roots for a reverse search + public BFS( Node base, int d ) { + _depth = d; + _bfs = new ArrayList<>(); + _bs = new BitSet(); + + add(base); // Prime the pump + int idx=0, lim=1; // Limit is where depth counter changes + while( idx < _bfs.size() ) { // Ran out of nodes below depth + Node n = _bfs.get(idx++); + for( Node def : n._inputs ) + if( def!=null && !_bs.get(def._nid) ) + add(def); + if( idx==lim ) { // Depth counter changes at limit + if( --d < 0 ) + break; // Ran out of depth + lim = _bfs.size(); // New depth limit + } + } + // Toss things past the limit except multi-heads + while( idx < _bfs.size() ) { + Node n = _bfs.get(idx); + if( n.isMultiHead() ) idx++; + else del(idx); + } + // Root set is any node with no inputs in the visited set + lim = _bfs.size(); + for( int i=_bfs.size()-1; i>=0; i-- ) + if( !any_visited(_bfs.get(i)) ) + swap( i,--lim); + _lim = lim; + } + void swap( int x, int y ) { + if( x==y ) return; + Node tx = _bfs.get(x); + Node ty = _bfs.get(y); + _bfs.set(x,ty); + _bfs.set(y,tx); + } + void add(Node n) { + _bfs.add(n); + _bs.set(n._nid); + } + void del(int idx) { + Node n = Utils.del(_bfs, idx); + _bs.clear(n._nid); + } + boolean any_visited( Node n ) { + for( Node def : n._inputs ) + if( def!=null && _bs.get(def._nid) ) + return true; + return false; + } + } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/Parser.java b/chapter07/src/main/java/com/seaofnodes/simple/Parser.java new file mode 100644 index 00000000..99594c95 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/Parser.java @@ -0,0 +1,572 @@ +package com.seaofnodes.simple; + +import com.seaofnodes.simple.node.*; +import com.seaofnodes.simple.type.*; + +import java.util.*; 
+ +/** + * The Parser converts a Simple source program to the Sea of Nodes intermediate + * representation directly in one pass. There is no intermediate Abstract + * Syntax Tree structure. + *

+ * This is a simple recursive descent parser. All lexical analysis is done here as well. + */ +public class Parser { + /** + * A Global Static, unique to each compilation. This is a public, so we + * can make constants everywhere without having to thread the StartNode + * through the entire parser and optimizer. + *

+ * To make the compiler multithreaded, this field will have to move into a TLS. + */ + public static StartNode START; + + public StopNode STOP; + + // The Lexer. Thin wrapper over a byte[] buffer with a cursor. + private final Lexer _lexer; + + /** + * Current ScopeNode - ScopeNodes change as we parse code, but at any point of time + * there is one current ScopeNode. The reason the current ScopeNode can change is to do with how + * we handle branching. See {@link #parseIf()}. + *

+ * Each ScopeNode contains a stack of lexical scopes, each scope is a symbol table that binds + * variable names to Nodes. The top of this stack represents current scope. + *

+ * We keep a list of all ScopeNodes so that we can show them in graphs. + * @see #parseIf() + * @see #_xScopes + */ + public ScopeNode _scope; + + /** + * List of keywords disallowed as identifiers + */ + private final HashSet KEYWORDS = new HashSet<>(){{ + add("else"); + add("false"); + add("if"); + add("int"); + add("return"); + add("true"); + add("while"); + }}; + + + /** + * We clone ScopeNodes when control flows branch; it is useful to have + * a list of all active ScopeNodes for purposes of visualization of the SoN graph + */ + public final Stack _xScopes = new Stack<>(); + + public Parser(String source, TypeInteger arg) { + Node.reset(); + _lexer = new Lexer(source); + _scope = new ScopeNode(); + START = new StartNode(new Type[]{ Type.CONTROL, arg }); + STOP = new StopNode(); + } + + public Parser(String source) { + this(source, TypeInteger.BOT); + } + + @Override + public String toString() { return _lexer.toString(); } + + String src() { return new String( _lexer._input ); } + + // Debugging utility to find a Node by index + public static Node find(int nid) { return START.find(nid); } + + private Node ctrl() { return _scope.ctrl(); } + + private Node ctrl(Node n) { return _scope.ctrl(n); } + + public StopNode parse() { return parse(false); } + public StopNode parse(boolean show) { + // Enter a new scope for the initial control and arguments + _scope.push(); + _scope.define(ScopeNode.CTRL, new ProjNode(START, 0, ScopeNode.CTRL).peephole()); + _scope.define(ScopeNode.ARG0, new ProjNode(START, 1, ScopeNode.ARG0).peephole()); + parseBlock(); + _scope.pop(); + if (!_lexer.isEOF()) throw error("Syntax error, unexpected " + _lexer.getAnyNextToken()); + STOP.peephole(); + if( show ) showGraph(); + return STOP; + } + + + /** + * Parses a block + * + *

+     *     '{' statements '}'
+     * 
+ * Does not parse the opening or closing '{}' + * @return a {@link Node} or {@code null} + */ + private Node parseBlock() { + // Enter a new scope + _scope.push(); + while (!peek('}') && !_lexer.isEOF()) + parseStatement(); + // Exit scope + _scope.pop(); + return null; + } + + /** + * Parses a statement + * + *
+     returnStatement | declStatement | blockStatement | ifStatement | whileStatement | expressionStatement
+     * 
+ * @return a {@link Node} or {@code null} + */ + private Node parseStatement() { + if (matchx("return") ) return parseReturn(); + else if (matchx("int")) return parseDecl(); + else if (match ("{" )) return require(parseBlock(),"}"); + else if (matchx("if" )) return parseIf(); + else if (matchx("while")) return parseWhile(); + else if (matchx("#showGraph")) return require(showGraph(),";"); + else return parseExpressionStatement(); + } + + /** + * Parses a while statement + * + *
+     *     while ( expression ) statement
+     * 
+ * @return a {@link Node}, never {@code null} + */ + private Node parseWhile() { + require("("); + + // Loop region has two control inputs, the first one is the entry + // point, and second one is back edge that is set after loop is parsed + // (see end_loop() call below). Note that the absence of back edge is + // used as an indicator to switch off peepholes of the region and + // associated phis. + + ctrl(new LoopNode(ctrl(),null).peephole()); + ScopeNode head = _xScopes.push(_scope).keep(); // Save the current scope as the loop head + // Make a new Scope for the body. + _scope = _scope.dup(true); + + // Parse predicate + var pred = require(parseExpression(), ")"); + // IfNode takes current control and predicate + IfNode ifNode = (IfNode)new IfNode(ctrl(), pred).keep().peephole(); + // Setup projection nodes + Node ifT = new ProjNode(ifNode, 0, "True" ).peephole(); + ifNode.unkeep(); + Node ifF = new ProjNode(ifNode, 1, "False").peephole(); + // The exit scope, accounting for any side effects in the predicate + var exit = _scope.dup(); + _xScopes.push(exit); + exit.ctrl(ifF); + + // Parse the true side, which corresponds to loop body + ctrl(ifT); // set ctrl token to ifTrue projection + parseStatement(); // Parse loop body + + // The true branch loops back, so whatever is current control gets + // added to head loop as input + head.endLoop(_scope, exit); + head.unkeep().kill(); + _xScopes.pop(); + _xScopes.pop(); // Discard pushed from graph display + + // At exit the false control is the current control, and + // the scope is the exit scope after the exit test. + return (_scope = exit); + } + + /** + * Parses a statement + * + *
+     *     if ( expression ) statement [else statement]
+     * 
+ * @return a {@link Node}, never {@code null} + */ + private Node parseIf() { + require("("); + // Parse predicate + var pred = require(parseExpression(), ")"); + // IfNode takes current control and predicate + IfNode ifNode = (IfNode)new IfNode(ctrl(), pred).keep().peephole(); + // Setup projection nodes + Node ifT = new ProjNode(ifNode, 0, "True" ).peephole(); + ifNode.unkeep(); + Node ifF = new ProjNode(ifNode, 1, "False").peephole(); + // In if true branch, the ifT proj node becomes the ctrl + // But first clone the scope and set it as current + int ndefs = _scope.nIns(); + ScopeNode fScope = _scope.dup(); // Duplicate current scope + _xScopes.push(fScope); // For graph visualization we need all scopes + + // Parse the true side + ctrl(ifT); // set ctrl token to ifTrue projection + parseStatement(); // Parse true-side + ScopeNode tScope = _scope; + + // Parse the false side + _scope = fScope; // Restore scope, then parse else block if any + ctrl(ifF); // Ctrl token is now set to ifFalse projection + if (matchx("else")) { + parseStatement(); + fScope = _scope; + } + + if( tScope.nIns() != ndefs || fScope.nIns() != ndefs ) + throw error("Cannot define a new name on one arm of an if"); + + // Merge results + _scope = tScope; + _xScopes.pop(); // Discard pushed from graph display + + return ctrl(tScope.mergeScopes(fScope)); + } + + + /** + * Parses a return statement; "return" already parsed. + * The $ctrl edge is killed. + * + *
+     *     'return' expr ;
+     * 
+ * @return an expression {@link Node}, never {@code null} + */ + private Node parseReturn() { + var expr = require(parseExpression(), ";"); + Node ret = STOP.addReturn(new ReturnNode(ctrl(), expr).peephole()); + ctrl(new ConstantNode(Type.XCONTROL).peephole()); // Kill control + return ret; + } + + /** + * Dumps out the node graph + * @return {@code null} + */ + private Node showGraph() { + System.out.println(new GraphVisualizer().generateDotOutput(this)); + return null; + } + + /** + * Parses an expression statement + * + *
+     *     name '=' expression ';'
+     * 
+ * @return an expression {@link Node}, never {@code null} + */ + private Node parseExpressionStatement() { + var name = requireId(); + require("="); + var expr = require(parseExpression(), ";"); + if( _scope.update(name, expr)==null ) + throw error("Undefined name '" + name + "'"); + return expr; + } + + /** + * Parses a declStatement + * + *
+     *     'int' name = expression ';'
+     * 
+ * @return an expression {@link Node}, never {@code null} + */ + private Node parseDecl() { + // Type is 'int' for now + var name = requireId(); + require("="); + var expr = require(parseExpression(), ";"); + if( _scope.define(name,expr) == null ) + throw error("Redefining name '" + name + "'"); + return expr; + } + + /** + * Parse an expression of the form: + * + *
+     *     expr : compareExpr
+     * 
+     * @return an expression {@link Node}, never {@code null}
+     */
+    private Node parseExpression() {
+        // Comparison is the top-level expression form, so simply delegate to it
+        Node expr = parseComparison();
+        return expr;
+    }
+
+    /**
+     * Parse a comparison expression of the form:
+     *
+     *
+     *     expr : additiveExpr op additiveExpr
+     * 
+ * @return an comparator expression {@link Node}, never {@code null} + */ + private Node parseComparison() { + var lhs = parseAddition(); + if (match("==")) return new BoolNode.EQ(lhs, parseComparison()).peephole(); + if (match("!=")) return new NotNode(new BoolNode.EQ(lhs, parseComparison()).peephole()).peephole(); + if (match("<" )) return new BoolNode.LT(lhs, parseComparison()).peephole(); + if (match("<=")) return new BoolNode.LE(lhs, parseComparison()).peephole(); + if (match(">" )) return new BoolNode.LT(parseComparison(), lhs).peephole(); + if (match(">=")) return new BoolNode.LE(parseComparison(), lhs).peephole(); + return lhs; + } + + /** + * Parse an additive expression + * + *
+     *     additiveExpr : multiplicativeExpr (('+' | '-') multiplicativeExpr)*
+     * 
+     * @return an add expression {@link Node}, never {@code null}
+     */
+    private Node parseAddition() {
+        var lhs = parseMultiplication();
+        // '+' and '-' are left-associative: "a - b - c" means "(a - b) - c".
+        // Fold each further operand into the running left-hand side instead of
+        // recursing on the right, which would build "a - (b - c)".
+        while (true) {
+            if      (match("+")) lhs = new AddNode(lhs, parseMultiplication()).peephole();
+            else if (match("-")) lhs = new SubNode(lhs, parseMultiplication()).peephole();
+            else return lhs;
+        }
+    }
+
+    /**
+     * Parse a multiplicative expression
+     *
+     *
+     *     multiplicativeExpr : unaryExpr (('*' | '/') unaryExpr)*
+     * 
+     * @return a multiply expression {@link Node}, never {@code null}
+     */
+    private Node parseMultiplication() {
+        var lhs = parseUnary();
+        // '*' and '/' are left-associative: "a / b / c" means "(a / b) / c".
+        // Fold each further operand into the running left-hand side instead of
+        // recursing on the right, which would build "a / (b / c)".
+        while (true) {
+            if      (match("*")) lhs = new MulNode(lhs, parseUnary()).peephole();
+            else if (match("/")) lhs = new DivNode(lhs, parseUnary()).peephole();
+            else return lhs;
+        }
+    }
+
+    /**
+     * Parse a unary minus expression.
+     *
+     *
+     *     unaryExpr : ('-') unaryExpr | primaryExpr
+     * 
+     * @return a unary expression {@link Node}, never {@code null}
+     */
+    private Node parseUnary() {
+        // Count the leading minus signs, parse the operand they apply to, then
+        // wrap it once per sign from the innermost outwards.
+        int negations = 0;
+        while (match("-")) negations++;
+        Node operand = parsePrimary();
+        for (int i = 0; i < negations; i++)
+            operand = new MinusNode(operand).peephole();
+        return operand;
+    }
+
+    /**
+     * Parse a primary expression:
+     *
+     *
+     *     primaryExpr : integerLiteral | Identifier | true | false | '(' expression ')'
+     * 
+ * @return a primary {@link Node}, never {@code null} + */ + private Node parsePrimary() { + if( _lexer.isNumber() ) return parseIntegerLiteral(); + if( match("(") ) return require(parseExpression(), ")"); + if( matchx("true" ) ) return new ConstantNode(TypeInteger.constant(1)).peephole(); + if( matchx("false") ) return new ConstantNode(TypeInteger.constant(0)).peephole(); + String name = _lexer.matchId(); + if( name == null) throw errorSyntax("an identifier or expression"); + Node n = _scope.lookup(name); + if( n!=null ) return n; + throw error("Undefined name '" + name + "'"); + } + + /** + * Parse integer literal + * + *
+     *     integerLiteral: [1-9][0-9]* | [0]
+     * 
+ */ + private ConstantNode parseIntegerLiteral() { + return (ConstantNode) new ConstantNode(_lexer.parseNumber()).peephole(); + } + + ////////////////////////////////// + // Utilities for lexical analysis + + // Return true and skip if "syntax" is next in the stream. + private boolean match (String syntax) { return _lexer.match (syntax); } + // Match must be "exact", not be followed by more id letters + private boolean matchx(String syntax) { return _lexer.matchx(syntax); } + // Return true and do NOT skip if 'ch' is next + private boolean peek(char ch) { return _lexer.peek(ch); } + + // Require and return an identifier + private String requireId() { + String id = _lexer.matchId(); + if (id != null && !KEYWORDS.contains(id) ) return id; + throw error("Expected an identifier, found '"+id+"'"); + } + + // Require an exact match + private void require(String syntax) { require(null, syntax); } + private Node require(Node n, String syntax) { + if (match(syntax)) return n; + throw errorSyntax(syntax); + } + + RuntimeException errorSyntax(String syntax) { + return error("Syntax error, expected " + syntax + ": " + _lexer.getAnyNextToken()); + } + + static RuntimeException error(String errorMessage) { + return new RuntimeException(errorMessage); + } + + //////////////////////////////////// + // Lexer components + + private static class Lexer { + + // Input buffer; an array of text bytes read from a file or a string + private final byte[] _input; + // Tracks current position in input buffer + private int _position = 0; + + /** + * Record the source text for lexing + */ + public Lexer(String source) { + this(source.getBytes()); + } + + /** + * Direct from disk file source + */ + public Lexer(byte[] buf) { + _input = buf; + } + + // Very handy in the debugger, shows the unparsed program + @Override + public String toString() { + return new String(_input, _position, _input.length - _position); + } + + // True if at EOF + private boolean isEOF() { + return _position >= 
_input.length; + } + + // Peek next character, or report EOF + private char peek() { + return isEOF() ? Character.MAX_VALUE // Special value that causes parsing to terminate + : (char) _input[_position]; + } + + private char nextChar() { + char ch = peek(); + _position++; + return ch; + } + + // True if a white space + private boolean isWhiteSpace() { + return peek() <= ' '; // Includes all the use space, tab, newline, CR + } + + /** + * Return the next non-white-space character + */ + private void skipWhiteSpace() { + while (isWhiteSpace()) _position++; + } + + + // Return true, if we find "syntax" after skipping white space; also + // then advance the cursor past syntax. + // Return false otherwise, and do not advance the cursor. + boolean match(String syntax) { + skipWhiteSpace(); + int len = syntax.length(); + if (_position + len > _input.length) return false; + for (int i = 0; i < len; i++) + if ((char) _input[_position + i] != syntax.charAt(i)) + return false; + _position += len; + return true; + } + + boolean matchx(String syntax) { + if( !match(syntax) ) return false; + if( !isIdLetter(peek()) ) return true; + _position -= syntax.length(); + return false; + } + + private boolean peek(char ch) { + skipWhiteSpace(); + return peek()==ch; + } + + // Return an identifier or null + String matchId() { + skipWhiteSpace(); + return isIdStart(peek()) ? 
parseId() : null; + } + + // Used for errors + String getAnyNextToken() { + if (isEOF()) return ""; + if (isIdStart(peek())) return parseId(); + if (isPunctuation(peek())) return parsePunctuation(); + return String.valueOf(peek()); + } + + boolean isNumber() {return isNumber(peek());} + boolean isNumber(char ch) {return Character.isDigit(ch);} + + private Type parseNumber() { + int start = _position; + while (isNumber(nextChar())) ; + String snum = new String(_input, start, --_position - start); + if (snum.length() > 1 && snum.charAt(0) == '0') + throw error("Syntax error: integer values cannot start with '0'"); + return TypeInteger.constant(Long.parseLong(snum)); + } + + // First letter of an identifier + private boolean isIdStart(char ch) { + return Character.isAlphabetic(ch) || ch == '_'; + } + + // All characters of an identifier, e.g. "_x123" + private boolean isIdLetter(char ch) { + return Character.isLetterOrDigit(ch) || ch == '_'; + } + + private String parseId() { + int start = _position; + while (isIdLetter(nextChar())) ; + return new String(_input, start, --_position - start); + } + + // + private boolean isPunctuation(char ch) { + return "=;[]<>()+-/*".indexOf(ch) != -1; + } + + private String parsePunctuation() { + int start = _position; + return new String(_input, start, 1); + } + } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/Utils.java b/chapter07/src/main/java/com/seaofnodes/simple/Utils.java new file mode 100644 index 00000000..4d663c25 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/Utils.java @@ -0,0 +1,39 @@ +package com.seaofnodes.simple; + +import java.util.ArrayList; + +public class Utils { + public static RuntimeException TODO() { return TODO("Not yet implemented"); } + public static RuntimeException TODO(String msg) { return new RuntimeException(msg); } + + /** + * Fast, constant-time, element removal. 
Does not preserve order + * + * @param array ArrayList to modify + * @param i element to be removed + * @return element removed + */ + public static E del(ArrayList array, int i) { + if ( i >= 0 && i < array.size() ) { + E tmp = array.get(i); + E last = array.removeLast(); + if (i < array.size()) array.set(i, last); + return tmp; + } + return null; + } + + /** + * Search a list for an element by reference + * + * @param ary List to search in + * @param x Object to be searched + * @return >= 0 on success, -1 on failure + */ + public static int find( ArrayList ary, E x ) { + for( int i=0; i 2 ) { + // Only valid to rotate constants if both are same associative ops + if( lhs.getClass() != op.getClass() ) return null; + lphi = pcon(lhs.in(2)); // Will rotate with the Phi push + } + if( lphi==null ) return null; + + // RHS is a constant or a Phi of constants + if( !(rhs instanceof ConstantNode con) && pcon(rhs)==null ) + return null; + + // If both are Phis, must be same Region + if( rhs instanceof PhiNode && lphi.in(0) != rhs.in(0) ) + return null; + + // Note that this is the exact reverse of Phi pulling a common op down + // to reduce total op-count. We don't get in an endless push-up + // push-down peephole cycle because the constants all fold first. + Node[] ns = new Node[lphi.nIns()]; + ns[0] = lphi.in(0); + // Push constant up through the phi: x + (phi con0+con0 con1+con1...) 
+ for( int i=1; i hi._nid; + } + + @Override Node copy(Node lhs, Node rhs) { return new AddNode(lhs,rhs); } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/BoolNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/BoolNode.java new file mode 100644 index 00000000..d65fbf14 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/BoolNode.java @@ -0,0 +1,60 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; + +import java.util.BitSet; + +abstract public class BoolNode extends Node { + + public BoolNode(Node lhs, Node rhs) { + super(null, lhs, rhs); + } + + abstract String op(); // String opcode name + + @Override + public String label() { return getClass().getSimpleName(); } + + @Override + public String glabel() { return op(); } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + in(1)._print0(sb.append("("), visited); + in(2)._print0(sb.append(op()), visited); + return sb.append(")"); + } + + @Override + public Type compute() { + if( in(1)._type instanceof TypeInteger i0 && + in(2)._type instanceof TypeInteger i1 ) { + if (i0.isConstant() && i1.isConstant()) + return TypeInteger.constant(doOp(i0.value(), i1.value()) ? 1 : 0); + return i0.meet(i1); + } + return Type.BOTTOM; + } + + abstract boolean doOp(long lhs, long rhs); + + @Override + public Node idealize() { + // Compare of same + if( in(1)==in(2) ) + return new ConstantNode(TypeInteger.constant(doOp(3,3)?1:0)); + + // Do we have ((x * (phi cons)) * con) ? + // Do we have ((x * (phi cons)) * (phi cons)) ? + // Push constant up through the phi: x * (phi con0*con0 con1*con1...) 
+ Node phicon = AddNode.phiCon(this,false); + if( phicon!=null ) return phicon; + + return null; + } + + public static class EQ extends BoolNode { public EQ(Node lhs, Node rhs) { super(lhs,rhs); } String op() { return "=="; } boolean doOp(long lhs, long rhs) { return lhs == rhs; } Node copy(Node lhs, Node rhs) { return new EQ(lhs,rhs); } } + public static class LT extends BoolNode { public LT(Node lhs, Node rhs) { super(lhs,rhs); } String op() { return "<" ; } boolean doOp(long lhs, long rhs) { return lhs < rhs; } Node copy(Node lhs, Node rhs) { return new LT(lhs,rhs); } } + public static class LE extends BoolNode { public LE(Node lhs, Node rhs) { super(lhs,rhs); } String op() { return "<="; } boolean doOp(long lhs, long rhs) { return lhs <= rhs; } Node copy(Node lhs, Node rhs) { return new LE(lhs,rhs); } } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/ConstantNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/ConstantNode.java new file mode 100644 index 00000000..0e13d2f2 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/ConstantNode.java @@ -0,0 +1,45 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.Parser; +import com.seaofnodes.simple.type.Type; + +import java.util.BitSet; + +/** + * A Constant node represents a constant value. At present, the only constants + * that we allow are integer literals; therefore Constants contain an integer + * value. As we add other types of constants, we will refactor how we represent + * Constants. + *

+ * Constants have no semantic inputs. However, we set Start as an input to + * Constants to enable a forward graph walk. This edge carries no semantic + * meaning, and it is present solely to allow visitation. + *

+ * The Constant's value is the value stored in it. + */ +public class ConstantNode extends Node { + Type _con; + public ConstantNode( Type type ) { + super(Parser.START); + _con = type; + } + + @Override + public String label() { return ""+_con; } + + @Override + public String uniqueName() { return "Con_" + _nid; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + return _con._print(sb); + } + + @Override public boolean isMultiTail() { return true; } + + @Override + public Type compute() { return _con; } + + @Override + public Node idealize() { return null; } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/DivNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/DivNode.java new file mode 100644 index 00000000..b0e12547 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/DivNode.java @@ -0,0 +1,39 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; + +import java.util.BitSet; + +public class DivNode extends Node { + public DivNode(Node lhs, Node rhs) { super(null, lhs, rhs); } + + @Override public String label() { return "Div"; } + + @Override public String glabel() { return "//"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + in(1)._print0(sb.append("("), visited); + in(2)._print0(sb.append("/"), visited); + return sb.append(")"); + } + + @Override + public Type compute() { + if (in(1)._type instanceof TypeInteger i0 && + in(2)._type instanceof TypeInteger i1) { + if (i0.isConstant() && i1.isConstant()) + return i1.value() == 0 + ? 
TypeInteger.ZERO + : TypeInteger.constant(i0.value()/i1.value()); + return i0.meet(i1); + } + return Type.BOTTOM; + } + + @Override + public Node idealize() { return null; } + + @Override Node copy(Node lhs, Node rhs) { return new DivNode(lhs,rhs); } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/IfNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/IfNode.java new file mode 100644 index 00000000..e573010d --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/IfNode.java @@ -0,0 +1,58 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; +import com.seaofnodes.simple.type.TypeTuple; + +import java.util.BitSet; + +public class IfNode extends MultiNode { + + public IfNode(Node ctrl, Node pred) { + super(ctrl, pred); + } + + @Override + public String label() { return "If"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + sb.append("if( "); + return in(1)._print0(sb, visited).append(" )"); + } + + @Override public boolean isCFG() { return true; } + @Override public boolean isMultiHead() { return true; } + + public Node ctrl() { return in(0); } + public Node pred() { return in(1); } + + @Override + public Type compute() { + // If the If node is not reachable then neither is any following Proj + if (ctrl()._type != Type.CONTROL && ctrl()._type != Type.BOTTOM ) + return TypeTuple.IF_NEITHER; + // If constant is 0 then false branch is reachable + // Else true branch is reachable + if (pred()._type instanceof TypeInteger ti && ti.isConstant()) { + if (ti.value() == 0) return TypeTuple.IF_FALSE; + else return TypeTuple.IF_TRUE; + } + + // Hunt up the immediate dominator tree. If we find an identical if + // test on either the true or false branch, then this test matches. 
+ for( Node dom = idom(), prior=this; dom!=null; prior=dom, dom = dom.idom() ) + // Only an identical test that dominates us through one of its projections + // decides the outcome; otherwise keep hunting, never return a null Type. + if( dom instanceof IfNode iff && iff.pred()==pred() && prior instanceof ProjNode proj ) + // Repeated test, dominated on one side. Test result is the same. + return proj._idx==0 ? TypeTuple.IF_TRUE : TypeTuple.IF_FALSE; + + return TypeTuple.IF_BOTH; + } + + @Override + public Node idealize() { + return null; + } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/LoopNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/LoopNode.java new file mode 100644 index 00000000..eaf19199 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/LoopNode.java @@ -0,0 +1,35 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; + +import java.util.BitSet; + +public class LoopNode extends RegionNode { + + public LoopNode( Node entry, Node back) { super(null,entry,back); } + + Node entry() { return in(1); } + Node back () { return in(2); } + + @Override + public String label() { return "Loop"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + return sb.append("Loop").append(_nid); + } + + @Override + public Type compute() { + return inProgress() ? Type.CONTROL : super.compute(); + } + + @Override + public Node idealize() { + return inProgress() ? 
null : super.idealize(); + } + + @Override Node idom() { return entry(); } + + @Override public boolean inProgress() { return back()==null; } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/MinusNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/MinusNode.java new file mode 100644 index 00000000..8a96693d --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/MinusNode.java @@ -0,0 +1,30 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; + +import java.util.BitSet; + +public class MinusNode extends Node { + public MinusNode(Node in) { super(null, in); } + + @Override public String label() { return "Minus"; } + + @Override public String glabel() { return "-"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + in(1)._print0(sb.append("(-"), visited); + return sb.append(")"); + } + + @Override + public Type compute() { + if (in(1)._type instanceof TypeInteger i0) + return i0.isConstant() ? 
TypeInteger.constant(-i0.value()) : i0; + return Type.BOTTOM; + } + + @Override + public Node idealize() { return null; } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/MulNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/MulNode.java new file mode 100644 index 00000000..70e6c0db --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/MulNode.java @@ -0,0 +1,58 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; + +import java.util.BitSet; + +public class MulNode extends Node { + public MulNode(Node lhs, Node rhs) { super(null, lhs, rhs); } + + @Override public String label() { return "Mul"; } + + @Override public String glabel() { return "*"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + in(1)._print0(sb.append("("), visited); + in(2)._print0(sb.append("*"), visited); + return sb.append(")"); + } + + @Override + public Type compute() { + if (in(1)._type instanceof TypeInteger i0 && + in(2)._type instanceof TypeInteger i1) { + if (i0.isConstant() && i1.isConstant()) + return TypeInteger.constant(i0.value()*i1.value()); + return i0.meet(i1); + } + return Type.BOTTOM; + } + + @Override + public Node idealize() { + Node lhs = in(1); + Node rhs = in(2); + Type t1 = lhs._type; + Type t2 = rhs._type; + + // Mul of 1. We do not check for (1*x) because this will already + // canonicalize to (x*1) + if ( t2.isConstant() && t2 instanceof TypeInteger i && i.value()==1 ) + return lhs; + + // Move constants to RHS: con*arg becomes arg*con + if ( t1.isConstant() && !t2.isConstant() ) + return swap12(); + + // Do we have ((x * (phi cons)) * con) ? + // Do we have ((x * (phi cons)) * (phi cons)) ? + // Push constant up through the phi: x * (phi con0*con0 con1*con1...) 
+ Node phicon = AddNode.phiCon(this,true); + if( phicon!=null ) return phicon; + + return null; + } + @Override Node copy(Node lhs, Node rhs) { return new MulNode(lhs,rhs); } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/MultiNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/MultiNode.java new file mode 100644 index 00000000..81c90f68 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/MultiNode.java @@ -0,0 +1,7 @@ +package com.seaofnodes.simple.node; + +public abstract class MultiNode extends Node { + + public MultiNode(Node... inputs) { super(inputs); } + +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/Node.java b/chapter07/src/main/java/com/seaofnodes/simple/node/Node.java new file mode 100644 index 00000000..fcc8df85 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/Node.java @@ -0,0 +1,467 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.Utils; +import com.seaofnodes.simple.type.Type; + +import java.util.*; + +/** + * All Nodes in the Sea of Nodes IR inherit from the Node class. + * The Node class provides common functionality used by all subtypes. + * Subtypes of Node specialize by overriding methods. + */ +public abstract class Node { + + /** + * Each node has a unique dense Node ID within a compilation context + * The ID is useful for debugging, for using as an offset in a bitvector, + * as well as for computing equality of nodes (to be implemented later). + */ + public final int _nid; + + /** + * Inputs to the node. These are use-def references to Nodes. + *

+ * Generally fixed length, ordered, nulls allowed, no unused trailing space. + * Ordering is required because e.g. "a/b" is different from "b/a". + * The first input (offset 0) is often a {@link #isCFG} node. + */ + public final ArrayList _inputs; + + /** + * Outputs reference Nodes that are not null and have this Node as an + * input. These nodes are users of this node, thus these are def-use + * references to Nodes. + *

+ * Outputs directly match inputs, making a directed graph that can be + * walked in either direction. These outputs are typically used for + * efficient optimizations but otherwise have no semantic meaning. + */ + public final ArrayList _outputs; + + + /** + * Current computed type for this Node. This value changes as the graph + * changes and more knowledge is gained about the program. + */ + public Type _type; + + + /** + * Immediate dominator tree depth, used to approximate a real IDOM during + * parsing where we do not have the whole program, and also peepholes + * change the CFG incrementally. + *

+ * See {@link ...} + */ + int _idepth; + + /** + * A private Global Static mutable counter, for unique node id generation. + * To make the compiler multithreaded, this field will have to move into a TLS. + * Starting with value 1, to avoid bugs confusing node ID 0 with uninitialized values. + * */ + private static int UNIQUE_ID = 1; + + protected Node(Node... inputs) { + _nid = UNIQUE_ID++; // allocate unique dense ID + _inputs = new ArrayList<>(); + Collections.addAll(_inputs,inputs); + _outputs = new ArrayList<>(); + for( Node n : _inputs ) + if( n != null ) + n.addUse( this ); + } + + // Easy reading label for debugger, e.g. "Add" or "Region" or "EQ" + public abstract String label(); + + // Unique label for graph visualization, e.g. "Add12" or "Region30" or "EQ99" + public String uniqueName() { return label() + _nid; } + + // Graphical label, e.g. "+" or "Region" or "==" + public String glabel() { return label(); } + + + // ------------------------------------------------------------------------ + + // Debugger Printing. + + // {@code toString} is what you get in the debugger. It has to print 1 + // line (because this is what a debugger typically displays by default) and + // has to be robust with broken graph/nodes. + @Override + public final String toString() { return print(); } + + // This is a *deep* print. This version will fail on cycles, which we will + // correct later when we can parse programs with loops. We print with a + // tik-tok style; the common _print0 calls the per-Node _print1, which + // calls back to _print0; + public final String print() { + return _print0(new StringBuilder(), new BitSet()).toString(); + } + // This is the common print: check for repeats, check for DEAD and print + // "DEAD" else call the per-Node print1. + final StringBuilder _print0(StringBuilder sb, BitSet visited) { + if (visited.get(_nid)) + return sb.append(label()); + visited.set(_nid); + return isDead() + ? 
sb.append(uniqueName()).append(":DEAD") + : _print1(sb, visited); + } + // Every Node implements this; a partial-line recursive print + abstract StringBuilder _print1(StringBuilder sb, BitSet visited); + + + // Print a node on 1 line, columnar aligned, as: + // NNID NNAME DDEF DDEF [[ UUSE UUSE ]] TYPE + // 1234 sssss 1234 1234 1234 1234 1234 1234 tttttt + public void _print_line( StringBuilder sb ) { + sb.append("%4d %-7.7s ".formatted(_nid,label())); + if( _inputs==null ) { + sb.append("DEAD\n"); + return; + } + for( Node def : _inputs ) + sb.append(def==null ? "____ " : "%4d ".formatted(def._nid)); + for( int i = _inputs.size(); i<3; i++ ) + sb.append(" "); + sb.append(" [[ "); + for( Node use : _outputs ) + sb.append("%4d ".formatted(use._nid)); + int lim = 5 - Math.max(_inputs.size(),3); + for( int i = _outputs.size(); idef into a Node. Keeps the edges correct, by removing + * the corresponding use->def edge. This may make the original + * def go dead. This function is co-recursive with {@link #kill}. + *

+ + * This method is the normal path for altering a Node, because it does the + * proper default edge maintenance. It also immediately kills + * Nodes that lose their last use; at times care must be taken to avoid + * killing Nodes that are being used without having an output Node. This + * definitely happens in the middle of recursive {@link #peephole} calls. + * + * @param idx which def to set + * @param new_def the new definition + * @return new_def for flow coding + */ + Node set_def(int idx, Node new_def ) { + Node old_def = in(idx); + if( old_def == new_def ) return new_def; // No change; still return new_def as documented + // If new def is not null, add the corresponding def->use edge + // This needs to happen before removing the old node's def->use edge as + // the new_def might get killed if the old node kills it recursively. + if( new_def != null ) + new_def.addUse(this); + if( old_def != null && // If the old def exists, remove a def->use edge + old_def.delUse(this) ) // If we removed the last use, the old def is now dead + old_def.kill(); // Kill old def + // Set the new_def over the old (killed) edge + _inputs.set(idx,new_def); + // Return new_def for easy flow-coding + return new_def; + } + + // Remove the numbered input, compressing the inputs in-place. This + // shuffles the order deterministically - which is suitable for Region and + // Phi, but not for every Node. + void delDef(int idx) { + Node old_def = in(idx); + if( old_def != null && // If the old def exists, remove a def->use edge + old_def.delUse(this) ) // If we removed the last use, the old def is now dead + old_def.kill(); // Kill old def + Utils.del(_inputs, idx); + } + + /** + * Add a new def to an existing Node. Keep the edges correct by + * adding the corresponding def->use edge. 
+ * + * @param new_def the new definition, appended to the end of existing definitions + * @return new_def for flow coding + */ + public Node add_def(Node new_def) { + // Add use->def edge + _inputs.add(new_def); + // If new def is not null, add the corresponding def->use edge + if( new_def != null ) + new_def.addUse(this); + return new_def; + } + + // Breaks the edge invariants, used temporarily + protected N addUse(Node n) { _outputs.add(n); return (N)this; } + + // Remove node 'use' from 'def's (i.e. our) output list, by compressing the list in-place. + // Return true if the output list is empty afterward. + // Error is 'use' does not exist; ok for 'use' to be null. + protected boolean delUse( Node use ) { + Utils.del(_outputs, Utils.find(_outputs, use)); + return _outputs.size() == 0; + } + + // Shortcut for "popping" n nodes. A "pop" is basically a + // set_def(last,null) followed by lowering the nIns() count. + void pop_n(int n) { + for( int i=0; iuses, by setting all of its defs + * to null. This may recursively kill more Nodes and is basically dead + * code elimination. This function is co-recursive with {@link #pop_n}. + */ + public void kill( ) { + assert isUnused(); // Has no uses, so it is dead + pop_n(nIns()); // Set all inputs to null, recursively killing unused Nodes + _type=null; // Flag as dead + assert isDead(); // Really dead now + } + + // Mostly used for asserts and printing. + boolean isDead() { return isUnused() && nIns()==0 && _type==null; } + + // Shortcuts to stop DCE mid-parse + // Add bogus null use to keep node alive + public N keep() { return addUse(null); } + // Remove bogus null. 
+ public N unkeep() { delUse(null); return (N)this; } + + + // Replace self with nnn in the graph, making 'this' go dead + void subsume( Node nnn ) { + assert nnn!=this; + while( nOuts() > 0 ) { + Node n = _outputs.removeLast(); + int idx = Utils.find(n._inputs, this); + n._inputs.set(idx,nnn); + nnn.addUse(n); + } + kill(); + } + + // ------------------------------------------------------------------------ + // Graph-based optimizations + + /** + * We allow disabling peephole opt so that we can observe the + * full graph, vs the optimized graph. + */ + public static boolean _disablePeephole = false; + + /** + * Try to peephole at this node and return a better replacement Node if + * possible. We compute a {@link Type} and then check and replace: + *

    + *
  • if the Type {@link Type#isConstant}, we replace with a {@link ConstantNode}
  • + *
  • in a future chapter we will look for a + * Common Subexpression + * to eliminate.
  • + *
  • we ask the Node for a better replacement. The "better replacement" + * is things like {@code (1+2)} becomes {@code 3} and {@code (1+(x+2))} becomes + * {@code (x+(1+2))}. By canonicalizing expressions we fold common addressing + * math constants, remove algebraic identities and generally simplify the + * code.
  • + *
+ */ + public final Node peephole( ) { + // Compute initial or improved Type + Type type = _type = compute(); + + if (_disablePeephole) + return this; // Peephole optimizations turned off + + // Replace constant computations from non-constants with a constant node + if (!(this instanceof ConstantNode) && type.isConstant()) + return deadCodeElim(new ConstantNode(type).peephole()); + + // Future chapter: Global Value Numbering goes here + + // Ask each node for a better replacement + Node n = idealize(); + if( n != null ) // Something changed + // Recursively optimize + return deadCodeElim(n.peephole()); + + return this; // No progress + } + + // m is the new Node, self is the old. + // Return 'm', which may have zero uses but is alive nonetheless. + // If self has zero uses (and is not 'm'), {@link #kill} self. + private Node deadCodeElim(Node m) { + // If self is going dead and not being returned here (Nodes returned + // from peephole commonly have no uses (yet)), then kill self. + if( m != this && isUnused() ) { + // Killing self - and since self recursively kills self's inputs we + // might end up killing 'm', which we are returning as a live Node. + // So we add a bogus extra null output edge to stop kill(). + m.keep(); // Keep m alive + kill(); // Kill self because replacing with 'm' + m.unkeep(); // Okay to peephole m + } + return m; + } + + /** + * This function needs to be + * Monotonic + * as it is part of a Monotone Analysis Framework. + * See for example this set of slides. + *

+ * For Chapter 2, all our Types are really integer constants, and so all + * the needed properties are trivially true, and we can ignore the high + * theory. Much later on, this will become important and allow us to do + * many fancy complex optimizations trivially... because theory. + *

+ * compute() needs to be stand-alone, and cannot recursively call compute + * on its inputs, because programs are cyclic (have loops!) and this will just + * infinitely recurse until stack overflow. Instead, compute typically + * computes a new type from the {@link #_type} field of its inputs. + */ + public abstract Type compute(); + + /** + * This function rewrites the current Node into a more "idealized" form. + * This is the bulk of our peephole rewrite rules, and we use this to + * e.g. turn arbitrary collections of adds and multiplies with mixed + * constants into a normal form that's easy for hardware to implement. + * Example: An array addressing expression: + *

   ary[idx+1]
+ * might turn into Sea-of-Nodes IR: + *
   (ary+12)+((idx+1) * 4)
+ * This expression can then be idealized into: + *
   ary + ((idx*4) + (12 + (1*4)))
+ * And more folding: + *
   ary + ((idx<<2) + 16)
+ * And during code-gen: + *
   MOV4 Rary,Ridx,16 // or some such hardware-specific notation 
+ *

+ * {@link #idealize} has a very specific calling convention: + *

    + *
  • If NO change is made, return {@code null} + *
  • If ANY change is made, return not-null; this can be {@code this} + *
  • The returned Node does NOT call {@link #peephole} on itself; the {@link #peephole} call will recursively peephole it. + *
  • Any NEW nodes that are not directly returned DO call {@link #peephole}. + *
+ *

+ * Examples: + * + * + * + * + * + *
before after return comment
{@code (x+5) }{@code (x+5) }{@code null }No change
{@code (5+x) }{@code (x+5) }{@code this }Swapped arguments
{@code ((x+1)+2)}{@code (x+(1+2))}{@code (x+_) }Returns 2 new Nodes
+ * + * The last entry deserves more discussion. The new Node {@code (1+2)} + * created in {@link #idealize} calls {@link #peephole} (which then folds + * into a constant). The other new Node {@code (x+3)} does not call + * peephole, because it is returned and peephole itself will recursively + * call peephole. + *

+ * Since idealize calls peephole and peephole calls idealize, you must be + * careful that all idealizations are monotonic: all transforms remove + * some feature, so that the set of available transforms always shrinks. + * If you don't, you risk an infinite peephole loop! + * + * @return Either a new or changed node, or null for no changes. + */ + public abstract Node idealize(); + + + // ------------------------------------------------------------------------ + // Peephole utilities + + // Swap inputs without letting either input go dead during the swap. + Node swap12() { + Node tmp = in(1); + _inputs.set(1,in(2)); + _inputs.set(2,tmp); + return this; + } + + // does this node contain all constants? + // Ignores in(0), as is usually control. + boolean all_cons() { + for( int i=1; i= 0 ) lhs = lhs.idom(); + if( comp <= 0 ) rhs = rhs.idom(); + } + if( lhs==null ) return null; + _idepth = lhs._idepth+1; + return (_idom=lhs); + } + + public boolean inProgress() { return false; } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/ReturnNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/ReturnNode.java new file mode 100644 index 00000000..4dea773a --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/ReturnNode.java @@ -0,0 +1,46 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.*; + +import java.util.BitSet; + +/** + * The Return node has two inputs. The first input is a control node and the + * second is the data node that supplies the return value. + *

+ * In this presentation, Return functions as a Stop node, since multiple return statements are not possible. + * The Stop node will be introduced in Chapter 6 when we implement if statements. + *

+ * The Return's output is the value from the data node. + */ +public class ReturnNode extends Node { + + public ReturnNode(Node ctrl, Node data) { + super(ctrl, data); + } + + public Node ctrl() { return in(0); } + public Node expr() { return in(1); } + + @Override + public String label() { return "Return"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + return expr()._print0(sb.append("return "), visited).append(";"); + } + + @Override public boolean isCFG() { return true; } + + @Override + public Type compute() { + return new TypeTuple(ctrl()._type,expr()._type); + } + + @Override + public Node idealize() { + if( ctrl()._type==Type.XCONTROL ) + return ctrl(); + return null; + } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/node/ScopeNode.java b/chapter07/src/main/java/com/seaofnodes/simple/node/ScopeNode.java new file mode 100644 index 00000000..cc6af152 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/node/ScopeNode.java @@ -0,0 +1,169 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; + +import java.util.*; + +/** + * The Scope node is purely a parser helper - it tracks names to nodes with a + * stack of scopes. 
+ */ +public class ScopeNode extends Node { + + /** + * The control is a name that binds to the currently active control + * node in the graph + */ + public static final String CTRL = "$ctrl"; + public static final String ARG0 = "arg"; + + /** + * Names for every input edge + */ + public final Stack> _scopes; + + + // A new ScopeNode + public ScopeNode() { + _scopes = new Stack<>(); + _type = Type.BOTTOM; + } + + + @Override public String label() { return "Scope"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + sb.append("Scope[ "); + String[] names = reverse_names(); + for( int j=0; j syms : _scopes ) + for( String name : syms.keySet() ) + names[syms.get(name)] = name; + return names; + } + + @Override public Type compute() { return Type.BOTTOM; } + + @Override public Node idealize() { return null; } + + public void push() { _scopes.push(new HashMap<>()); } + public void pop() { pop_n(_scopes.pop().size()); } + + // Create a new name in the current scope + public Node define( String name, Node n ) { + HashMap syms = _scopes.lastElement(); + if( syms.put(name,nIns()) != null ) + return null; // Double define + return add_def(n); + } + + // Lookup a name. It is recursive to support lazy Phis on loops (coming in chapter 8). + public Node lookup(String name) { return update(name,null,_scopes.size()-1); } + // If the name is present in any scope, then redefine else null + public Node update(String name, Node n) { return update(name,n,_scopes.size()-1); } + // Both recursive lookup and update. + private Node update( String name, Node n, int i ) { + if( i<0 ) return null; // Missed in all scopes, not found + var syms = _scopes.get(i); + var idx = syms.get(name); + if( idx == null ) return update(name,n,i-1); // Missed in this scope, recursively look + Node old = in(idx); + // If n is null we are looking up rather than updating + return n==null ? 
old : set_def(idx,n); + } + + public Node ctrl() { return in(0); } + + /** + * The ctrl of a ScopeNode is always bound to the currently active + * control node in the graph, via a special name '$ctrl' that is not + * a valid identifier in the language grammar and hence cannot be + * referenced in Simple code. + * + * @param n The node to be bound to '$ctrl' + * + * @return Node that was bound + */ + public Node ctrl(Node n) { return set_def(0,n); } + + /** + * Duplicate a ScopeNode; including all levels, up to Nodes. So this is + * neither shallow (would dup the Scope but not the internal HashMap + * tables), nor deep (would dup the Scope, the HashMap tables, but then + * also the program Nodes). + *

+ * The new Scope is a full-fledged Node with proper use<->def edges. + */ + public ScopeNode dup() { return dup(false); } + public ScopeNode dup(boolean loop) { + ScopeNode dup = new ScopeNode(); + // Our goals are: + // 1) duplicate the name bindings of the ScopeNode across all stack levels + // 2) Make the new ScopeNode a user of all the nodes bound + // 3) Ensure that the order of defs is the same to allow easy merging + for( HashMap syms : _scopes ) + dup._scopes.push(new HashMap<>(syms)); + String[] reverse = reverse_names(); + dup.add_def(ctrl()); // Control input is just copied + for( int i=1; isee for example this set of slides. + *

+ * The types form a lattice; @see a symmetric complete bounded (ranked) lattice. + *

+ * This wild lattice theory will be needed later to allow us to easily beef up + * the analysis and optimization of the Simple compiler... but we don't need it + * now, just know that it is coming along in a later Chapter. + *

g + * One of the fun things here is that while the theory is deep and subtle, the + * actual implementation is darn near trivial and is generally really obvious + * what we're doing with it. Right now, it's just simple integer math to do + * simple constant folding e.g. 1+2 == 3 stuff. + */ + +public class Type { + + // ---------------------------------------------------------- + // Simple types are implemented fully here. "Simple" means: the code and + // type hierarchy are simple, not that the Type is conceptually simple. + static final byte TBOT = 0; // Bottom (ALL) + static final byte TTOP = 1; // Top (ANY) + static final byte TCTRL = 2; // Ctrl flow bottom + static final byte TXCTRL = 3; // Ctrl flow top (mini-lattice: any-xctrl-ctrl-all) + static final byte TSIMPLE = 4; // End of the Simple Types + static final byte TINT = 5; // All Integers; see TypeInteger + static final byte TTUPLE = 6; // Tuples; finite collections of unrelated Types, kept in parallel + + public final byte _type; + + public boolean is_simple() { return _type < TSIMPLE; } + private static final String[] STRS = new String[]{"Bot","Top","Ctrl","~Ctrl"}; + protected Type(byte type) { _type = type; } + + public static final Type BOTTOM = new Type( TBOT ); // ALL + public static final Type TOP = new Type( TTOP ); // ANY + public static final Type CONTROL = new Type( TCTRL ); // Ctrl + public static final Type XCONTROL = new Type( TXCTRL ); // ~Ctrl + + public boolean isConstant() { return _type == TTOP || _type == TXCTRL; } + + public StringBuilder _print(StringBuilder sb) {return is_simple() ? sb.append(STRS[_type]) : sb;} + + public final Type meet(Type t) { + // Shortcut for the self case + if( t == this ) return this; + // Same-type is always safe in the subclasses + if( _type==t._type ) return xmeet(t); + // Reverse; xmeet 2nd arg is never "is_simple" and never equal to "this". 
+ if( is_simple() ) return this.xmeet(t ); + if( t.is_simple() ) return t .xmeet(this); + return BOTTOM; // Mixing 2 unrelated types + } + + // Compute meet right now. Overridden in subclasses. + // Handle cases where 'this.is_simple()' and unequal to 't'. + // Subclassed xmeet calls can assert that '!t.is_simple()'. + protected Type xmeet(Type t) { + assert is_simple(); // Should be overridden in subclass + // ANY meet anything is thing; thing meet ALL is ALL + if( _type==TBOT || t._type==TTOP ) return this; + if( _type==TTOP || t._type==TBOT ) return t; + // 'this' is {TCTRL,TXCTRL} + if( !t.is_simple() ) return BOTTOM; + // 't' is {TCTRL,TXCTRL} + return _type==TCTRL || t._type==TCTRL ? CONTROL : XCONTROL; + } + + @Override + public final String toString() { + return _print(new StringBuilder()).toString(); + } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/type/TypeInteger.java b/chapter07/src/main/java/com/seaofnodes/simple/type/TypeInteger.java new file mode 100644 index 00000000..93f5eae3 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/type/TypeInteger.java @@ -0,0 +1,63 @@ +package com.seaofnodes.simple.type; + +/** + * Integer Type + */ +public class TypeInteger extends Type { + + public final static TypeInteger TOP = new TypeInteger(false, 0); + public final static TypeInteger BOT = new TypeInteger(false, 1); + public final static TypeInteger ZERO= new TypeInteger(true, 0); + + private final boolean _is_con; + + /** + * The constant value or + * if not constant then 0=bottom, 1=top. 
+ */ + private final long _con; + + public TypeInteger(boolean is_con, long con) { + super(TINT); + _is_con = is_con; + _con = con; + } + + public static TypeInteger constant(long con) { return new TypeInteger(true, con); } + + public boolean isTop() { return !_is_con && _con==0; } + public boolean isBot() { return !_is_con && _con==1; } + + @Override + public StringBuilder _print(StringBuilder sb) { + if (isTop()) return sb.append("IntTop"); + if (isBot()) return sb.append("IntBot"); + return sb.append(_con); + } + + @Override + public boolean isConstant() { return _is_con; } + + public long value() { return _con; } + + @Override + public Type xmeet(Type other) { + if( this==other ) return this; + if (!(other instanceof TypeInteger i)) return super.meet(other); + // BOT wins + if ( isBot() ) return this; + if ( i.isBot() ) return i ; + // TOP loses + if ( i.isTop() ) return this; + if ( isTop() ) return i ; + assert isConstant() && i.isConstant(); + return _con==i._con ? this : TypeInteger.BOT; + } + + @Override + public boolean equals( Object o ) { + if( o==this ) return true; + if( !(o instanceof TypeInteger i) ) return false; + return _con==i._con && _is_con==i._is_con; + } +} diff --git a/chapter07/src/main/java/com/seaofnodes/simple/type/TypeTuple.java b/chapter07/src/main/java/com/seaofnodes/simple/type/TypeTuple.java new file mode 100644 index 00000000..f8c3b3c8 --- /dev/null +++ b/chapter07/src/main/java/com/seaofnodes/simple/type/TypeTuple.java @@ -0,0 +1,32 @@ +package com.seaofnodes.simple.type; + +public class TypeTuple extends Type { + + public final Type[] _types; + + public TypeTuple(Type... 
_types) { + super(TTUPLE); + this._types = _types; + } + + @Override + public Type xmeet(Type other) { + throw new UnsupportedOperationException("Meet on Tuple Type not yet implemented"); + } + + @Override + public StringBuilder _print(StringBuilder sb) { + sb.append("["); + for( Type t : _types ) + t._print(sb).append(","); + sb.setLength(sb.length()-1); + sb.append("]"); + return sb; + } + + public static final TypeTuple IF_BOTH = new TypeTuple(new Type[]{Type.CONTROL, Type.CONTROL}); + public static final TypeTuple IF_NEITHER = new TypeTuple(new Type[]{Type.XCONTROL,Type.XCONTROL}); + public static final TypeTuple IF_TRUE = new TypeTuple(new Type[]{Type.CONTROL, Type.XCONTROL}); + public static final TypeTuple IF_FALSE = new TypeTuple(new Type[]{Type.XCONTROL,Type.CONTROL}); + +} diff --git a/chapter07/src/test/java/com/seaofnodes/simple/Chapter07Test.java b/chapter07/src/test/java/com/seaofnodes/simple/Chapter07Test.java new file mode 100644 index 00000000..efcf90f5 --- /dev/null +++ b/chapter07/src/test/java/com/seaofnodes/simple/Chapter07Test.java @@ -0,0 +1,1026 @@ +package com.seaofnodes.simple; + +import com.seaofnodes.simple.node.*; +import com.seaofnodes.simple.type.TypeInteger; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class Chapter07Test { + + @Test + public void testChapter7Example() { + Parser parser = new Parser( + """ + while(arg < 10) { + arg = arg + 1; + #showGraph; + } + return arg; + """); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop6,arg,(Phi_arg+1));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + } + + @Test + public void testChapter7Regression() { + Parser parser = new Parser( + +""" +int a = 1; +if(arg){}else{ + while(a < 10) { + a = a + 1; + } +} +return a; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Region23,1,Phi(Loop11,1,(Phi_a+1)));", stop.toString()); + } + + 
@Test + public void testChapter7WhileNested() { + Parser parser = new Parser( + +""" +int sum = 0; +int i = 0; +while(i < arg) { + i = i + 1; + int j = 0; + while( j < arg ) { + sum = sum + j; + j = j + 1; + } +} +return sum; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,0,Phi(Loop21,Phi_sum,(Phi(Loop,0,(Phi_j+1))+Phi_sum)));", stop.toString()); + System.out.println(IRPrinter.prettyPrint(stop,99)); + } + + @Test + public void testChapter7WhileScope() { + Parser parser = new Parser( +""" +int a = 1; +int b = 2; +while(a < 10) { + if (a == 2) a = 3; + else b = 4; +} +return b; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,2,Phi(Region27,Phi_b,4));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + System.out.println(IRPrinter.prettyPrint(stop,99)); + } + + @Test + public void testChapter7WhileNestedIfAndInc() { + Parser parser = new Parser( +""" +int a = 1; +int b = 2; +while(a < 10) { + if (a == 2) a = 3; + else b = 4; + b = b + 1; + a = a + 1; +} +return b; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,2,(Phi(Region27,Phi_b,4)+1));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + System.out.println(IRPrinter.prettyPrint(stop,99)); + } + + + @Test + public void testChapter7While() { + Parser parser = new Parser( +""" +int a = 1; +while(a < 10) { + a = a + 1; + a = a + 2; +} +return a; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop7,1,((Phi_a+1)+2));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + } + + @Test + public void testChapter7WhilePeep() { + Parser parser = new Parser( +""" +int a = 1; +while(a < 10) { + a = a + 1; + a = a + 2; +} +return a; +"""); + StopNode stop = parser.parse(true); + assertEquals("return 
Phi(Loop7,1,(Phi_a+3));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + } + + @Test + public void testChapter7While2() { + Parser parser = new Parser( +""" +int a = 1; +while(arg) a = 2; +return a; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop7,1,2);", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + System.out.println(IRPrinter.prettyPrint(stop,99)); + } + + @Test + public void testChapter7While2Peep() { + Parser parser = new Parser( +""" +int a = 1; +while(arg) a = 2; +return a; +"""); + StopNode stop = parser.parse(false); + assertEquals("return Phi(Loop7,1,2);", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + System.out.println(IRPrinter.prettyPrint(stop, 99)); + } + + @Test + public void testChapter7While3() { + Parser parser = new Parser( +""" +int a = 1; +while(a < 10) { + int b = a + 1; + a = b + 2; +} +return a; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop7,1,((Phi_a+1)+2));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + } + + @Test + public void testChapter7While3Peep() { + Parser parser = new Parser( +""" +int a = 1; +while(a < 10) { + int b = a + 1; + a = b + 2; +} +return a; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop7,1,(Phi_a+3));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + } + + @Test + public void testChapter7While4() { + Parser parser = new Parser( +""" +int a = 1; +int b = 2; +while(a < 10) { + int b = a + 1; + a = b + 2; +} +return a; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,1,((Phi_a+1)+2));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + } + + @Test + public void 
testChapter7While4Peep() { + Parser parser = new Parser( +""" +int a = 1; +int b = 2; +while(a < 10) { + int b = a + 1; + a = b + 2; +} +return a; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,1,(Phi_a+3));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + } + + @Test + public void testChapter6PeepholeReturn() { + Parser parser = new Parser( +""" +if( true ) return 2; +return 1; +"""); + StopNode stop = parser.parse(true); + assertEquals("return 2;", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + } + + @Test + public void testChapter6PeepholeRotate() { + Parser parser = new Parser( +""" +int a = 1; +if (arg) + a = 2; +return (arg < a) < 3; +"""); + StopNode stop = parser.parse(false); + assertEquals("return ((arg VARIABLE = value_of_variable +print-% : ; @echo $* = $($*) + +# literal space +space := $() $() + +# Decide OS-specific questions +# jar-file seperator +ifeq ($(OS),Windows_NT) + SEP = ; +else + SEP = : +endif +# Find a reasonable ctags. +CTAGS = $(shell which ctags) +# Hack for MacOS: /usr/bin/ctags is unfriendly, so look for ctags from brew +ifeq ($(UNAME),Darwin) + CTAGS = $(shell brew list ctags 2> /dev/null | grep bin/ctags) +endif + +# Fun Args to javac. Mostly limit to java8 source definitions, and fairly +# aggressive lint warnings. 
+JAVAC_ARGS = -g + +# Source code +SIMPLE := com/seaofnodes/simple +SRC := src/main/java +TST := src/test/java +CLZDIR:= build/classes +main_javas := $(wildcard $(SRC)/$(SIMPLE)/*java $(SRC)/$(SIMPLE)/*/*java) +test_javas := $(wildcard $(TST)/$(SIMPLE)/*java $(TST)/$(SIMPLE)/*/*java) +main_classes := $(patsubst $(SRC)/%java,$(CLZDIR)/main/%class,$(main_javas)) +test_classes := $(patsubst $(TST)/%java,$(CLZDIR)/test/%class,$(test_javas)) +test_cp := $(patsubst $(TST)/$(SIMPLE)/%.java,com.seaofnodes.simple.%,$(test_javas)) +classes = $(main_classes) $(test_classes) +# All the libraries +libs = $(wildcard lib/*jar) +jars = $(subst $(space),$(SEP),$(libs)) + + +default_targets := $(main_classes) $(test_classes) +# Optionally add ctags to the default target if a reasonable one was found. +ifneq ($(CTAGS),) +default_targets += tags +endif + +default: $(default_targets) + +# Compile just the out-of-date files +$(main_classes): build/classes/main/%class: $(SRC)/%java + @echo "compiling " $@ " because " $? + @[ -d $(CLZDIR)/main ] || mkdir -p $(CLZDIR)/main + @javac $(JAVAC_ARGS) -cp "$(CLZDIR)/main$(SEP)$(jars)" -sourcepath $(SRC) -d $(CLZDIR)/main $(main_javas) + +$(test_classes): $(CLZDIR)/test/%class: $(TST)/%java $(main_classes) + @echo "compiling " $@ " because " $? + @[ -d $(CLZDIR)/test ] || mkdir -p $(CLZDIR)/test + @javac $(JAVAC_ARGS) -cp "$(CLZDIR)/test$(SEP)$(CLZDIR)/main$(SEP)$(jars)" -sourcepath $(TST) -d $(CLZDIR)/test $(test_javas) + +# Base launch line for JVM tests +JVM=nice java -ea -cp "build/classes/main${SEP}${jars}${SEP}$(CLZDIR)/test" + +tests: $(default_targets) + $(JVM) org.junit.runner.JUnitCore $(test_cp) + +.PHONY: clean +clean: + rm -rf build + rm -f TAGS + (find . 
-name "*~" -exec rm {} \; 2>/dev/null; exit 0) + +# Download libs from maven +lib: lib/junit-4.12.jar lib/hamcrest-core-1.3.jar + +# Jars +lib/junit-4.12.jar: + @[ -d lib ] || mkdir -p lib + @(cd lib; wget https://repo1.maven.org/maven2/junit/junit/4.12/junit-4.12.jar) + +lib/hamcrest-core-1.3.jar: + @[ -d lib ] || mkdir -p lib + @(cd lib; wget https://repo1.maven.org/maven2/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar) + +# Build emacs tags (part of a tasty emacs ide experience) +tags: $(main_javas) $(test_javas) + @rm -f TAGS + @$(CTAGS) -e --recurse=yes --extra=+q --fields=+fksaiS $(SRC) $(TST) diff --git a/chapter08/README.md b/chapter08/README.md new file mode 100644 index 00000000..656ea0ef --- /dev/null +++ b/chapter08/README.md @@ -0,0 +1,2 @@ +# Chapter 8 + diff --git a/chapter08/docs/08-grammar.md b/chapter08/docs/08-grammar.md new file mode 100644 index 00000000..cee33030 --- /dev/null +++ b/chapter08/docs/08-grammar.md @@ -0,0 +1,88 @@ +# Grammar for Chapter 8 + +```antlrv4 +grammar SimpleLanguage; + +program + : statement+ EOF + ; + +statement + : returnStatement + | declStatement + | blockStatement + | expressionStatement + | ifStatement + | whileStatement + | metaStatement + ; + +whileStatement + : 'while' '(' expression ')' statement + ; + +ifStatement + : 'if' '(' expression ')' statement ('else' statement)? 
+ ; + +metaStatement + : '#showGraph' ';' + ; + +expressionStatement + : IDENTIFIER '=' expression ';' + ; + +blockStatement + : '{' statement+ '}' + ; + +declStatement + : 'int' IDENTIFIER '=' expression ';' + ; + +returnStatement + : 'return' expression ';' + ; + +expression + : comparisonExpression + ; + +comparisonExpression + : additiveExpression (('==' | '!='| '>'| '<'| '>='| '<=') additiveExpression)* + ; + +additiveExpression + : multiplicativeExpression (('+' | '-') multiplicativeExpression)* + ; + +multiplicativeExpression + : unaryExpression (('*' | '/') unaryExpression)* + ; + +unaryExpression + : ('-') unaryExpression + | primaryExpression + ; + +primaryExpression + : IDENTIFIER + | INTEGER_LITERAL + | 'true' + | 'false' + | '(' expression ')' + ; + +INTEGER_LITERAL + : [1-9][0-9]* + | [0] + ; + +IDENTIFIER + : NON_DIGIT (NON_DIGIT | DEC_DIGIT)* + ; + +NON_DIGIT: [a-zA-Z_]; +DEC_DIGIT: [0-9]; +``` \ No newline at end of file diff --git a/chapter08/docs/build-graphs.sh b/chapter08/docs/build-graphs.sh new file mode 100644 index 00000000..73c39624 --- /dev/null +++ b/chapter08/docs/build-graphs.sh @@ -0,0 +1,3 @@ +dot -Tsvg 07-graph1.gv > 07-graph1.svg + + diff --git a/chapter08/pom.xml b/chapter08/pom.xml new file mode 100644 index 00000000..5942485f --- /dev/null +++ b/chapter08/pom.xml @@ -0,0 +1,14 @@ + + + 4.0.0 + + com.seaofnodes + simple + 1.0 + + chapter08 + jar + Chapter 8 + \ No newline at end of file diff --git a/chapter08/src/main/java/com/seaofnodes/simple/GraphVisualizer.java b/chapter08/src/main/java/com/seaofnodes/simple/GraphVisualizer.java new file mode 100644 index 00000000..c823641d --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/GraphVisualizer.java @@ -0,0 +1,258 @@ +package com.seaofnodes.simple; + +import com.seaofnodes.simple.node.*; + +import java.util.*; + +/** + * Simple visualizer that outputs GraphViz dot format. + * The dot output must be saved to a file and run manually via dot to generate the SVG output. 
+ * Currently, this is done manually. + */ +public class GraphVisualizer { + + /** + * If set to true we put the control nodes in a separate cluster from + * data nodes. + */ + boolean _separateControlCluster = false; + + public GraphVisualizer(boolean separateControlCluster) { this._separateControlCluster = separateControlCluster; } + public GraphVisualizer() { this(false); } + + public String generateDotOutput(Parser parser) { + + // Since the graph has cycles, we need to create a flat list of all the + // nodes in the graph. + Collection all = findAll(parser); + StringBuilder sb = new StringBuilder(); + sb.append("digraph chapter08 {\n"); + sb.append("/*\n"); + sb.append(parser.src()); + sb.append("\n*/\n"); + + // To keep the Scopes below the graph and pointing up into the graph we + // need to group the Nodes in a subgraph cluster, and the scopes into a + // different subgraph cluster. THEN we can draw edges between the + // scopes and nodes. If we try to cross subgraph cluster borders while + // still making the subgraphs DOT gets confused. + sb.append("\trankdir=BT;\n"); // Force Nodes before Scopes + + // Preserve node input order + sb.append("\tordering=\"in\";\n"); + + // Merge multiple edges hitting the same node. Makes common shared + // nodes much prettier to look at. 
+ sb.append("\tconcentrate=\"true\";\n"); + + // Force nested scopes to order + sb.append("\tcompound=\"true\";\n"); + + // Just the Nodes first, in a cluster no edges + nodes(sb, all); + + // Now the scopes, in a cluster no edges + scope(sb,parser._scope ); + for( ScopeNode scope : parser._xScopes ) + scope( sb, scope ); + + // Walk the Node edges + nodeEdges(sb, all); + + // Walk the active Scope edges + scopeEdges( sb, parser._scope ); + for( ScopeNode scope : parser._xScopes ) + scopeEdges( sb, scope ); + + sb.append("}\n"); + return sb.toString(); + } + + private void nodesByCluster(StringBuilder sb, boolean doCtrl, Collection all) { + if (!_separateControlCluster && doCtrl) // all nodes in 1 cluster + return; + // Just the Nodes first, in a cluster no edges + sb.append(doCtrl ? "\tsubgraph cluster_Controls {\n" : "\tsubgraph cluster_Nodes {\n"); // Magic "cluster_" in the subgraph name + for( Node n : all ) { + if( n instanceof ProjNode || n instanceof ScopeNode ) + continue; // Do not emit, rolled into MultiNode or Scope cluster already + if (_separateControlCluster && doCtrl && !n.isCFG()) continue; + if (_separateControlCluster && !doCtrl && n.isCFG()) continue; + sb.append("\t\t").append(n.uniqueName()).append(" [ "); + String lab = n.glabel(); + if( n instanceof MultiNode ) { + // Make a box with the MultiNode on top, and all the projections on the bottom + sb.append("shape=plaintext label=<\n"); + sb.append("\t\t\t\n"); + sb.append("\t\t\t\n"); + sb.append("\t\t\t"); + boolean doProjTable = false; + for( Node use : n._outputs ) { + if( use instanceof ProjNode proj ) { + if (!doProjTable) { + doProjTable = true; + sb.append(""); + } + sb.append("\n"); + sb.append("\t\t\t
").append(lab).append("
").append("\n"); + sb.append("\t\t\t\t").append("\n"); + sb.append("\t\t\t\t"); + } + sb.append(""); + } + } + if (doProjTable) { + sb.append("").append("\n"); + sb.append("\t\t\t\t
").append(proj.glabel()).append("
").append("\n"); + sb.append("\t\t\t
>\n\t\t"); + + } else { + // control nodes have box shape + // other nodes are ellipses, i.e. default shape + if (n.isCFG()) sb.append("shape=box style=filled fillcolor=yellow "); + else if (n instanceof PhiNode) sb.append("style=filled fillcolor=lightyellow "); + sb.append("label=\"").append(lab).append("\" "); + } + sb.append("];\n"); + } + if (!_separateControlCluster) { + // Force Region & Phis to line up + for( Node n : all ) { + if( n instanceof RegionNode region ) { + sb.append("\t\t{ rank=same; "); + sb.append(region).append(";") ; + for( Node phi : region._outputs ) + if (phi instanceof PhiNode) sb.append(phi.uniqueName()).append(";"); + sb.append("}\n"); + } + } + } + sb.append("\t}\n"); // End Node cluster + } + + private void nodes(StringBuilder sb, Collection all) { + nodesByCluster(sb, true, all); + nodesByCluster(sb, false, all); + } + + // Build a nested scope display, walking the _prev edge + private void scope( StringBuilder sb, ScopeNode scope ) { + sb.append("\tnode [shape=plaintext];\n"); + int level=1; + for( int idx = scope._scopes.size()-1; idx>=0; idx-- ) { + var syms = scope._scopes.get(idx); + String scopeName = makeScopeName(scope, level); + sb.append("\tsubgraph cluster_").append(scopeName).append(" {\n"); // Magic "cluster_" in the subgraph name + sb.append("\t\t").append(scopeName).append(" [label=<\n"); + sb.append("\t\t\t\n"); + // Add the scope level + int scopeLevel = scope._scopes.size()-level; + sb.append("\t\t\t"); + for(String name: syms.keySet()) + sb.append(""); + sb.append("\n"); + sb.append("\t\t\t
").append(scopeLevel).append("").append(name).append("
>];\n"); + level++; + } + // Scope clusters nest, so the graphics shows the nested scopes, so + // they are not closed as they are printed; so they just keep nesting. + // We close them all at once here. + sb.append( "\t}\n".repeat( level-1 ) ); // End all Scope clusters + } + + private String makeScopeName(ScopeNode sn, int level) { return sn.uniqueName() + "_" + level; } + private String makePortName(String scopeName, String varName) { return scopeName + "_" + varName; } + + // Walk the node edges + private void nodeEdges(StringBuilder sb, Collection all) { + // All them edge labels + sb.append("\tedge [ fontname=Helvetica, fontsize=8 ];\n"); + for( Node n : all ) { + // Do not display the Constant->Start edge; + // ProjNodes handled by Multi; + // ScopeNodes are done separately + if( n instanceof ConstantNode || n instanceof ProjNode || n instanceof ScopeNode ) + continue; + for( int i=0; idef edge from Phi to Region. + sb.append('\t').append(n.uniqueName()); + sb.append(" -> "); + sb.append(def.uniqueName()); + sb.append(" [style=dotted taillabel=").append(i).append("];\n"); + } else if( def != null ) { + // Most edges land here use->def + sb.append('\t').append(n.uniqueName()).append(" -> "); + if( def instanceof ProjNode proj ) { + String mname = proj.ctrl().uniqueName(); + sb.append(mname).append(":p").append(proj._idx); + } else sb.append(def.uniqueName()); + // Number edges, so we can see how they track + sb.append("[taillabel=").append(i); + // control edges are colored red + if( def.isCFG() ) + sb.append(" color=red"); + // Backedges do not add a ranking constraint + if( i==2 && (n instanceof PhiNode || n instanceof LoopNode) ) + sb.append(" constraint=false"); + sb.append("];\n"); + } + } + } + } + + // Walk the scope edges + private void scopeEdges( StringBuilder sb, ScopeNode scope ) { + sb.append("\tedge [style=dashed color=cornflowerblue];\n"); + int level=1; + for( int i = scope._scopes.size()-1; i>=0; i-- ) { + var syms = scope._scopes.get(i); + 
String scopeName = makeScopeName(scope, level); + for( String name : syms.keySet() ) { + int idx = syms.get(name); + Node def = scope.in(idx); + while( def instanceof ScopeNode lazy ) + def = lazy.in(idx); + if( def==null ) continue; + sb.append("\t") + .append(scopeName).append(":") + .append('"').append(makePortName(scopeName, name)).append('"') // wrap port name with quotes because $ctrl is not valid unquoted + .append(" -> "); + if( def instanceof ProjNode proj ) { + String mname = proj.ctrl().uniqueName(); + sb.append(mname).append(":p").append(proj._idx); + } else sb.append(def.uniqueName()); + sb.append(";\n"); + } + level++; + } + } + + /** + * Finds all nodes in the graph. + */ + private Collection findAll(Parser parser) { + final HashMap all = new HashMap<>(); + for( Node n : Parser.START._outputs ) + walk(all, n); + for( Node n : parser._scope._inputs ) + walk(all, n); + return all.values(); + } + + /** + * Walk a subgraph and populate distinct nodes in the all list. + */ + private void walk(HashMap all, Node n) { + if(n == null ) return; + if (all.get(n._nid) != null) return; // Been there, done that + all.put(n._nid, n); + for (Node c : n._inputs) + walk(all, c); + for( Node c : n._outputs ) + walk(all, c); + } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/IRPrinter.java b/chapter08/src/main/java/com/seaofnodes/simple/IRPrinter.java new file mode 100644 index 00000000..dc6d9f4f --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/IRPrinter.java @@ -0,0 +1,124 @@ +package com.seaofnodes.simple; + +import com.seaofnodes.simple.node.LoopNode; +import com.seaofnodes.simple.node.Node; + +import java.util.ArrayList; +import java.util.BitSet; + +public class IRPrinter { + + // Another bulk pretty-printer. Makes more effort at basic-block grouping. + public static String prettyPrint(Node node, int depth) { + // First, a Breadth First Search at a fixed depth. 
+ BFS bfs = new BFS(node,depth); + // Convert just that set to a post-order + ArrayList rpos = new ArrayList<>(); + BitSet visit = new BitSet(); + for( int i=bfs._lim; i< bfs._bfs.size(); i++ ) + postOrd( bfs._bfs.get(i), rpos, visit, bfs._bs); + // Reverse the post-order walk + StringBuilder sb = new StringBuilder(); + boolean gap=false; + for( int i=rpos.size()-1; i>=0; i-- ) { + Node n = rpos.get(i); + if( n.isCFG() || n.isMultiHead() ) { + if( !gap ) sb.append("\n"); // Blank before multihead + n._print_line(sb); // Print head + while( --i >= 0 ) { + Node t = rpos.get(i); + if( !t.isMultiTail() ) { i++; break; } + t._print_line(sb); + } + sb.append("\n"); // Blank after multitail + gap = true; + } else { + n._print_line( sb ); + gap = false; + } + } + return sb.toString(); + } + + private static void postOrd(Node n, ArrayList rpos, BitSet visit, BitSet bfs) { + if( !bfs.get(n._nid) ) + return; // Not in the BFS visit + if( visit.get(n._nid) ) return; // Already post-order walked + visit.set(n._nid); + // First walk the CFG, then everything + if( n.isCFG() ) { + for( Node use : n._outputs ) + if( use.isCFG() && use.nOuts()>=1 && !(use._outputs.get(0) instanceof LoopNode) ) + postOrd(use, rpos,visit,bfs); + for( Node use : n._outputs ) + if( use.isCFG() ) + postOrd(use,rpos,visit,bfs); + } + for( Node use : n._outputs ) + postOrd(use, rpos,visit,bfs); + // Post-order + rpos.add(n); + } + + // Breadth-first search, broken out in a class to keep in more independent. + // Maintains a root-set of Nodes at the limit (or past by 1 if MultiHead). 
+ public static class BFS { + // A breadth first search, plus MultiHeads for any MultiTails + public final ArrayList _bfs; + public final BitSet _bs; // Visited members by node id + public final int _depth; // Depth limit + public final int _lim; // From here to _bfs._len can be roots for a reverse search + public BFS( Node base, int d ) { + _depth = d; + _bfs = new ArrayList<>(); + _bs = new BitSet(); + + add(base); // Prime the pump + int idx=0, lim=1; // Limit is where depth counter changes + while( idx < _bfs.size() ) { // Ran out of nodes below depth + Node n = _bfs.get(idx++); + for( Node def : n._inputs ) + if( def!=null && !_bs.get(def._nid) ) + add(def); + if( idx==lim ) { // Depth counter changes at limit + if( --d < 0 ) + break; // Ran out of depth + lim = _bfs.size(); // New depth limit + } + } + // Toss things past the limit except multi-heads + while( idx < _bfs.size() ) { + Node n = _bfs.get(idx); + if( n.isMultiHead() ) idx++; + else del(idx); + } + // Root set is any node with no inputs in the visited set + lim = _bfs.size(); + for( int i=_bfs.size()-1; i>=0; i-- ) + if( !any_visited(_bfs.get(i)) ) + swap( i,--lim); + _lim = lim; + } + void swap( int x, int y ) { + if( x==y ) return; + Node tx = _bfs.get(x); + Node ty = _bfs.get(y); + _bfs.set(x,ty); + _bfs.set(y,tx); + } + void add(Node n) { + _bfs.add(n); + _bs.set(n._nid); + } + void del(int idx) { + Node n = Utils.del(_bfs, idx); + _bs.clear(n._nid); + } + boolean any_visited( Node n ) { + for( Node def : n._inputs ) + if( def!=null && _bs.get(def._nid) ) + return true; + return false; + } + } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/Parser.java b/chapter08/src/main/java/com/seaofnodes/simple/Parser.java new file mode 100644 index 00000000..28b8f432 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/Parser.java @@ -0,0 +1,574 @@ +package com.seaofnodes.simple; + +import com.seaofnodes.simple.node.*; +import com.seaofnodes.simple.type.*; + +import java.util.*; 
+ +/** + * The Parser converts a Simple source program to the Sea of Nodes intermediate + * representation directly in one pass. There is no intermediate Abstract + * Syntax Tree structure. + *

+ * This is a simple recursive descent parser. All lexical analysis is done here as well. + */ +public class Parser { + public static boolean LAZY = false; + + /** + * A Global Static, unique to each compilation. This is a public, so we + * can make constants everywhere without having to thread the StartNode + * through the entire parser and optimizer. + *

+ * To make the compiler multithreaded, this field will have to move into a TLS. + */ + public static StartNode START; + + public StopNode STOP; + + // The Lexer. Thin wrapper over a byte[] buffer with a cursor. + private final Lexer _lexer; + + /** + * Current ScopeNode - ScopeNodes change as we parse code, but at any point of time + * there is one current ScopeNode. The reason the current ScopeNode can change is to do with how + * we handle branching. See {@link #parseIf()}. + *

+ * Each ScopeNode contains a stack of lexical scopes, each scope is a symbol table that binds + * variable names to Nodes. The top of this stack represents current scope. + *

+ * We keep a list of all ScopeNodes so that we can show them in graphs. + * @see #parseIf() + * @see #_xScopes + */ + public ScopeNode _scope; + + /** + * List of keywords disallowed as identifiers + */ + private final HashSet KEYWORDS = new HashSet<>(){{ + add("else"); + add("false"); + add("if"); + add("int"); + add("return"); + add("true"); + add("while"); + }}; + + + /** + * We clone ScopeNodes when control flows branch; it is useful to have + * a list of all active ScopeNodes for purposes of visualization of the SoN graph + */ + public final Stack _xScopes = new Stack<>(); + + public Parser(String source, TypeInteger arg) { + Node.reset(); + _lexer = new Lexer(source); + _scope = new ScopeNode(); + START = new StartNode(new Type[]{ Type.CONTROL, arg }); + STOP = new StopNode(); + } + + public Parser(String source) { + this(source, TypeInteger.BOT); + } + + @Override + public String toString() { return _lexer.toString(); } + + String src() { return new String( _lexer._input ); } + + // Debugging utility to find a Node by index + public static Node find(int nid) { return START.find(nid); } + + private Node ctrl() { return _scope.ctrl(); } + + private Node ctrl(Node n) { return _scope.ctrl(n); } + + public StopNode parse() { return parse(false); } + public StopNode parse(boolean show) { + // Enter a new scope for the initial control and arguments + _scope.push(); + _scope.define(ScopeNode.CTRL, new ProjNode(START, 0, ScopeNode.CTRL).peephole()); + _scope.define(ScopeNode.ARG0, new ProjNode(START, 1, ScopeNode.ARG0).peephole()); + parseBlock(); + _scope.pop(); + if (!_lexer.isEOF()) throw error("Syntax error, unexpected " + _lexer.getAnyNextToken()); + STOP.peephole(); + if( show ) showGraph(); + return STOP; + } + + + /** + * Parses a block + * + *

+     *     '{' statements '}'
+     * 
+ * Does not parse the opening or closing '{}' + * @return a {@link Node} or {@code null} + */ + private Node parseBlock() { + // Enter a new scope + _scope.push(); + while (!peek('}') && !_lexer.isEOF()) + parseStatement(); + // Exit scope + _scope.pop(); + return null; + } + + /** + * Parses a statement + * + *
+     *     returnStatement | declStatement | blockStatement | ifStatement | whileStatement | metaStatement | expressionStatement
+     * 
+ * @return a {@link Node} or {@code null} + */ + private Node parseStatement() { + if (matchx("return") ) return parseReturn(); + else if (matchx("int")) return parseDecl(); + else if (match ("{" )) return require(parseBlock(),"}"); + else if (matchx("if" )) return parseIf(); + else if (matchx("while")) return parseWhile(); + else if (matchx("#showGraph")) return require(showGraph(),";"); + else return parseExpressionStatement(); + } + + /** + * Parses a while statement + * + *
+     *     while ( expression ) statement
+     * 
+ * @return a {@link Node}, never {@code null} + */ + private Node parseWhile() { + require("("); + + // Loop region has two control inputs, the first one is the entry + // point, and second one is back edge that is set after loop is parsed + // (see end_loop() call below). Note that the absence of back edge is + // used as an indicator to switch off peepholes of the region and + // associated phis. + + ctrl(new LoopNode(ctrl(),null).peephole()); + ScopeNode head = _xScopes.push(_scope).keep(); // Save the current scope as the loop head + // Make a new Scope for the body, which has lazy-phi loop markers. + _scope = _scope.dup(true); + + // Parse predicate + var pred = require(parseExpression(), ")"); + // IfNode takes current control and predicate + IfNode ifNode = (IfNode)new IfNode(ctrl(), pred).keep().peephole(); + // Setup projection nodes + Node ifT = new ProjNode(ifNode, 0, "True" ).peephole(); + ifNode.unkeep(); + Node ifF = new ProjNode(ifNode, 1, "False").peephole(); + // The exit scope, accounting for any side effects in the predicate + var exit = _scope.dup(); + _xScopes.push(exit); + exit.ctrl(ifF); + + // Parse the true side, which corresponds to loop body + ctrl(ifT); // set ctrl token to ifTrue projection + parseStatement(); // Parse loop body + + // The true branch loops back, so whatever is current control gets + // added to head loop as input + head.endLoop(_scope, exit); + head.unkeep().kill(); + _xScopes.pop(); + _xScopes.pop(); // Discard pushed from graph display + + // At exit the false control is the current control, and + // the scope is the exit scope after the exit test. + return (_scope = exit); + } + + /** + * Parses a statement + * + *
+     *     if ( expression ) statement [else statement]
+     * 
+ * @return a {@link Node}, never {@code null} + */ + private Node parseIf() { + require("("); + // Parse predicate + var pred = require(parseExpression(), ")"); + // IfNode takes current control and predicate + IfNode ifNode = (IfNode)new IfNode(ctrl(), pred).keep().peephole(); + // Setup projection nodes + Node ifT = new ProjNode(ifNode, 0, "True" ).peephole(); + ifNode.unkeep(); + Node ifF = new ProjNode(ifNode, 1, "False").peephole(); + // In if true branch, the ifT proj node becomes the ctrl + // But first clone the scope and set it as current + int ndefs = _scope.nIns(); + ScopeNode fScope = _scope.dup(); // Duplicate current scope + _xScopes.push(fScope); // For graph visualization we need all scopes + + // Parse the true side + ctrl(ifT); // set ctrl token to ifTrue projection + parseStatement(); // Parse true-side + ScopeNode tScope = _scope; + + // Parse the false side + _scope = fScope; // Restore scope, then parse else block if any + ctrl(ifF); // Ctrl token is now set to ifFalse projection + if (matchx("else")) { + parseStatement(); + fScope = _scope; + } + + if( tScope.nIns() != ndefs || fScope.nIns() != ndefs ) + throw error("Cannot define a new name on one arm of an if"); + + // Merge results + _scope = tScope; + _xScopes.pop(); // Discard pushed from graph display + + return ctrl(tScope.mergeScopes(fScope)); + } + + + /** + * Parses a return statement; "return" already parsed. + * The $ctrl edge is killed. + * + *
+     *     'return' expression ';'
+     * 
+ * @return an expression {@link Node}, never {@code null} + */ + private Node parseReturn() { + var expr = require(parseExpression(), ";"); + Node ret = STOP.addReturn(new ReturnNode(ctrl(), expr).peephole()); + ctrl(new ConstantNode(Type.XCONTROL).peephole()); // Kill control + return ret; + } + + /** + * Dumps out the node graph + * @return {@code null} + */ + private Node showGraph() { + System.out.println(new GraphVisualizer().generateDotOutput(this)); + return null; + } + + /** + * Parses an expression statement + * + *
+     *     name '=' expression ';'
+     * 
+ * @return an expression {@link Node}, never {@code null} + */ + private Node parseExpressionStatement() { + var name = requireId(); + require("="); + var expr = require(parseExpression(), ";"); + if( _scope.update(name, expr)==null ) + throw error("Undefined name '" + name + "'"); + return expr; + } + + /** + * Parses a declStatement + * + *
+     *     'int' name '=' expression ';'
+     * 
+ * @return an expression {@link Node}, never {@code null} + */ + private Node parseDecl() { + // Type is 'int' for now + var name = requireId(); + require("="); + var expr = require(parseExpression(), ";"); + if( _scope.define(name,expr) == null ) + throw error("Redefining name '" + name + "'"); + return expr; + } + + /** + * Parse an expression of the form: + * + *
+     *     expr : compareExpr
+     * 
+ * @return an expression {@link Node}, never {@code null} + */ + private Node parseExpression() { return parseComparison(); } + + /** + * Parse an expression of the form: + * + *
+     *     expr : additiveExpr op additiveExpr
+     * 
+ * @return an comparator expression {@link Node}, never {@code null} + */ + private Node parseComparison() { + var lhs = parseAddition(); + if (match("==")) return new BoolNode.EQ(lhs, parseComparison()).peephole(); + if (match("!=")) return new NotNode(new BoolNode.EQ(lhs, parseComparison()).peephole()).peephole(); + if (match("<" )) return new BoolNode.LT(lhs, parseComparison()).peephole(); + if (match("<=")) return new BoolNode.LE(lhs, parseComparison()).peephole(); + if (match(">" )) return new BoolNode.LT(parseComparison(), lhs).peephole(); + if (match(">=")) return new BoolNode.LE(parseComparison(), lhs).peephole(); + return lhs; + } + + /** + * Parse an additive expression + * + *
+     *     additiveExpr : multiplicativeExpr (('+' | '-') multiplicativeExpr)*
+     * 
+     * @return an add expression {@link Node}, never {@code null}
+     */
+    private Node parseAddition() {
+        var lhs = parseMultiplication();
+        // Iterate instead of recursing on the right operand so that chains are
+        // left-associative, as the grammar above states: "a-b-c" must build
+        // ((a-b)-c), whereas right recursion built (a-(b-c)).
+        while( true ) {
+            if     ( match("+") ) lhs = new AddNode(lhs, parseMultiplication()).peephole();
+            else if( match("-") ) lhs = new SubNode(lhs, parseMultiplication()).peephole();
+            else return lhs;    // No more additive operators
+        }
+    }
+
+    /**
+     * Parse a multiplicativeExpr expression
+     *
+     *
+     *     multiplicativeExpr : unaryExpr (('*' | '/') unaryExpr)*
+     * 
+     * @return a multiply expression {@link Node}, never {@code null}
+     */
+    private Node parseMultiplication() {
+        var lhs = parseUnary();
+        // Iterate instead of recursing on the right operand so that chains are
+        // left-associative, as the grammar above states: "8/4/2" must build
+        // ((8/4)/2), whereas right recursion built (8/(4/2)).
+        while( true ) {
+            if     ( match("*") ) lhs = new MulNode(lhs, parseUnary()).peephole();
+            else if( match("/") ) lhs = new DivNode(lhs, parseUnary()).peephole();
+            else return lhs;    // No more multiplicative operators
+        }
+    }
+
+    /**
+     * Parse a unary minus expression.
+     *
+     *
+     *     unaryExpr : ('-') unaryExpr | primaryExpr
+     * 
+ * @return a unary expression {@link Node}, never {@code null} + */ + private Node parseUnary() { + if (match("-")) return new MinusNode(parseUnary()).peephole(); + return parsePrimary(); + } + + /** + * Parse a primary expression: + * + *
+     *     primaryExpr : integerLiteral | Identifier | true | false | '(' expression ')'
+     * 
+ * @return a primary {@link Node}, never {@code null} + */ + private Node parsePrimary() { + if( _lexer.isNumber() ) return parseIntegerLiteral(); + if( match("(") ) return require(parseExpression(), ")"); + if( matchx("true" ) ) return new ConstantNode(TypeInteger.constant(1)).peephole(); + if( matchx("false") ) return new ConstantNode(TypeInteger.constant(0)).peephole(); + String name = _lexer.matchId(); + if( name == null) throw errorSyntax("an identifier or expression"); + Node n = _scope.lookup(name); + if( n!=null ) return n; + throw error("Undefined name '" + name + "'"); + } + + /** + * Parse integer literal + * + *
+     *     integerLiteral: [1-9][0-9]* | [0]
+     * 
+ */ + private ConstantNode parseIntegerLiteral() { + return (ConstantNode) new ConstantNode(_lexer.parseNumber()).peephole(); + } + + ////////////////////////////////// + // Utilities for lexical analysis + + // Return true and skip if "syntax" is next in the stream. + private boolean match (String syntax) { return _lexer.match (syntax); } + // Match must be "exact", not be followed by more id letters + private boolean matchx(String syntax) { return _lexer.matchx(syntax); } + // Return true and do NOT skip if 'ch' is next + private boolean peek(char ch) { return _lexer.peek(ch); } + + // Require and return an identifier + private String requireId() { + String id = _lexer.matchId(); + if (id != null && !KEYWORDS.contains(id) ) return id; + throw error("Expected an identifier, found '"+id+"'"); + } + + // Require an exact match + private void require(String syntax) { require(null, syntax); } + private Node require(Node n, String syntax) { + if (match(syntax)) return n; + throw errorSyntax(syntax); + } + + RuntimeException errorSyntax(String syntax) { + return error("Syntax error, expected " + syntax + ": " + _lexer.getAnyNextToken()); + } + + static RuntimeException error(String errorMessage) { + return new RuntimeException(errorMessage); + } + + //////////////////////////////////// + // Lexer components + + private static class Lexer { + + // Input buffer; an array of text bytes read from a file or a string + private final byte[] _input; + // Tracks current position in input buffer + private int _position = 0; + + /** + * Record the source text for lexing + */ + public Lexer(String source) { + this(source.getBytes()); + } + + /** + * Direct from disk file source + */ + public Lexer(byte[] buf) { + _input = buf; + } + + // Very handy in the debugger, shows the unparsed program + @Override + public String toString() { + return new String(_input, _position, _input.length - _position); + } + + // True if at EOF + private boolean isEOF() { + return _position >= 
_input.length; + } + + // Peek next character, or report EOF + private char peek() { + return isEOF() ? Character.MAX_VALUE // Special value that causes parsing to terminate + : (char) _input[_position]; + } + + private char nextChar() { + char ch = peek(); + _position++; + return ch; + } + + // True if a white space + private boolean isWhiteSpace() { + return peek() <= ' '; // Includes space, tab, newline, CR + } + + /** + * Advance the cursor past any white space + */ + private void skipWhiteSpace() { + while (isWhiteSpace()) _position++; + } + + + // Return true, if we find "syntax" after skipping white space; also + // then advance the cursor past syntax. + // Return false otherwise, and do not advance the cursor. + boolean match(String syntax) { + skipWhiteSpace(); + int len = syntax.length(); + if (_position + len > _input.length) return false; + for (int i = 0; i < len; i++) + if ((char) _input[_position + i] != syntax.charAt(i)) + return false; + _position += len; + return true; + } + + boolean matchx(String syntax) { + if( !match(syntax) ) return false; + if( !isIdLetter(peek()) ) return true; + _position -= syntax.length(); + return false; + } + + private boolean peek(char ch) { + skipWhiteSpace(); + return peek()==ch; + } + + // Return an identifier or null + String matchId() { + skipWhiteSpace(); + return isIdStart(peek()) ? 
parseId() : null; + } + + // Used for errors + String getAnyNextToken() { + if (isEOF()) return ""; + if (isIdStart(peek())) return parseId(); + if (isPunctuation(peek())) return parsePunctuation(); + return String.valueOf(peek()); + } + + boolean isNumber() {return isNumber(peek());} + boolean isNumber(char ch) {return Character.isDigit(ch);} + + private Type parseNumber() { + int start = _position; + while (isNumber(nextChar())) ; + String snum = new String(_input, start, --_position - start); + if (snum.length() > 1 && snum.charAt(0) == '0') + throw error("Syntax error: integer values cannot start with '0'"); + return TypeInteger.constant(Long.parseLong(snum)); + } + + // First letter of an identifier + private boolean isIdStart(char ch) { + return Character.isAlphabetic(ch) || ch == '_'; + } + + // All characters of an identifier, e.g. "_x123" + private boolean isIdLetter(char ch) { + return Character.isLetterOrDigit(ch) || ch == '_'; + } + + private String parseId() { + int start = _position; + while (isIdLetter(nextChar())) ; + return new String(_input, start, --_position - start); + } + + // + private boolean isPunctuation(char ch) { + return "=;[]<>()+-/*".indexOf(ch) != -1; + } + + private String parsePunctuation() { + int start = _position; + return new String(_input, start, 1); + } + } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/Utils.java b/chapter08/src/main/java/com/seaofnodes/simple/Utils.java new file mode 100644 index 00000000..4d663c25 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/Utils.java @@ -0,0 +1,39 @@ +package com.seaofnodes.simple; + +import java.util.ArrayList; + +public class Utils { + public static RuntimeException TODO() { return TODO("Not yet implemented"); } + public static RuntimeException TODO(String msg) { return new RuntimeException(msg); } + + /** + * Fast, constant-time, element removal. 
Does not preserve order + * + * @param array ArrayList to modify + * @param i element to be removed + * @return element removed + */ + public static E del(ArrayList array, int i) { + if ( i >= 0 && i < array.size() ) { + E tmp = array.get(i); + E last = array.removeLast(); + if (i < array.size()) array.set(i, last); + return tmp; + } + return null; + } + + /** + * Search a list for an element by reference + * + * @param ary List to search in + * @param x Object to be searched + * @return >= 0 on success, -1 on failure + */ + public static int find( ArrayList ary, E x ) { + for( int i=0; i 2 ) { + // Only valid to rotate constants if both are same associative ops + if( lhs.getClass() != op.getClass() ) return null; + lphi = pcon(lhs.in(2)); // Will rotate with the Phi push + } + if( lphi==null ) return null; + + // RHS is a constant or a Phi of constants + if( !(rhs instanceof ConstantNode con) && pcon(rhs)==null ) + return null; + + // If both are Phis, must be same Region + if( rhs instanceof PhiNode && lphi.in(0) != rhs.in(0) ) + return null; + + // Note that this is the exact reverse of Phi pulling a common op down + // to reduce total op-count. We don't get in an endless push-up + // push-down peephole cycle because the constants all fold first. + Node[] ns = new Node[lphi.nIns()]; + ns[0] = lphi.in(0); + // Push constant up through the phi: x + (phi con0+con0 con1+con1...) 
+ for( int i=1; i hi._nid; + } + + @Override Node copy(Node lhs, Node rhs) { return new AddNode(lhs,rhs); } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/BoolNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/BoolNode.java new file mode 100644 index 00000000..d65fbf14 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/BoolNode.java @@ -0,0 +1,60 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; + +import java.util.BitSet; + +abstract public class BoolNode extends Node { + + public BoolNode(Node lhs, Node rhs) { + super(null, lhs, rhs); + } + + abstract String op(); // String opcode name + + @Override + public String label() { return getClass().getSimpleName(); } + + @Override + public String glabel() { return op(); } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + in(1)._print0(sb.append("("), visited); + in(2)._print0(sb.append(op()), visited); + return sb.append(")"); + } + + @Override + public Type compute() { + if( in(1)._type instanceof TypeInteger i0 && + in(2)._type instanceof TypeInteger i1 ) { + if (i0.isConstant() && i1.isConstant()) + return TypeInteger.constant(doOp(i0.value(), i1.value()) ? 1 : 0); + return i0.meet(i1); + } + return Type.BOTTOM; + } + + abstract boolean doOp(long lhs, long rhs); + + @Override + public Node idealize() { + // Compare of same + if( in(1)==in(2) ) + return new ConstantNode(TypeInteger.constant(doOp(3,3)?1:0)); + + // Do we have ((x * (phi cons)) * con) ? + // Do we have ((x * (phi cons)) * (phi cons)) ? + // Push constant up through the phi: x * (phi con0*con0 con1*con1...) 
+ Node phicon = AddNode.phiCon(this,false); + if( phicon!=null ) return phicon; + + return null; + } + + public static class EQ extends BoolNode { public EQ(Node lhs, Node rhs) { super(lhs,rhs); } String op() { return "=="; } boolean doOp(long lhs, long rhs) { return lhs == rhs; } Node copy(Node lhs, Node rhs) { return new EQ(lhs,rhs); } } + public static class LT extends BoolNode { public LT(Node lhs, Node rhs) { super(lhs,rhs); } String op() { return "<" ; } boolean doOp(long lhs, long rhs) { return lhs < rhs; } Node copy(Node lhs, Node rhs) { return new LT(lhs,rhs); } } + public static class LE extends BoolNode { public LE(Node lhs, Node rhs) { super(lhs,rhs); } String op() { return "<="; } boolean doOp(long lhs, long rhs) { return lhs <= rhs; } Node copy(Node lhs, Node rhs) { return new LE(lhs,rhs); } } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/ConstantNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/ConstantNode.java new file mode 100644 index 00000000..0e13d2f2 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/ConstantNode.java @@ -0,0 +1,45 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.Parser; +import com.seaofnodes.simple.type.Type; + +import java.util.BitSet; + +/** + * A Constant node represents a constant value. At present, the only constants + * that we allow are integer literals; therefore Constants contain an integer + * value. As we add other types of constants, we will refactor how we represent + * Constants. + *

+ * Constants have no semantic inputs. However, we set Start as an input to + * Constants to enable a forward graph walk. This edge carries no semantic + * meaning, and it is present solely to allow visitation. + *

+ * The Constant's value is the value stored in it. + */ +public class ConstantNode extends Node { + Type _con; + public ConstantNode( Type type ) { + super(Parser.START); + _con = type; + } + + @Override + public String label() { return ""+_con; } + + @Override + public String uniqueName() { return "Con_" + _nid; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + return _con._print(sb); + } + + @Override public boolean isMultiTail() { return true; } + + @Override + public Type compute() { return _con; } + + @Override + public Node idealize() { return null; } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/DivNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/DivNode.java new file mode 100644 index 00000000..b0e12547 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/DivNode.java @@ -0,0 +1,39 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; + +import java.util.BitSet; + +public class DivNode extends Node { + public DivNode(Node lhs, Node rhs) { super(null, lhs, rhs); } + + @Override public String label() { return "Div"; } + + @Override public String glabel() { return "//"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + in(1)._print0(sb.append("("), visited); + in(2)._print0(sb.append("/"), visited); + return sb.append(")"); + } + + @Override + public Type compute() { + if (in(1)._type instanceof TypeInteger i0 && + in(2)._type instanceof TypeInteger i1) { + if (i0.isConstant() && i1.isConstant()) + return i1.value() == 0 + ? 
TypeInteger.ZERO + : TypeInteger.constant(i0.value()/i1.value()); + return i0.meet(i1); + } + return Type.BOTTOM; + } + + @Override + public Node idealize() { return null; } + + @Override Node copy(Node lhs, Node rhs) { return new DivNode(lhs,rhs); } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/IfNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/IfNode.java new file mode 100644 index 00000000..e573010d --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/IfNode.java @@ -0,0 +1,58 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; +import com.seaofnodes.simple.type.TypeTuple; + +import java.util.BitSet; + +public class IfNode extends MultiNode { + + public IfNode(Node ctrl, Node pred) { + super(ctrl, pred); + } + + @Override + public String label() { return "If"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + sb.append("if( "); + return in(1)._print0(sb, visited).append(" )"); + } + + @Override public boolean isCFG() { return true; } + @Override public boolean isMultiHead() { return true; } + + public Node ctrl() { return in(0); } + public Node pred() { return in(1); } + + @Override + public Type compute() { + // If the If node is not reachable then neither is any following Proj + if (ctrl()._type != Type.CONTROL && ctrl()._type != Type.BOTTOM ) + return TypeTuple.IF_NEITHER; + // If constant is 0 then false branch is reachable + // Else true branch is reachable + if (pred()._type instanceof TypeInteger ti && ti.isConstant()) { + if (ti.value() == 0) return TypeTuple.IF_FALSE; + else return TypeTuple.IF_TRUE; + } + + // Hunt up the immediate dominator tree. If we find an identical if + // test on either the true or false branch, then this test matches. 
+ for( Node dom = idom(), prior=this; dom!=null; prior=dom, dom = dom.idom() ) + if( dom instanceof IfNode iff && iff.pred()==pred() ) + return prior instanceof ProjNode proj + // Repeated test, dominated on one side. Test result is the same. + ? (proj._idx==0 ? TypeTuple.IF_TRUE : TypeTuple.IF_FALSE) + : null; // Repeated test not dominated on one side + + return TypeTuple.IF_BOTH; + } + + @Override + public Node idealize() { + return null; + } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/LoopNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/LoopNode.java new file mode 100644 index 00000000..eaf19199 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/LoopNode.java @@ -0,0 +1,35 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; + +import java.util.BitSet; + +public class LoopNode extends RegionNode { + + public LoopNode( Node entry, Node back) { super(null,entry,back); } + + Node entry() { return in(1); } + Node back () { return in(2); } + + @Override + public String label() { return "Loop"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + return sb.append("Loop").append(_nid); + } + + @Override + public Type compute() { + return inProgress() ? Type.CONTROL : super.compute(); + } + + @Override + public Node idealize() { + return inProgress() ? 
null : super.idealize(); + } + + @Override Node idom() { return entry(); } + + @Override public boolean inProgress() { return back()==null; } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/MinusNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/MinusNode.java new file mode 100644 index 00000000..8a96693d --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/MinusNode.java @@ -0,0 +1,30 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; + +import java.util.BitSet; + +public class MinusNode extends Node { + public MinusNode(Node in) { super(null, in); } + + @Override public String label() { return "Minus"; } + + @Override public String glabel() { return "-"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + in(1)._print0(sb.append("(-"), visited); + return sb.append(")"); + } + + @Override + public Type compute() { + if (in(1)._type instanceof TypeInteger i0) + return i0.isConstant() ? 
TypeInteger.constant(-i0.value()) : i0; + return Type.BOTTOM; + } + + @Override + public Node idealize() { return null; } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/MulNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/MulNode.java new file mode 100644 index 00000000..70e6c0db --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/MulNode.java @@ -0,0 +1,58 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.Type; +import com.seaofnodes.simple.type.TypeInteger; + +import java.util.BitSet; + +public class MulNode extends Node { + public MulNode(Node lhs, Node rhs) { super(null, lhs, rhs); } + + @Override public String label() { return "Mul"; } + + @Override public String glabel() { return "*"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + in(1)._print0(sb.append("("), visited); + in(2)._print0(sb.append("*"), visited); + return sb.append(")"); + } + + @Override + public Type compute() { + if (in(1)._type instanceof TypeInteger i0 && + in(2)._type instanceof TypeInteger i1) { + if (i0.isConstant() && i1.isConstant()) + return TypeInteger.constant(i0.value()*i1.value()); + return i0.meet(i1); + } + return Type.BOTTOM; + } + + @Override + public Node idealize() { + Node lhs = in(1); + Node rhs = in(2); + Type t1 = lhs._type; + Type t2 = rhs._type; + + // Mul of 1. We do not check for (1*x) because this will already + // canonicalize to (x*1) + if ( t2.isConstant() && t2 instanceof TypeInteger i && i.value()==1 ) + return lhs; + + // Move constants to RHS: con*arg becomes arg*con + if ( t1.isConstant() && !t2.isConstant() ) + return swap12(); + + // Do we have ((x * (phi cons)) * con) ? + // Do we have ((x * (phi cons)) * (phi cons)) ? + // Push constant up through the phi: x * (phi con0*con0 con1*con1...) 
+ Node phicon = AddNode.phiCon(this,true); + if( phicon!=null ) return phicon; + + return null; + } + @Override Node copy(Node lhs, Node rhs) { return new MulNode(lhs,rhs); } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/MultiNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/MultiNode.java new file mode 100644 index 00000000..81c90f68 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/MultiNode.java @@ -0,0 +1,7 @@ +package com.seaofnodes.simple.node; + +public abstract class MultiNode extends Node { + + public MultiNode(Node... inputs) { super(inputs); } + +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/Node.java b/chapter08/src/main/java/com/seaofnodes/simple/node/Node.java new file mode 100644 index 00000000..fcc8df85 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/Node.java @@ -0,0 +1,467 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.Utils; +import com.seaofnodes.simple.type.Type; + +import java.util.*; + +/** + * All Nodes in the Sea of Nodes IR inherit from the Node class. + * The Node class provides common functionality used by all subtypes. + * Subtypes of Node specialize by overriding methods. + */ +public abstract class Node { + + /** + * Each node has a unique dense Node ID within a compilation context + * The ID is useful for debugging, for using as an offset in a bitvector, + * as well as for computing equality of nodes (to be implemented later). + */ + public final int _nid; + + /** + * Inputs to the node. These are use-def references to Nodes. + *

+ * Generally fixed length, ordered, nulls allowed, no unused trailing space. + * Ordering is required because e.g. "a/b" is different from "b/a". + * The first input (offset 0) is often a {@link #isCFG} node. + */ + public final ArrayList _inputs; + + /** + * Outputs reference Nodes that are not null and have this Node as an + * input. These nodes are users of this node, thus these are def-use + * references to Nodes. + *

+ * Outputs directly match inputs, making a directed graph that can be + * walked in either direction. These outputs are typically used for + * efficient optimizations but otherwise have no semantics meaning. + */ + public final ArrayList _outputs; + + + /** + * Current computed type for this Node. This value changes as the graph + * changes and more knowledge is gained about the program. + */ + public Type _type; + + + /** + * Immediate dominator tree depth, used to approximate a real IDOM during + * parsing where we do not have the whole program, and also peepholes + * change the CFG incrementally. + *

+ * See {@link ...} + */ + int _idepth; + + /** + * A private Global Static mutable counter, for unique node id generation. + * To make the compiler multithreaded, this field will have to move into a TLS. + * Starting with value 1, to avoid bugs confusing node ID 0 with uninitialized values. + * */ + private static int UNIQUE_ID = 1; + + protected Node(Node... inputs) { + _nid = UNIQUE_ID++; // allocate unique dense ID + _inputs = new ArrayList<>(); + Collections.addAll(_inputs,inputs); + _outputs = new ArrayList<>(); + for( Node n : _inputs ) + if( n != null ) + n.addUse( this ); + } + + // Easy reading label for debugger, e.g. "Add" or "Region" or "EQ" + public abstract String label(); + + // Unique label for graph visualization, e.g. "Add12" or "Region30" or "EQ99" + public String uniqueName() { return label() + _nid; } + + // Graphical label, e.g. "+" or "Region" or "==" + public String glabel() { return label(); } + + + // ------------------------------------------------------------------------ + + // Debugger Printing. + + // {@code toString} is what you get in the debugger. It has to print 1 + // line (because this is what a debugger typically displays by default) and + // has to be robust with broken graph/nodes. + @Override + public final String toString() { return print(); } + + // This is a *deep* print. This version will fail on cycles, which we will + // correct later when we can parse programs with loops. We print with a + // tik-tok style; the common _print0 calls the per-Node _print1, which + // calls back to _print0; + public final String print() { + return _print0(new StringBuilder(), new BitSet()).toString(); + } + // This is the common print: check for repeats, check for DEAD and print + // "DEAD" else call the per-Node print1. + final StringBuilder _print0(StringBuilder sb, BitSet visited) { + if (visited.get(_nid)) + return sb.append(label()); + visited.set(_nid); + return isDead() + ? 
sb.append(uniqueName()).append(":DEAD") + : _print1(sb, visited); + } + // Every Node implements this; a partial-line recursive print + abstract StringBuilder _print1(StringBuilder sb, BitSet visited); + + + // Print a node on 1 line, columnar aligned, as: + // NNID NNAME DDEF DDEF [[ UUSE UUSE ]] TYPE + // 1234 sssss 1234 1234 1234 1234 1234 1234 tttttt + public void _print_line( StringBuilder sb ) { + sb.append("%4d %-7.7s ".formatted(_nid,label())); + if( _inputs==null ) { + sb.append("DEAD\n"); + return; + } + for( Node def : _inputs ) + sb.append(def==null ? "____ " : "%4d ".formatted(def._nid)); + for( int i = _inputs.size(); i<3; i++ ) + sb.append(" "); + sb.append(" [[ "); + for( Node use : _outputs ) + sb.append("%4d ".formatted(use._nid)); + int lim = 5 - Math.max(_inputs.size(),3); + for( int i = _outputs.size(); idef into a Node. Keeps the edges correct, by removing + * the corresponding use->def edge. This may make the original + * def go dead. This function is co-recursive with {@link #kill}. + *

+ + * This method is the normal path for altering a Node, because it does the + * proper default edge maintenance. It also immediately kills + * Nodes that lose their last use; at times care must be taken to avoid + * killing Nodes that are being used without having an output Node. This + * definitely happens in the middle of recursive {@link #peephole} calls. + * + * @param idx which def to set + * @param new_def the new definition + * @return new_def for flow coding + */ + Node set_def(int idx, Node new_def ) { + Node old_def = in(idx); + if( old_def == new_def ) return this; // No change + // If new def is not null, add the corresponding def->use edge + // This needs to happen before removing the old node's def->use edge as + // the new_def might get killed if the old node kills it recursively. + if( new_def != null ) + new_def.addUse(this); + if( old_def != null && // If the old def exists, remove a def->use edge + old_def.delUse(this) ) // If we removed the last use, the old def is now dead + old_def.kill(); // Kill old def + // Set the new_def over the old (killed) edge + _inputs.set(idx,new_def); + // Return self for easy flow-coding + return new_def; + } + + // Remove the numbered input, compressing the inputs in-place. This + // shuffles the order deterministically - which is suitable for Region and + // Phi, but not for every Node. + void delDef(int idx) { + Node old_def = in(idx); + if( old_def != null && // If the old def exists, remove a def->use edge + old_def.delUse(this) ) // If we removed the last use, the old def is now dead + old_def.kill(); // Kill old def + Utils.del(_inputs, idx); + } + + /** + * Add a new def to an existing Node. Keep the edges correct by + * adding the corresponding def->use edge. 
+ * + * @param new_def the new definition, appended to the end of existing definitions + * @return new_def for flow coding + */ + public Node add_def(Node new_def) { + // Add use->def edge + _inputs.add(new_def); + // If new def is not null, add the corresponding def->use edge + if( new_def != null ) + new_def.addUse(this); + return new_def; + } + + // Breaks the edge invariants, used temporarily + protected N addUse(Node n) { _outputs.add(n); return (N)this; } + + // Remove node 'use' from 'def's (i.e. our) output list, by compressing the list in-place. + // Return true if the output list is empty afterward. + // Error is 'use' does not exist; ok for 'use' to be null. + protected boolean delUse( Node use ) { + Utils.del(_outputs, Utils.find(_outputs, use)); + return _outputs.size() == 0; + } + + // Shortcut for "popping" n nodes. A "pop" is basically a + // set_def(last,null) followed by lowering the nIns() count. + void pop_n(int n) { + for( int i=0; iuses, by setting all of its defs + * to null. This may recursively kill more Nodes and is basically dead + * code elimination. This function is co-recursive with {@link #pop_n}. + */ + public void kill( ) { + assert isUnused(); // Has no uses, so it is dead + pop_n(nIns()); // Set all inputs to null, recursively killing unused Nodes + _type=null; // Flag as dead + assert isDead(); // Really dead now + } + + // Mostly used for asserts and printing. + boolean isDead() { return isUnused() && nIns()==0 && _type==null; } + + // Shortcuts to stop DCE mid-parse + // Add bogus null use to keep node alive + public N keep() { return addUse(null); } + // Remove bogus null. 
+ public N unkeep() { delUse(null); return (N)this; } + + + // Replace self with nnn in the graph, making 'this' go dead + void subsume( Node nnn ) { + assert nnn!=this; + while( nOuts() > 0 ) { + Node n = _outputs.removeLast(); + int idx = Utils.find(n._inputs, this); + n._inputs.set(idx,nnn); + nnn.addUse(n); + } + kill(); + } + + // ------------------------------------------------------------------------ + // Graph-based optimizations + + /** + * We allow disabling peephole opt so that we can observe the + * full graph, vs the optimized graph. + */ + public static boolean _disablePeephole = false; + + /** + * Try to peephole at this node and return a better replacement Node if + * possible. We compute a {@link Type} and then check and replace: + *

    + *
  • if the Type {@link Type#isConstant}, we replace with a {@link ConstantNode}
  • + *
  • in a future chapter we will look for a + * Common Subexpression + * to eliminate.
  • + *
  • we ask the Node for a better replacement. The "better replacement" + * is things like {@code (1+2)} becomes {@code 3} and {@code (1+(x+2))} becomes + * {@code (x+(1+2))}. By canonicalizing expressions we fold common addressing + * math constants, remove algebraic identities and generally simplify the + * code.
  • + *
+ */ + public final Node peephole( ) { + // Compute initial or improved Type + Type type = _type = compute(); + + if (_disablePeephole) + return this; // Peephole optimizations turned off + + // Replace constant computations from non-constants with a constant node + if (!(this instanceof ConstantNode) && type.isConstant()) + return deadCodeElim(new ConstantNode(type).peephole()); + + // Future chapter: Global Value Numbering goes here + + // Ask each node for a better replacement + Node n = idealize(); + if( n != null ) // Something changed + // Recursively optimize + return deadCodeElim(n.peephole()); + + return this; // No progress + } + + // m is the new Node, self is the old. + // Return 'm', which may have zero uses but is alive nonetheless. + // If self has zero uses (and is not 'm'), {@link #kill} self. + private Node deadCodeElim(Node m) { + // If self is going dead and not being returned here (Nodes returned + // from peephole commonly have no uses (yet)), then kill self. + if( m != this && isUnused() ) { + // Killing self - and since self recursively kills self's inputs we + // might end up killing 'm', which we are returning as a live Node. + // So we add a bogus extra null output edge to stop kill(). + m.keep(); // Keep m alive + kill(); // Kill self because replacing with 'm' + m.unkeep(); // Okay to peephole m + } + return m; + } + + /** + * This function needs to be + * Monotonic + * as it is part of a Monotone Analysis Framework. + * See for example this set of slides. + *

+ * For Chapter 2, all our Types are really integer constants, and so all + * the needed properties are trivially true, and we can ignore the high + * theory. Much later on, this will become important and allow us to do + * many fancy complex optimizations trivially... because theory. + *

+ * compute() needs to be stand-alone, and cannot recursively call compute + * on its inputs: programs are cyclic (have loops!) and this will just + * infinitely recurse until stack overflow. Instead, compute typically + * computes a new type from the {@link #_type} field of its inputs. + */ + public abstract Type compute(); + + /** + * This function rewrites the current Node into a more "idealized" form. + * This is the bulk of our peephole rewrite rules, and we use this to + * e.g. turn arbitrary collections of adds and multiplies with mixed + * constants into a normal form that's easy for hardware to implement. + * Example: An array addressing expression: + *

   ary[idx+1]
+ * might turn into Sea-of-Nodes IR: + *
   (ary+12)+((idx+1) * 4)
+ * This expression can then be idealized into: + *
   ary + ((idx*4) + (12 + (1*4)))
+ * And more folding: + *
   ary + ((idx<<2) + 16)
+ * And during code-gen: + *
   MOV4 Rary,Ridx,16 // or some such hardware-specific notation 
+ *

+ * {@link #idealize} has a very specific calling convention: + *

    + *
  • If NO change is made, return {@code null} + *
  • If ANY change is made, return not-null; this can be {@code this} + *
  • The returned Node does NOT call {@link #peephole} on itself; the {@link #peephole} call will recursively peephole it. + *
  • Any NEW nodes that are not directly returned DO call {@link #peephole}. + *
+ *

+ * Examples: + * + * + * + * + * + *
before after return comment
{@code (x+5) }{@code (x+5) }{@code null }No change
{@code (5+x) }{@code (x+5) }{@code this }Swapped arguments
{@code ((x+1)+2)}{@code (x+(1+2))}{@code (x+_) }Returns 2 new Nodes
+ * + * The last entry deserves more discussion. The new Node {@code (1+2)} + * created in {@link #idealize} calls {@link #peephole} (which then folds + * into a constant). The other new Node {@code (x+3)} does not call + * peephole, because it is returned and peephole itself will recursively + * call peephole. + *

+ * Since idealize calls peephole and peephole calls idealize, you must be + * careful that all idealizations are monotonic: all transforms remove + * some feature, so that the set of available transforms always shrinks. + * If you don't, you risk an infinite peephole loop! + * + * @return Either a new or changed node, or null for no changes. + */ + public abstract Node idealize(); + + + // ------------------------------------------------------------------------ + // Peephole utilities + + // Swap inputs without letting either input go dead during the swap. + Node swap12() { + Node tmp = in(1); + _inputs.set(1,in(2)); + _inputs.set(2,tmp); + return this; + } + + // does this node contain all constants? + // Ignores in(0), as is usually control. + boolean all_cons() { + for( int i=1; i= 0 ) lhs = lhs.idom(); + if( comp <= 0 ) rhs = rhs.idom(); + } + if( lhs==null ) return null; + _idepth = lhs._idepth+1; + return (_idom=lhs); + } + + public boolean inProgress() { return false; } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/ReturnNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/ReturnNode.java new file mode 100644 index 00000000..4dea773a --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/ReturnNode.java @@ -0,0 +1,46 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.type.*; + +import java.util.BitSet; + +/** + * The Return node has two inputs. The first input is a control node and the + * second is the data node that supplies the return value. + *

+ * In this presentation, Return functions as a Stop node, since multiple return statements are not possible. + * The Stop node will be introduced in Chapter 6 when we implement if statements. + *

+ * The Return's output is the value from the data node. + */ +public class ReturnNode extends Node { + + public ReturnNode(Node ctrl, Node data) { + super(ctrl, data); + } + + public Node ctrl() { return in(0); } + public Node expr() { return in(1); } + + @Override + public String label() { return "Return"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + return expr()._print0(sb.append("return "), visited).append(";"); + } + + @Override public boolean isCFG() { return true; } + + @Override + public Type compute() { + return new TypeTuple(ctrl()._type,expr()._type); + } + + @Override + public Node idealize() { + if( ctrl()._type==Type.XCONTROL ) + return ctrl(); + return null; + } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/node/ScopeNode.java b/chapter08/src/main/java/com/seaofnodes/simple/node/ScopeNode.java new file mode 100644 index 00000000..e486a390 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/node/ScopeNode.java @@ -0,0 +1,193 @@ +package com.seaofnodes.simple.node; + +import com.seaofnodes.simple.Parser; +import com.seaofnodes.simple.type.Type; + +import java.util.*; + +/** + * The Scope node is purely a parser helper - it tracks names to nodes with a + * stack of scopes. 
+ */ +public class ScopeNode extends Node { + + /** + * The control is a name that binds to the currently active control + * node in the graph + */ + public static final String CTRL = "$ctrl"; + public static final String ARG0 = "arg"; + + /** + * Names for every input edge + */ + public final Stack> _scopes; + + + // A new ScopeNode + public ScopeNode() { + _scopes = new Stack<>(); + _type = Type.BOTTOM; + } + + + @Override public String label() { return "Scope"; } + + @Override + StringBuilder _print1(StringBuilder sb, BitSet visited) { + sb.append("Scope[ "); + String[] names = reverse_names(); + for( int j=0; j syms : _scopes ) + for( String name : syms.keySet() ) + names[syms.get(name)] = name; + return names; + } + + @Override public Type compute() { return Type.BOTTOM; } + + @Override public Node idealize() { return null; } + + public void push() { _scopes.push(new HashMap<>()); } + public void pop() { pop_n(_scopes.pop().size()); } + + // Create a new name in the current scope + public Node define( String name, Node n ) { + HashMap syms = _scopes.lastElement(); + if( syms.put(name,nIns()) != null ) + return null; // Double define + return add_def(n); + } + + // Lookup a name. It is recursive to support lazy Phis on loops. + public Node lookup(String name) { return update(name,null,_scopes.size()-1); } + // If the name is present in any scope, then redefine else null + public Node update(String name, Node n) { return update(name,n,_scopes.size()-1); } + // Both recursive lookup and update. If a Lazy Phi is found at any level, + // a concrete Phi is inserted at the loop head and also in the current + // scope. The Lazy Phi does a recursive lookup for its input which may + // itself be Lazy. 
+ private Node update( String name, Node n, int i ) { + if( i<0 ) return null; // Missed in all scopes, not found + var syms = _scopes.get(i); + var idx = syms.get(name); + if( idx == null ) return update(name,n,i-1); // Missed in this scope, recursively look + Node old = in(idx); + if( old instanceof ScopeNode loop ) { + // Lazy Phi! + old = loop.in(idx) instanceof PhiNode phi && loop.ctrl()==phi.region() + // Loop already has a real Phi, use it + ? loop.in(idx) + // Set real Phi in the loop head + // The phi takes its one input (no backedge yet) from a recursive + // lookup, which might have insert a Phi in every loop nest. + : loop.set_def(idx,new PhiNode(name,loop.ctrl(),loop.update(name,null,i),null).peephole()); + set_def(idx,old); + } + return n==null ? old : set_def(idx,n); // Not lazy, so this is the answer + } + + public Node ctrl() { return in(0); } + + /** + * The ctrl of a ScopeNode is always bound to the currently active + * control node in the graph, via a special name '$ctrl' that is not + * a valid identifier in the language grammar and hence cannot be + * referenced in Simple code. + * + * @param n The node to be bound to '$ctrl' + * + * @return Node that was bound + */ + public Node ctrl(Node n) { return set_def(0,n); } + + /** + * Duplicate a ScopeNode; including all levels, up to Nodes. So this is + * neither shallow (would dup the Scope but not the internal HashMap + * tables), nor deep (would dup the Scope, the HashMap tables, but then + * also the program Nodes). + *

+ * If the {@code loop} flag is set, the edges are filled in as the original + * Scope, as a indication of Lazy Phis at loop heads. The goal here is to + * not make Phis at loop heads for variables which are never touched in the + * loop body. + *

+ * The new Scope is a full-fledged Node with proper use<->def edges. + */ + public ScopeNode dup() { return dup(false); } + public ScopeNode dup(boolean loop) { + ScopeNode dup = new ScopeNode(); + // Our goals are: + // 1) duplicate the name bindings of the ScopeNode across all stack levels + // 2) Make the new ScopeNode a user of all the nodes bound + // 3) Ensure that the order of defs is the same to allow easy merging + for( HashMap syms : _scopes ) + dup._scopes.push(new HashMap<>(syms)); + String[] reverse = Parser.LAZY ? null : reverse_names(); + dup.add_def(ctrl()); // Control input is just copied + for( int i=1; isee for example this set of slides. + *

+ * The types form a lattice; @see a symmetric complete bounded (ranked) lattice. + *

+ * This wild lattice theory will be needed later to allow us to easily beef up + * the analysis and optimization of the Simple compiler... but we don't need it + * now, just know that it is coming along in a later Chapter. + *

g + * One of the fun things here is that while the theory is deep and subtle, the + * actual implementation is darn near trivial and is generally really obvious + * what we're doing with it. Right now, it's just simple integer math to do + * simple constant folding e.g. 1+2 == 3 stuff. + */ + +public class Type { + + // ---------------------------------------------------------- + // Simple types are implemented fully here. "Simple" means: the code and + // type hierarchy are simple, not that the Type is conceptually simple. + static final byte TBOT = 0; // Bottom (ALL) + static final byte TTOP = 1; // Top (ANY) + static final byte TCTRL = 2; // Ctrl flow bottom + static final byte TXCTRL = 3; // Ctrl flow top (mini-lattice: any-xctrl-ctrl-all) + static final byte TSIMPLE = 4; // End of the Simple Types + static final byte TINT = 5; // All Integers; see TypeInteger + static final byte TTUPLE = 6; // Tuples; finite collections of unrelated Types, kept in parallel + + public final byte _type; + + public boolean is_simple() { return _type < TSIMPLE; } + private static final String[] STRS = new String[]{"Bot","Top","Ctrl","~Ctrl"}; + protected Type(byte type) { _type = type; } + + public static final Type BOTTOM = new Type( TBOT ); // ALL + public static final Type TOP = new Type( TTOP ); // ANY + public static final Type CONTROL = new Type( TCTRL ); // Ctrl + public static final Type XCONTROL = new Type( TXCTRL ); // ~Ctrl + + public boolean isConstant() { return _type == TTOP || _type == TXCTRL; } + + public StringBuilder _print(StringBuilder sb) {return is_simple() ? sb.append(STRS[_type]) : sb;} + + public final Type meet(Type t) { + // Shortcut for the self case + if( t == this ) return this; + // Same-type is always safe in the subclasses + if( _type==t._type ) return xmeet(t); + // Reverse; xmeet 2nd arg is never "is_simple" and never equal to "this". 
+ if( is_simple() ) return this.xmeet(t ); + if( t.is_simple() ) return t .xmeet(this); + return BOTTOM; // Mixing 2 unrelated types + } + + // Compute meet right now. Overridden in subclasses. + // Handle cases where 'this.is_simple()' and unequal to 't'. + // Subclassed xmeet calls can assert that '!t.is_simple()'. + protected Type xmeet(Type t) { + assert is_simple(); // Should be overridden in subclass + // ANY meet anything is thing; thing meet ALL is ALL + if( _type==TBOT || t._type==TTOP ) return this; + if( _type==TTOP || t._type==TBOT ) return t; + // 'this' is {TCTRL,TXCTRL} + if( !t.is_simple() ) return BOTTOM; + // 't' is {TCTRL,TXCTRL} + return _type==TCTRL || t._type==TCTRL ? CONTROL : XCONTROL; + } + + @Override + public final String toString() { + return _print(new StringBuilder()).toString(); + } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/type/TypeInteger.java b/chapter08/src/main/java/com/seaofnodes/simple/type/TypeInteger.java new file mode 100644 index 00000000..93f5eae3 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/type/TypeInteger.java @@ -0,0 +1,63 @@ +package com.seaofnodes.simple.type; + +/** + * Integer Type + */ +public class TypeInteger extends Type { + + public final static TypeInteger TOP = new TypeInteger(false, 0); + public final static TypeInteger BOT = new TypeInteger(false, 1); + public final static TypeInteger ZERO= new TypeInteger(true, 0); + + private final boolean _is_con; + + /** + * The constant value or + * if not constant then 0=bottom, 1=top. 
+ */ + private final long _con; + + public TypeInteger(boolean is_con, long con) { + super(TINT); + _is_con = is_con; + _con = con; + } + + public static TypeInteger constant(long con) { return new TypeInteger(true, con); } + + public boolean isTop() { return !_is_con && _con==0; } + public boolean isBot() { return !_is_con && _con==1; } + + @Override + public StringBuilder _print(StringBuilder sb) { + if (isTop()) return sb.append("IntTop"); + if (isBot()) return sb.append("IntBot"); + return sb.append(_con); + } + + @Override + public boolean isConstant() { return _is_con; } + + public long value() { return _con; } + + @Override + public Type xmeet(Type other) { + if( this==other ) return this; + if (!(other instanceof TypeInteger i)) return super.meet(other); + // BOT wins + if ( isBot() ) return this; + if ( i.isBot() ) return i ; + // TOP loses + if ( i.isTop() ) return this; + if ( isTop() ) return i ; + assert isConstant() && i.isConstant(); + return _con==i._con ? this : TypeInteger.BOT; + } + + @Override + public boolean equals( Object o ) { + if( o==this ) return true; + if( !(o instanceof TypeInteger i) ) return false; + return _con==i._con && _is_con==i._is_con; + } +} diff --git a/chapter08/src/main/java/com/seaofnodes/simple/type/TypeTuple.java b/chapter08/src/main/java/com/seaofnodes/simple/type/TypeTuple.java new file mode 100644 index 00000000..f8c3b3c8 --- /dev/null +++ b/chapter08/src/main/java/com/seaofnodes/simple/type/TypeTuple.java @@ -0,0 +1,32 @@ +package com.seaofnodes.simple.type; + +public class TypeTuple extends Type { + + public final Type[] _types; + + public TypeTuple(Type... 
_types) { + super(TTUPLE); + this._types = _types; + } + + @Override + public Type xmeet(Type other) { + throw new UnsupportedOperationException("Meet on Tuple Type not yet implemented"); + } + + @Override + public StringBuilder _print(StringBuilder sb) { + sb.append("["); + for( Type t : _types ) + t._print(sb).append(","); + sb.setLength(sb.length()-1); + sb.append("]"); + return sb; + } + + public static final TypeTuple IF_BOTH = new TypeTuple(new Type[]{Type.CONTROL, Type.CONTROL}); + public static final TypeTuple IF_NEITHER = new TypeTuple(new Type[]{Type.XCONTROL,Type.XCONTROL}); + public static final TypeTuple IF_TRUE = new TypeTuple(new Type[]{Type.CONTROL, Type.XCONTROL}); + public static final TypeTuple IF_FALSE = new TypeTuple(new Type[]{Type.XCONTROL,Type.CONTROL}); + +} diff --git a/chapter08/src/test/java/com/seaofnodes/simple/Chapter08Test.java b/chapter08/src/test/java/com/seaofnodes/simple/Chapter08Test.java new file mode 100644 index 00000000..00178a64 --- /dev/null +++ b/chapter08/src/test/java/com/seaofnodes/simple/Chapter08Test.java @@ -0,0 +1,1028 @@ +package com.seaofnodes.simple; + +import com.seaofnodes.simple.node.*; +import com.seaofnodes.simple.type.TypeInteger; +import org.junit.Test; + +import java.util.ArrayList; + +import static org.junit.Assert.*; + +public class Chapter08Test { + + @Test + public void testChapter7Example() { + Parser parser = new Parser( + """ + while(arg < 10) { + arg = arg + 1; + #showGraph; + } + return arg; + """); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop6,arg,(Phi_arg+1));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + } + + @Test + public void testChapter7Regression() { + Parser parser = new Parser( + +""" +int a = 1; +if(arg){}else{ + while(a < 10) { + a = a + 1; + } +} +return a; +"""); + StopNode stop = parser.parse(true); + assertEquals("return 
Phi(Region23,1,Phi(Loop11,1,(Phi_a+1)));", stop.toString()); + } + + @Test + public void testChapter7WhileNested() { + Parser parser = new Parser( + +""" +int sum = 0; +int i = 0; +while(i < arg) { + i = i + 1; + int j = 0; + while( j < arg ) { + sum = sum + j; + j = j + 1; + } +} +return sum; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,0,Phi(Loop21,Phi_sum,(Phi(Loop,0,(Phi_j+1))+Phi_sum)));", stop.toString()); + System.out.println(IRPrinter.prettyPrint(stop,99)); + } + + @Test + public void testChapter7WhileScope() { + Parser parser = new Parser( +""" +int a = 1; +int b = 2; +while(a < 10) { + if (a == 2) a = 3; + else b = 4; +} +return b; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,2,Phi(Region27,Phi_b,4));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + System.out.println(IRPrinter.prettyPrint(stop,99)); + } + + @Test + public void testChapter7WhileNestedIfAndInc() { + Parser parser = new Parser( +""" +int a = 1; +int b = 2; +while(a < 10) { + if (a == 2) a = 3; + else b = 4; + b = b + 1; + a = a + 1; +} +return b; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,2,(Phi(Region27,Phi_b,4)+1));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + System.out.println(IRPrinter.prettyPrint(stop,99)); + } + + + @Test + public void testChapter7While() { + Parser parser = new Parser( +""" +int a = 1; +while(a < 10) { + a = a + 1; + a = a + 2; +} +return a; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop7,1,((Phi_a+1)+2));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + } + + @Test + public void testChapter7WhilePeep() { + Parser parser = new Parser( +""" +int a = 1; +while(a < 10) { + a = a + 1; + a = a + 2; +} +return a; +"""); + StopNode stop 
= parser.parse(true); + assertEquals("return Phi(Loop7,1,(Phi_a+3));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + } + + @Test + public void testChapter7While2() { + Parser parser = new Parser( +""" +int a = 1; +while(arg) a = 2; +return a; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop7,1,2);", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + System.out.println(IRPrinter.prettyPrint(stop,99)); + } + + @Test + public void testChapter7While2Peep() { + Parser parser = new Parser( +""" +int a = 1; +while(arg) a = 2; +return a; +"""); + StopNode stop = parser.parse(false); + assertEquals("return Phi(Loop7,1,2);", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + System.out.println(IRPrinter.prettyPrint(stop, 99)); + } + + @Test + public void testChapter7While3() { + Parser parser = new Parser( +""" +int a = 1; +while(a < 10) { + int b = a + 1; + a = b + 2; +} +return a; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop7,1,((Phi_a+1)+2));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + Node._disablePeephole = false; + } + + @Test + public void testChapter7While3Peep() { + Parser parser = new Parser( +""" +int a = 1; +while(a < 10) { + int b = a + 1; + a = b + 2; +} +return a; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop7,1,(Phi_a+3));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + } + + @Test + public void testChapter7While4() { + Parser parser = new Parser( +""" +int a = 1; +int b = 2; +while(a < 10) { + int b = a + 1; + a = b + 2; +} +return a; +"""); + Node._disablePeephole = true; + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,1,((Phi_a+1)+2));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + 
Node._disablePeephole = false; + } + + @Test + public void testChapter7While4Peep() { + Parser parser = new Parser( +""" +int a = 1; +int b = 2; +while(a < 10) { + int b = a + 1; + a = b + 2; +} +return a; +"""); + StopNode stop = parser.parse(true); + assertEquals("return Phi(Loop8,1,(Phi_a+3));", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + } + + @Test + public void testChapter6PeepholeReturn() { + Parser parser = new Parser( +""" +if( true ) return 2; +return 1; +"""); + StopNode stop = parser.parse(true); + assertEquals("return 2;", stop.toString()); + assertTrue(stop.ret().ctrl() instanceof ProjNode); + } + + @Test + public void testChapter6PeepholeRotate() { + Parser parser = new Parser( +""" +int a = 1; +if (arg) + a = 2; +return (arg < a) < 3; +"""); + StopNode stop = parser.parse(false); + assertEquals("return ((argchapter04 chapter05 chapter06 + chapter07 + chapter08