forked from riscv/riscv-opcodes
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'migration-to-new-format' of https://github.com/incorese…
…mi/riscv-opcodes into incoresemi-migration-to-new-format
- Loading branch information
Showing
113 changed files
with
2,484 additions
and
1,867 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,57 @@ | ||
SHELL := /bin/sh | ||
|
||
EXTENSIONS := "rv*" "unratified/rv*" | ||
ISASIM_H := ../riscv-isa-sim/riscv/encoding.h | ||
PK_H := ../riscv-pk/machine/encoding.h | ||
ENV_H := ../riscv-tests/env/encoding.h | ||
OPENOCD_H := ../riscv-openocd/src/target/riscv/encoding.h | ||
INSTALL_HEADER_FILES := $(ISASIM_H) $(PK_H) $(ENV_H) $(OPENOCD_H) | ||
|
||
ALL_REAL_ILEN32_OPCODES := opcodes-rv32i opcodes-rv64i opcodes-rv32m opcodes-rv64m opcodes-rv32a opcodes-rv64a opcodes-rv32h opcodes-rv64h opcodes-rv32f opcodes-rv64f opcodes-rv32d opcodes-rv64d opcodes-rv32q opcodes-rv64q opcodes-rv32xbitmanip opcodes-rv64xbitmanip opcodes-system opcodes-svinval opcodes-rv32zfh opcodes-rv32d-zfh opcodes-rv32q-zfh opcodes-rv64zfh opcodes-rvzk opcodes-rv32zk opcodes-rv64zk opcodes-zicbo opcodes-rv32zba opcodes-rv32zbb opcodes-rv32zbc opcodes-rv32zbkb opcodes-rv32zbkx opcodes-rv32zbs opcodes-rv64zba opcodes-rv64zbb opcodes-rv64zbkb | ||
ALL_REAL_OPCODES := $(ALL_REAL_ILEN32_OPCODES) opcodes-rvc opcodes-rv32c opcodes-rv64c opcodes-custom opcodes-rvv opcodes-rvp | ||
|
||
ALL_OPCODES := opcodes-pseudo $(ALL_REAL_OPCODES) opcodes-rvv-pseudo | ||
default: everything | ||
|
||
install: encoding.out.h inst.chisel inst.spinalhdl instr-table.tex priv-instr-table.tex | ||
install: everything | ||
set -e; for FILE in $(INSTALL_HEADER_FILES); do cp -f encoding.out.h $$FILE; done | ||
|
||
encoding.out.h: $(ALL_OPCODES) parse_opcodes encoding.h | ||
echo "/*" > $@ | ||
echo " * This file is auto-generated by running 'make' in" >> $@ | ||
echo " * https://github.com/riscv/riscv-opcodes (`git log -1 --format="format:%h"`)" >> $@ | ||
echo " */" >> $@ | ||
echo >> $@ | ||
cat encoding.h >> $@ | ||
cat $(ALL_OPCODES) | ./parse_opcodes -c >> $@ | ||
.PHONY : everything | ||
everything: | ||
@./parse.py -c -go -chisel -sverilog -rust -latex -spinalhdl $(EXTENSIONS) | ||
|
||
inst.chisel: $(ALL_OPCODES) parse_opcodes | ||
cat $(ALL_OPCODES) | ./parse_opcodes -chisel > $@ | ||
.PHONY : encoding.out.h | ||
encoding.out.h: | ||
@./parse.py -c $(EXTENSIONS) | ||
|
||
inst.spinalhdl: $(ALL_OPCODES) parse_opcodes | ||
cat $(ALL_OPCODES) | ./parse_opcodes -spinalhdl > $@ | ||
.PHONY : inst.chisel | ||
inst.chisel: | ||
@./parse.py -chisel $(EXTENSIONS) | ||
|
||
inst.go: $(ALL_REAL_ILEN32_OPCODES) parse_opcodes | ||
cat $(ALL_REAL_ILEN32_OPCODES) | ./parse_opcodes -go > $@ | ||
.PHONY : inst.go | ||
inst.go: | ||
@./parse.py -go $(EXTENSIONS) | ||
|
||
inst.rs: $(ALL_OPCODES) parse_opcodes | ||
cat $(ALL_OPCODES) | ./parse_opcodes -rust > $@ | ||
.PHONY : latex | ||
latex: | ||
@./parse.py -latex $(EXTENSIONS) | ||
|
||
inst.sverilog: $(ALL_OPCODES) parse_opcodes | ||
cat $(ALL_OPCODES) | ./parse_opcodes -sverilog > $@ | ||
.PHONY : inst.sverilog | ||
inst.sverilog: | ||
@./parse.py -sverilog $(EXTENSIONS) | ||
|
||
instr-table.tex: $(ALL_OPCODES) parse_opcodes | ||
cat $(ALL_OPCODES) | ./parse_opcodes -tex > $@ | ||
.PHONY : inst.rs | ||
inst.rs: | ||
@./parse.py -rust $(EXTENSIONS) | ||
|
||
priv-instr-table.tex: $(ALL_OPCODES) parse_opcodes | ||
cat $(ALL_OPCODES) | ./parse_opcodes -privtex > $@ | ||
.PHONY : clean | ||
clean: | ||
rm -f inst* priv-instr-table.tex encoding.out.h | ||
|
||
.PHONY : install | ||
install: c | ||
set -e; for FILE in $(INSTALL_HEADER_FILES); do cp -f encoding.out.h $$FILE; done | ||
|
||
.PHONY: instr-table.tex | ||
instr-table.tex: latex | ||
|
||
.PHONY: priv-instr-table.tex | ||
priv-instr-table.tex: latex | ||
|
||
.PHONY: inst.spinalhdl | ||
inst.spinalhdl: | ||
@./parse.py -spinalhdl $(EXTENSIONS) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,212 @@ | ||
riscv-opcodes | ||
=========================================================================== | ||
# riscv-opcodes | ||
|
||
This repo enumerates standard RISC-V instruction opcodes and control and | ||
status registers. It also contains a script to convert them into several | ||
formats (C, Scala, LaTeX). | ||
|
||
This repo is not meant to stand alone; it is a subcomponent of | ||
[riscv-tools](https://github.com/riscv/riscv-tools) and assumes that it | ||
is part of that directory structure. | ||
Artifacts (encoding.h, latex-tables, etc) from this repo are used in other | ||
tools and projects like Spike, PK, RISC-V Manual, etc. | ||
|
||
## Project Structure | ||
|
||
```bash | ||
├── constants.py # contains variables, constants and data-structures used in parse.py | ||
├── encoding.h # the template encoding.h file | ||
├── LICENSE # license file | ||
├── Makefile # makefile to generate artifacts | ||
├── parse.py # python file to perform checks on the instructions and generate artifacts | ||
├── README.md # this file | ||
├── rv* # instruction opcode files | ||
└── unratified # contains unratified instruction opcode files | ||
``` | ||
|
||
## File Naming Policy | ||
|
||
This project follows a very specific file structure to define the instruction encodings. All files | ||
containing instruction encodings start with the prefix `rv`. These files can either be present in | ||
the root directory (if the instructions have been ratified) or the `unratified` directory. The exact | ||
file-naming policy and location is as mentioned below: | ||
|
||
1. `rv_x` - contains instructions common within the 32-bit and 64-bit modes of extension X. | ||
2. `rv32_x` - contains instructions present in rv32x only (absent in rv64x, e.g. brev8) | ||
3. `rv64_x` - contains instructions present in rv64x only (absent in rv32x, e.g. addw) | ||
4. `rv_x_y` - contains instructions when both extension X and Y are available/enabled. It is recommended to follow canonical ordering for such file names as specified by the spec. | ||
5. `unratified` - this directory will also contain files similar to the above policies, but will | ||
correspond to instructions which have not yet been ratified. | ||
|
||
When an instruction is present in multiple extensions and the spec is vague in defining the extension which owns the instruction, the instruction encoding must be placed in the first canonically ordered extension and should be imported (via the `$import` keyword) in the remaining extensions. | ||
|
||
## Encoding Syntax | ||
|
||
|
||
The encoding syntax uses `$` to indicate keywords. As of now 2 keywords have been identified : `$import` and `$pseudo_op` (described below). The syntax also uses `::` as a means to define the relationship between extension and instruction. `..` is used to define bit ranges. We use `#` to define comments in the files. All comments must be in a separate line. In-line comments are not supported. | ||
|
||
Instruction syntaxes used in this project are broadly categorized into three: | ||
|
||
- **regular instructions** :- these are instructions which hold a unique opcode in the encoding space. A very generic syntax guideline | ||
for these instructions is as follows: | ||
``` | ||
<instruction name> <instruction args> <bit-encodings> | ||
``` | ||
Examples: | ||
``` | ||
lui rd imm20 6..2=0x0D 1..0=3 | ||
beq bimm12hi rs1 rs2 bimm12lo 14..12=0 6..2=0x18 1..0=3 | ||
``` | ||
The bit encodings are usually of 2 types: | ||
- *single bit assignment* : here the value of a single bit is assigned using syntax `<bit-position>=<value>`. For e.g. `6=1` means bit 6 should be 1. Here the value must be 1 or 0. | ||
- *range assignment*: here a range of bits is assigned a value using syntax: `<msb>..<lsb>=<val>`. For e.g. `31..24=0xab`. The value here can be either unsigned integer, hex (0x) or binary (0b). | ||
|
||
- **pseudo_instructions** (a.k.a pseudo\_ops) - These are instructions which are aliases of regular instructions. Their encodings force | ||
certain restrictions over the regular instruction. The syntax for such instructions uses the `$pseudo_op` keyword as follows: | ||
``` | ||
$pseudo_op <extension>::<base-instruction> <instruction name> <instruction args> <bit-encodings> | ||
``` | ||
Here the `<extension>` specifies the extension which contains the base instruction. `<base-instruction>` indicates the name of the instruction | ||
this pseudo-instruction is an alias of. The remaining fields are the same as the regular instruction syntax, where all the args and the fields | ||
of the pseudo instruction are specified. | ||
|
||
Example: | ||
``` | ||
$pseudo_op rv_zicsr::csrrs frflags rd 19..15=0 31..20=0x001 14..12=2 6..2=0x1C 1..0=3 | ||
``` | ||
|
||
If a ratified instruction is a pseudo\_op of a regular unratified | ||
instruction, it is recommended to maintain this pseudo\_op relationship i.e. | ||
define the new instruction as a pseudo\_op of the unratified regular | ||
instruction, as this avoids existence of overlapping opcodes for users who are | ||
experimenting with unratified extensions as well. | ||
|
||
- **imported_instructions** - these are instructions which are borrowed from an extension into a new/different extension/sub-extension. Only regular instructions can be imported. Pseudo-op instructions cannot be imported. Example: | ||
``` | ||
$import rv32_zkne::aes32esmi | ||
``` | ||
## Flow for parse.py | ||
|
||
The `parse.py` python file is used to perform checks on the current set of instruction encodings and also generates multiple artifacts : latex tables, encoding.h header file, etc. This section will provide a brief overview of the flow within the python file. | ||
|
||
To start with, `parse.py` creates a list of all `rv*` files currently checked into the repo (including those inside the `unratified` directory as well). | ||
It then starts parsing each file line by line. In the first pass, we only capture regular instructions and ignore the imported or pseudo instructions. | ||
For each regular instruction, the following checks are performed : | ||
|
||
- for range-assignment syntax, the *msb* position must be higher than the *lsb* position | ||
- for range-assignment syntax, the value of the range must be representable in the space identified by *msb* and *lsb* | ||
- values for the same bit positions should not be defined multiple times. | ||
- All bit positions must be accounted for (either as args or constant value fields) | ||
|
||
Once the above checks are passed for a regular instruction, we then create a dictionary for this instruction which contains the following fields: | ||
- encoding : contains a 32-bit string defining the encoding of the instruction. Here `-` is used to represent instruction argument fields | ||
- extension : string indicating which extension/filename this instruction was picked from | ||
- mask : a 32-bit hex value indicating the bits of the encodings that must be checked for legality of that instruction | ||
- match : a 32-bit hex value indicating the values the encoding must take for the bits which are set as 1 in the mask above | ||
- variable_fields : This is the list of args required by the instruction | ||
|
||
The above dictionary elements are added to a main `instr_dict` dictionary under the instruction node. This process continues until all regular | ||
instructions have been processed. In the second pass, we now process the `$pseudo_op` instructions. Here, we first check if the *base-instruction* of | ||
this pseudo instruction exists in the relevant extension/filename or not. If it is present, the remaining part of the syntax undergoes the same | ||
checks as above. Once the checks pass and if the *base-instruction* is not already added to the main `instr_dict` then the pseudo-instruction is added to | ||
the list. In the third, and final, pass we process the imported instructions. | ||
|
||
The case where the *base-instruction* for a pseudo-instruction may not be present in the main `instr_dict` after the first pass is when only a subset | ||
of extensions are being processed such that the *base-instruction* is not included. | ||
|
||
|
||
## Artifact Generation and Usage | ||
|
||
The following artifacts can be generated using parse.py: | ||
|
||
- instr\_dict.yaml : This file is always generated by parse.py and contains the | ||
entire main dictionary `instr_dict` in YAML format. Note, in this yaml the | ||
*dots* in an instruction are replaced with *underscores* | ||
- encoding.out.h : this is the header file that is used by tools like spike, pk, etc | ||
- instr-table.tex : the latex table of instructions used in the riscv-unpriv spec | ||
- priv-instr-table.tex : the latex table of instructions used in the riscv-priv spec | ||
- inst.chisel : chisel code to decode instructions | ||
- inst.sverilog : system verilog code to decode instructions | ||
- inst.rs : rust code containing mask and match variables for all instructions | ||
- inst.spinalhdl : spinalhdl code to decode instructions | ||
|
||
Make sure the required python pre-requisites are installed by executing the following | ||
command: | ||
|
||
``` | ||
sudo apt-get install python3-pip | ||
pip3 install -r requirements.txt | ||
``` | ||
|
||
To generate all the above artifacts for all instructions currently checked in, simply run `make` from the root-directory. This should print the following log on the command-line: | ||
|
||
``` | ||
Running with args : ['./parse.py', '-c', '-chisel', '-sverilog', '-rust', '-latex', 'rv*', 'unratified/rv*'] | ||
Extensions selected : ['rv*', 'unratified/rv*'] | ||
INFO:: encoding.out.h generated successfully | ||
INFO:: inst.chisel generated successfully | ||
INFO:: inst.sverilog generated successfully | ||
INFO:: inst.rs generated successfully | ||
INFO:: instr-table.tex generated successfully | ||
INFO:: priv-instr-table.tex generated successfully | ||
``` | ||
|
||
By default all extensions are enabled. To select only a subset of extensions you can change the `EXTENSIONS` variable of the makefile to contain only the file names of interest. | ||
For example if you want only the I and M extensions you can do the following: | ||
|
||
```bash | ||
make EXTENSIONS='rv*_i rv*_m' | ||
``` | ||
|
||
Which will print the following log: | ||
|
||
``` | ||
Running with args : ['./parse.py', '-c', '-chisel', '-sverilog', '-rust', '-latex', 'rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m'] | ||
Extensions selected : ['rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m'] | ||
INFO:: encoding.out.h generated successfully | ||
INFO:: inst.chisel generated successfully | ||
INFO:: inst.sverilog generated successfully | ||
INFO:: inst.rs generated successfully | ||
INFO:: instr-table.tex generated successfully | ||
INFO:: priv-instr-table.tex generated successfully | ||
``` | ||
|
||
If you only want a specific artifact you can use one or more of the following targets : `encoding.out.h`, `inst.chisel`, `inst.go`, `inst.rs`, `inst.sverilog`, `inst.spinalhdl`, `latex` | ||
|
||
You can use the `clean` target to remove all artifacts. | ||
|
||
## Adding a new extension | ||
|
||
To add a new extension of instructions, create an appropriate `rv*` file based on the policy defined in [File Naming Policy](#file-naming-policy). Run `make` from the root directory to ensure that all checks pass and all artifacts are created correctly. A successful run should print the following log on the terminal: | ||
|
||
``` | ||
Running with args : ['./parse.py', '-c', '-chisel', '-sverilog', '-rust', '-latex', 'rv*', 'unratified/rv*'] | ||
Extensions selected : ['rv*', 'unratified/rv*'] | ||
INFO:: encoding.out.h generated successfully | ||
INFO:: inst.chisel generated successfully | ||
INFO:: inst.sverilog generated successfully | ||
INFO:: inst.rs generated successfully | ||
INFO:: instr-table.tex generated successfully | ||
INFO:: priv-instr-table.tex generated successfully | ||
``` | ||
|
||
Create a PR for review. | ||
|
||
## Enabling Debug logs in parse.py | ||
|
||
To enable debug logs in parse.py change `level=logging.INFO` to `level=logging.DEBUG` and run the python command. You will now see debug statements on | ||
the terminal like below: | ||
``` | ||
DEBUG:: Collecting standard instructions first | ||
DEBUG:: Parsing File: ./rv_i | ||
DEBUG:: Processing line: lui rd imm20 6..2=0x0D 1..0=3 | ||
DEBUG:: Processing line: auipc rd imm20 6..2=0x05 1..0=3 | ||
DEBUG:: Processing line: jal rd jimm20 6..2=0x1b 1..0=3 | ||
DEBUG:: Processing line: jalr rd rs1 imm12 14..12=0 6..2=0x19 1..0=3 | ||
DEBUG:: Processing line: beq bimm12hi rs1 rs2 bimm12lo 14..12=0 6..2=0x18 1..0=3 | ||
DEBUG:: Processing line: bne bimm12hi rs1 rs2 bimm12lo 14..12=1 6..2=0x18 1..0=3 | ||
``` | ||
|
||
## How do I find where an instruction is defined? | ||
|
||
You can use `grep "^\s*<instr-name>" rv* unratified/rv*` OR run `make` and open | ||
`instr_dict.yaml` and search for the instruction you are looking for. Within that | ||
instruction the `extension` field will indicate which file the instruction was | ||
picked from. | ||
|
Oops, something went wrong.