The luck possessed by thee is unparalleled, for thou art did grant the blessing bestoweth upon this enchiridion's grandeur. This definitive guide will answer all your questions about this ancient and mysterious programming language. The dialect used will be x86-64 GAS syntax/AT&T for the GCC compiler, but you can apply these concepts to most Assembly dialects. Prerequisites: basic knowledge of the C programming language and/or full completion of the hit game Human Resource Machine will help you grasp the concepts quicker.
- before learning the fundamentals, it is important to understand the different ways to execute assembly code
- similar to inline assembly
- able to define .data segment
- need to make your own main function
cat > bruh.s <<- 'BRUH'
.globl main
.data
# .asciz appends the terminating NUL byte that puts needs to find the end of the string
bruh: .asciz "bruh moment"
.text
main:
# prologue: save the caller's frame pointer (this also makes rsp 16-byte aligned for the call)
push %rbp
mov %rsp, %rbp
# puts(bruh): the 1st argument goes in rdi
mov $bruh, %rdi
call puts
# return value of main (exit status 0)
xor %eax, %eax
# epilogue: restore the caller's stack and frame pointer
mov %rbp, %rsp
pop %rbp
# return to the C runtime; without this, execution runs off the end of main
ret
BRUH
gcc -no-pie -g bruh.s -o bruh
./bruh
cat > bruh.s <<- 'BRUH'
# Linux x86-64 syscall numbers and the stdout file descriptor
.equ SYS_WRITE, 1
.equ SYS_EXIT, 60
.equ STDOUT, 1
.data
bruh: .ascii "bruh moment"
# length of the message = current location minus its start
.equ bruh_len, . - bruh
.text
.global _start
_start:
# write(STDOUT, bruh, bruh_len)
mov $SYS_WRITE, %eax
mov $STDOUT, %edi
lea bruh(%rip), %rsi
mov $bruh_len, %edx
syscall
# exit(0)
mov $SYS_EXIT, %eax
xor %edi, %edi
syscall
BRUH
as -g ./bruh.s -o bruh.o
ld bruh.o -o bruh
./bruh
int main() {
asm volatile(R"(
movb $'b', (%rsp)
movb $'r', 1(%rsp)
movb $'u', 2(%rsp)
movb $'h', 3(%rsp)
movb $0, 4(%rsp)
mov %rsp, %rdi
call puts
)");
}
- Useful if you want to just test a few instructions but it's a bit complicated to get started
- Read More
int main() {
char * bruh_moment = "bruh\0";
asm volatile(R"(
call puts
)"
:
: "D" (bruh_moment)
);
}
- Section [.data || .text]
- Literal [$5 || $'\n' || $bruh || $3+5 || $0b1010 || $0xdeadbeef || $0777]
- Register [%rax || %esi || %bh || %cl]
- Memory [(%rsp) || 8(%rsp,%rcx) || -16(%rsp, %rcx, 8)]
- Label [lab: || yourmom: || bruh:]
- Instruction [mov $5, %rax]
.globl main
- allows for the file to call your code
# gcc (the C runtime supplies _start and calls your main)
.globl main
.text
main:
# as (no C runtime: execution starts at _start)
# note: the directive takes the bare symbol name, no trailing colon
.globl _start
.text
_start:
.data
- define constants
.text
- rest of your code
- literals are prefixed with a $ symbol
# number 32
$32
# single quote char
$'\''
# expression (6 / 9) + 5 (int division, so 6 / 9 is 0 and the result is 5)
$(6/9)+5
$ ( 6 / 9 ) + 5
# value of the constant "bruh" address
.data
bruh: .ascii "bruh\0"
.text
$bruh
# binary
$0b1010101
# octal
$0777
# hexadecimal
$0xabc
- registers are prefixed with a % symbol
- all non-xmm registers have 64, 32, 16 and 8 bit sections (same register, different chunks)
- only rax, rbx, rcx, rdx also expose bits 8-15 as a separate high (h) byte section (%ah, %bh, %ch, %dh)
- most common register
- specify syscall instruction number
- dividend and quotient in integer division (the "a" in [a = a / b])
- return value of function call
- 1st argument in function call / syscall
- 2nd argument in function call / syscall
- 3rd argument in function call / syscall
- remainder in integer division (the "d" in [d = a % b])
- 4th argument in function call (a syscall takes its 4th argument in %r10 instead, because the syscall instruction overwrites %rcx)
- looping variable
- 5th argument in function call / syscall
- 6th argument in function call / syscall
- SIMD float register
- 1st argument when calling function (if float)
- return value from function (if float)
- more SIMD float registers
- store an old value of %rsp for functions
- can store arrays (base stack pointer)
- points to the top of the stack
- can be used to create local variables for recursion
- push and pop will modify (need to utilise %rbp)
- can store arrays (stack pointer)
- don't need to use this as most instructions abstract it away
- allows you to move the instruction pointer
- don't need to use this as there are instructions referencing individual flags
There exists a prison containing an infinite amount of prison cells. There are registered bad guys convicted of the most heinous crimes. It so happens that they have names including but not limited to %rax, %rbx, %rcx. They are given the prison numbers $1, $2, $3 respectively. When they get to their prison cell via their prison number, they find a random amount of chicken inside. In this hypothetical thought experiment, the prison cells represent the individual memory cells that can be indexed via the numbers of the prisoners. The amount of chicken refers to the value that you find when indexing a prison cell. This value will change depending on the size of the prison cell you choose to use. For example, if you use the q suffix as seen in "movq", it assumes that the prison cell is 8 bytes wide. The instruction will treat the indexed byte plus the 7 bytes after it as a single value. By this logic, it is safe to assume the size of one's prison cell is directly proportional to the amount of chicken that is available.
- store local variables for recursion
- store variables that are instantly recognizable for improved coding speed
- multiplication via lea instruction
- storing and indexing arrays
- usually you want an offset of %rbp or %rsp
# Memory operand forms: displacement(base, index, scale)
# *rbp (the access width comes from the instruction suffix or the register
# operand it is paired with, not from the size of %rbp)
(%rbp)
# *(8 + rbp)
8(%rbp)
# *(-8 + rsp)
-8(%rsp)
# *(rbp + rax)
(%rbp,%rax)
# *(-6969 + rbp + rax * 8) (useful for looping through an array at an offset)
# the scale factor may only be 1, 2, 4 or 8
-6969(%rbp,%rax,8)
# rsp = "ab\0" (one byte per store, hence the b suffix)
movb $'a', 0(%rsp)
movb $'b', 1(%rsp)
movb $0, 2(%rsp)
Disclaimer: GAS syntax instructions are backwards when compared to conventional programming languages, very epic gamer moment.
- most instructions are split into 3 components
- the "name" will use the "source" to modify the "destination"
- instruction name (mnemonic)
- source
- destination
- the most fundamental instruction is assignment
- move A into B
- when working with memory specify the suffix
# AT&T operand order: source first, destination second
# 1 -> rax (rax = 1)
mov $1, %rax
# rbp -> rdi (rdi = rbp)
mov %rbp, %rdi
- register reference assignment
- useful for scanf int / char
# lea computes the address rbp - 6969 and stores it; no memory is read
# &var_6969 -> rcx (rcx = &var_6969)
lea -6969(%rbp), %rcx
- add A to B
# the destination (2nd operand) is both an input and the place the sum is stored
# rax + 1 -> rax (rax += 1)
add $1, %rax
# rax + rcx -> rax (rax += rcx)
add %rcx, %rax
- regular negation
- flips all bits and adds 1
- bitwise xor (^)
- bitwise and (&)
- bitwise negation (~)
- flips all bits
- bitwise left shift (<<)
- bitwise right shift (>>)
- maintain signed bit
- bitwise right shift
- also shift the signed bit
- unsigned division of rax
- ensure %rdx = 0
# the dividend is the 128-bit value rdx:rax, so clear the high half first
# (writing edx zero-extends into rdx)
xor %edx, %edx
# rax / rcx -> rax (rax /= rcx)
# rax % rcx -> rdx (rdx = rax % rcx)
# div is the unsigned form; signed division is cqo followed by idiv
div %rcx
- unsigned multiplication of rax
# low 64 bits of rax * rcx -> rax (rax *= rcx)
# high 64 bits of the product -> rdx (whatever was in rdx is overwritten)
# mul is the unsigned form; imul is the signed one
mul %rcx
- exchange the source and destination (a, b) = (b, a)
- jmp: jump if 6<9 (unconditional jump)
- jz / je: jump if zero
- jnz / jne: jump if not zero
- jg: jump if greater
- jge: jump if greater or equal
- jl: jump if less than
- jle: jump if less than or equal
- jrcxz: jump if %rcx is zero (no cmp needed; jecxz tests the 32-bit %ecx, and the 16-bit jcxz is not available in 64-bit code)
if (rax < 10) {
rax = 1;
} else {
rax = 0;
}
# if (rax < 10) rax = 1; else rax = 0;
cmp $10, %rax
# signed compare: skip the "then" arm when rax >= 10
jge .Lelse
# "then" arm is the fall-through path
mov $1, %rax
# hop over the "else" arm
jmp .Ldone
.Lelse:
mov $0, %rax
.Ldone:
int rax = 1;
int rbx = 7;
int rcx = 5;
do {
rax *= rbx;
} while (--rcx);
# computes rax = 7 * 7 * 7 * 7 * 7 with a counted loop
mov $1, %rax
# rbx is callee-saved: save and restore it if this runs inside a function
mov $7, %rbx
# rcx = number of iterations
mov $5, %rcx
# goto label name
for_loop:
# rax * rbx -> rax (the two-operand form leaves rdx untouched;
# one-operand imul would overwrite rdx with the high half of the product)
imul %rbx, %rax
# loop decrements rcx and jumps while rcx != 0 (it only works with rcx)
loop for_loop
int rax = 123;
rax = sqrt(rax);
mov $123, %rax
# convert the signed 64-bit integer in rax to a double in xmm0
cvtsi2sd %rax, %xmm0
# sqrt(xmm0) -> xmm1
sqrtsd %xmm0, %xmm1
# convert the double back to an integer, truncating toward zero like a C
# double -> int assignment (cvtsd2si without the extra t would round instead)
cvttsd2si %xmm1, %rax
int rax = 123;
double rax = sqrt(rax);
mov $123, %rax
# convert the signed 64-bit integer in rax to a double in xmm0
cvtsi2sd %rax, %xmm0
# sqrt(xmm0) -> xmm1
sqrtsd %xmm0, %xmm1
# a double is 64 bits wide (a quadword), so movq copies its raw bits into rax;
# nothing is converted: rax now holds the bit pattern of the double, not an integer value
movq %xmm1, %rax
int rax = 123;
double rax_f = sqrt(rax);
printf("%f", rax_f);
# assumes the usual prologue (push %rbp; mov %rsp, %rbp) has already run
# reserve 16 bytes of locals below rbp for the format string;
# a multiple of 16 keeps rsp aligned for the call
sub $16, %rsp
mov $123, %rax
cvtsi2sd %rax, %xmm0
# sqrt(xmm0) -> xmm0: printf reads its 1st floating-point argument from xmm0,
# not from an integer register
sqrtsd %xmm0, %xmm0
# build "%f\0" one byte at a time (the b suffix gives the store its size)
movb $'%', -16(%rbp)
movb $'f', -15(%rbp)
movb $0, -14(%rbp)
# 1st integer argument: pointer to the format string
lea -16(%rbp), %rdi
# variadic call: al = number of vector (xmm) registers carrying arguments
mov $1, %eax
call printf
# getchar() -> rax (getchar returns an int, so the character or EOF is in eax)
call getchar
char * rbp = "%d\0";
int var_6969;
scanf(rbp, &var_6969);
rbp = "%d\0";
printf(rbp, var_6969);
# assumes the usual prologue (push %rbp; mov %rsp, %rbp) has already run
# reserve locals below rbp so the saved frame pointer and return address are
# never overwritten; 6976 is a multiple of 16 (keeps rsp aligned for the
# calls) and is large enough to cover var_6969 at -6969(%rbp)
sub $6976, %rsp
# format string "%d\0" at -16(%rbp), one byte per store (hence the b suffix)
movb $'%', -16(%rbp)
movb $'d', -15(%rbp)
movb $0, -14(%rbp)
# format string -> rdi (1st arg)
lea -16(%rbp), %rdi
# &var_6969 -> rsi (2nd arg is var ref)
lea -6969(%rbp), %rsi
# variadic call: al = number of vector (xmm) registers carrying arguments (none here)
xor %eax, %eax
# scanf("%d", &var_6969)
call scanf
# the format string is still in memory; only the argument registers need
# reloading because the call may have overwritten them
lea -16(%rbp), %rdi
# %d is a 32-bit int, so load exactly 4 bytes
movl -6969(%rbp), %esi
xor %eax, %eax
# printf("%d", var_6969)
call printf
int rax, rcx, rdi;
rax = 500;
int * rsp = 69;
char * rsi = &rsp;
*rsi = '\n';
rcx = 10;
int len = 1;
do {
*--rsi = (rax % rcx) + '0';
rax /= rcx;
len++;
} while(rax);
rax = 1;
rdi = 1;
syscall(rax, rdi, rsi, len);
# Prints the unsigned number in rax in decimal followed by a newline, using
# the write syscall. The text is built backwards in the bytes just below rsp.
mov $500, %rax
print_num:
# the byte right under rsp holds the trailing newline; rsi points at it
movb $'\n', -1(%rsp)
lea -1(%rsp), %rsi
# divisor (base 10); writing ecx zero-extends into rcx
mov $10, %ecx
print_digit:
# rdx:rax / rcx -> quotient in rax, remainder (the next digit) in rdx
xor %edx, %edx
div %rcx
# digit value -> ASCII character
add $'0', %dl
# step one byte down and store the character
sub $1, %rsi
mov %dl, (%rsi)
# keep going until the quotient is zero
test %rax, %rax
jnz print_digit
# length = rsp - rsi (digits plus the newline)
mov %rsp, %rdx
sub %rsi, %rdx
# write(1, rsi, rdx)
mov $1, %edi
mov $1, %eax
syscall
/* Recursively counts n up to 5: returns 5 for any n below 5, otherwise n itself. */
int f(int n) {
    return (n >= 5) ? n : f(n + 1);
}
/* Prints the result of f(1) as a decimal integer. */
int main() {
    int result = f(1);
    printf("%d", result);
}
.globl main
.data
bruh: .string "%d"
.text
# int main(void): prints f(1) with printf and returns 0
# ($bruh is an absolute address, so link with -no-pie)
main:
# prologue: save the caller's frame pointer (also makes rsp 16-byte aligned for the calls)
push %rbp
mov %rsp, %rbp
# f(1): 1st argument in rdi (writing edi zero-extends into rdi)
mov $1, %edi
call f
# printf(bruh, f(1))
mov $bruh, %rdi
mov %rax, %rsi
# variadic call: al = number of vector (xmm) registers carrying arguments (none)
xor %eax, %eax
call printf
# return 0
xor %eax, %eax
# epilogue
mov %rbp, %rsp
pop %rbp
# without this ret, execution would fall through into f
ret
# f(n): returns f(n + 1) while n < 5, otherwise n
# In:  rdi = n
# Out: rax = result
f:
push %rbp
mov %rsp, %rbp
# every call gets its own frame, so each level of the recursion has a private
# copy of n; 16 bytes (not 8) keeps rsp 16-byte aligned at the call below
sub $16, %rsp
mov %rdi, -8(%rbp)
# n > 4 means n >= 5: nothing left to count
cmpq $4, -8(%rbp)
jg else_ret
# n < 5: return f(n + 1); load all 8 bytes, matching the 8-byte store above
mov -8(%rbp), %rax
inc %rax
mov %rax, %rdi
call f
jmp if_ret
else_ret:
# return n
mov -8(%rbp), %rax
if_ret:
# leave is shorthand for: mov %rbp, %rsp; pop %rbp
# it throws away this call's locals and restores the caller's frame pointer,
# so ret then pops the return address and resumes in the caller
leave
ret