Skip to content

Commit

Permalink
Architecture updater (auto-sync) - Updating ARM (capstone-engine#1949)
Browse files Browse the repository at this point in the history
* Add auto-sync updater.

* Update Capstone core with auto-sync changes.

* Update ARM via auto-sync.

* Make changes to arch modules which are introduced by auto-sync.

* Update tests for ARM.

* Fix build warnings for make

* Remove meson.build

* Print shift amount in decimal

* Patch non LLVM register alias.

* Change type of immediate operand to unsigned (due to: capstone-engine#771)

* Replace all occurrences of a register with its alias.

* Fix printing of signed imms

* Print rotate amount in decimal

* Change imm type to int64_t to match LLVM imm type.

* Fix search for register names, by completing string first.

* Print ModImm operands always in decimal

* Use number format of previous capstone version.

* Correct implicit writes and update_flags according to SBit.

* Add missing test for RegImmShift

* Reverse incorrect comparison.

* Set shift information for move instructions.

* Set mem access for all memory operands

* Set subtracted flag if offset is negative.

* Add flag for post-index memory operands.

* Add detail op for BX_RET and MOVPCLR

* Use instruction post_index operand.

* Add VPOP and VPUSH as unique CS IDs.

* Add shifting info for MOVsr.

* Add TODOs.

* Add in LLVM hardcoded operands to detail.

* Move detail editing from InstPrinter to Mapping

* Formatting

* Add removed check.

* Add writeback register and constraints to RFEI instructions.

* Translate shift immediate

* Print negative immediates

* Remove duplicate invalid entry

* Add CS groups to instructions

* Fix write attributes of stores.

* Add missing names of added instructions

* Fix LLVM bug

* Add more post_index flags

* http -> https

* Make generated functions static

* Remove tab prefix for alias instructions.

* Set ValidateMCOperand to NULL.

* Fix AddrMode3Operand operands

* Allow getting system and banked register name via API

* Add writeback to STC/LDC instructions.

* Fix (hopefully) last case where disp is negative and subtracted = true

* Remove accidentally introduced regressions
  • Loading branch information
Rot127 authored Jul 19, 2023
1 parent be5a26c commit 104f693
Show file tree
Hide file tree
Showing 241 changed files with 133,011 additions and 35,573 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "suite/auto-sync/vendor/tree-sitter-cpp"]
path = suite/auto-sync/vendor/tree-sitter-cpp
url = https://github.com/tree-sitter/tree-sitter-cpp.git
18 changes: 12 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ set(SOURCES_ENGINE
Mapping.c
MCInst.c
MCInstrDesc.c
MCInstPrinter.c
MCRegisterInfo.c
SStream.c
utils.c
Expand All @@ -115,6 +116,7 @@ set(HEADERS_ENGINE
MCFixedLenDisassembler.h
MCInst.h
MCInstrDesc.h
MCInstPrinter.h
MCRegisterInfo.h
SStream.h
utils.h
Expand All @@ -141,6 +143,7 @@ set(HEADERS_COMMON
include/capstone/sh.h
include/capstone/tricore.h
include/capstone/platform.h
include/capstone/sh.h
)

set(TEST_SOURCES test_basic.c test_detail.c test_skipdata.c test_iter.c)
Expand All @@ -149,28 +152,30 @@ set(TEST_SOURCES test_basic.c test_detail.c test_skipdata.c test_iter.c)
if(CAPSTONE_ARM_SUPPORT)
add_definitions(-DCAPSTONE_HAS_ARM)
set(SOURCES_ARM
arch/ARM/ARMBaseInfo.c
arch/ARM/ARMDisassembler.c
arch/ARM/ARMDisassemblerExtension.c
arch/ARM/ARMInstPrinter.c
arch/ARM/ARMMapping.c
arch/ARM/ARMModule.c
)
set(HEADERS_ARM
arch/ARM/ARMAddressingModes.h
arch/ARM/ARMBaseInfo.h
arch/ARM/ARMDisassembler.h
arch/ARM/ARMDisassemblerExtension.h
arch/ARM/ARMInstPrinter.h
arch/ARM/ARMLinkage.h
arch/ARM/ARMMapping.h
arch/ARM/ARMGenAsmWriter.inc
arch/ARM/ARMGenDisassemblerTables.inc
arch/ARM/ARMGenInstrInfo.inc
arch/ARM/ARMGenRegisterInfo.inc
arch/ARM/ARMGenSubtargetInfo.inc
arch/ARM/ARMMappingInsn.inc
arch/ARM/ARMMappingInsnOp.inc
arch/ARM/ARMGenRegisterName.inc
arch/ARM/ARMGenRegisterName_digit.inc
arch/ARM/ARMGenCSFeatureName.inc
arch/ARM/ARMGenCSMappingInsn.inc
arch/ARM/ARMGenCSMappingInsnOp.inc
arch/ARM/ARMGenCSMappingInsnName.inc
arch/ARM/ARMGenSystemRegister.inc
arch/ARM/ARMMappingInsnName.inc
)
set(TEST_SOURCES ${TEST_SOURCES} test_arm.c)
endif()
Expand Down Expand Up @@ -696,6 +701,7 @@ if(CAPSTONE_INSTALL)
include("GNUInstallDirs")

install(FILES ${HEADERS_COMMON} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/capstone)
install(FILES ${HEADERS_INC} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/capstone/inc)

configure_file(capstone.pc.in ${CMAKE_BINARY_DIR}/capstone.pc @ONLY)
install(FILES ${CMAKE_BINARY_DIR}/capstone.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
Expand Down
25 changes: 25 additions & 0 deletions HACK.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,35 @@ Coding style
- C code follows Linux kernel coding style, using tabs for indentation.
- Python code uses 4 spaces for indentation.

Updating an Architecture
------------------------

The update tool for Capstone is called `auto-sync` and can be found in `suite/auto-sync`.

Not all architectures are supported yet.
Run `suite/auto-sync/Update-Arch.sh -h` to get a list of currently supported architectures.

The documentation on how to update with `auto-sync` or refactor an architecture module
can be found in [docs/AutoSync.md](docs/AutoSync.md).

If a module does not support `auto-sync` yet, it is highly recommended to refactor it
instead of attempting to update it manually.
Refactoring takes less time and also updates the module in the process.

The one exception is `x86`. In LLVM we use several emitter backends to generate C code.
One of those LLVM backends (the `DecoderEmitter`) has two versions.
One for `x86` and another for all the other architectures.
Until now it has not been worth refactoring this unique `x86` backend, so `x86` is not
supported currently.

Adding an architecture
----------------------

If your architecture is supported in LLVM or one of its forks, you can use `auto-sync` to
add the new module.

<!-- TODO: Move this info to the auto-sync docs -->

Obviously, you first need to write all the logic and put it in a new directory arch/newarch
Then, you have to modify other files.
(You can look for one architecture such as EVM in these files to get what you need to do)
Expand Down
14 changes: 13 additions & 1 deletion MCInst.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ void MCInst_Init(MCInst *inst)
inst->size = 0;
inst->has_imm = false;
inst->op1_size = 0;
inst->writeback = false;
inst->ac_idx = 0;
inst->popcode_adjust = 0;
inst->assembly[0] = '\0';
Expand Down Expand Up @@ -268,3 +267,16 @@ bool MCInst_opIsTying(const MCInst *MI, unsigned OpNum)
assert(OpNum < MAX_MC_OPS && "Maximum number of MC operands exceeded.");
return MI->tied_op_idx[OpNum] != -1;
}

/// Returns the value of the @MCInst operand at index @OpNum.
///
/// For a register operand this is the result of MCOperand_getReg(), for an
/// immediate operand the result of MCOperand_getImm(). Any other operand
/// kind trips an assertion; if assertions are compiled out, 0 is returned.
uint64_t MCInst_getOpVal(MCInst *MI, unsigned OpNum)
{
	assert(OpNum < MAX_MC_OPS);
	MCOperand *op = MCInst_getOperand(MI, OpNum);
	if (MCOperand_isReg(op))
		return MCOperand_getReg(op);
	if (MCOperand_isImm(op))
		return MCOperand_getImm(op);

	assert(0 && "Operand type not handled in this getter.");
	// Only reached when assertions are disabled. Return a defined value
	// instead of running off the end of a non-void function.
	return 0;
}
3 changes: 2 additions & 1 deletion MCInst.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ struct MCOperand {
kDFPImmediate, ///< Double-Floating-point immediate operand.
kExpr, ///< Relocatable immediate operand.
kInst ///< Sub-instruction operand.

} MachineOperandType;
unsigned char Kind;

Expand Down Expand Up @@ -162,4 +161,6 @@ bool MCInst_opIsTied(const MCInst *MI, unsigned OpNum);

bool MCInst_opIsTying(const MCInst *MI, unsigned OpNum);

uint64_t MCInst_getOpVal(MCInst *MI, unsigned OpNum);

#endif
227 changes: 227 additions & 0 deletions MCInstPrinter.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/* Capstone Disassembly Engine */
/* By Rot127 <[email protected]>, 2023 */

#include "MCInstPrinter.h"
#include "cs_priv.h"
#include <capstone/platform.h>

extern bool ARM_getFeatureBits(unsigned int mode, unsigned int feature);

/// Tests whether the feature @Value is enabled for the handle attached to
/// @MI. The query is dispatched on the architecture of that handle; only
/// CS_ARCH_ARM is handled here.
///
/// Returns the result of the architecture specific query. For any other
/// architecture an assertion fires; if assertions are compiled out, false
/// is returned.
static bool testFeatureBits(const MCInst *MI, uint32_t Value)
{
	assert(MI && MI->csh);
	switch (MI->csh->arch) {
	case CS_ARCH_ARM:
		return ARM_getFeatureBits(MI->csh->mode, Value);
	default:
		assert(0 && "Not implemented for current arch.");
		// Without assertions, do not fall through into the ARM query for
		// a foreign architecture. Report the feature as not present.
		return false;
	}
}

/// Evaluates a single alias pattern condition @C against @MI.
///
/// Feature conditions (Feature, NegFeature, OrFeature, OrNegFeature and
/// EndOrFeatures) do not consume an operand. Every other condition kind
/// reads the operand at index *@OpIdx and increments *@OpIdx afterwards.
///
/// @MI: Instruction to test.
/// @MRI: Register info, used for the register class membership test.
/// @OpIdx: In/out index of the next operand to consume.
/// @M: Alias matching data; provides the custom operand validator.
/// @C: The condition to evaluate.
/// @OrPredicateResult: Accumulates the result of an OrFeature/OrNegFeature
///                     list. It is reset to false by EndOrFeatures.
///
/// Returns true if the condition holds. OrFeature/OrNegFeature always
/// return true; their real result is returned by the EndOrFeatures marker.
/// An unknown kind trips an assertion and, if assertions are compiled out,
/// yields false.
static bool matchAliasCondition(MCInst *MI, const MCRegisterInfo *MRI,
				unsigned *OpIdx, const AliasMatchingData *M,
				const AliasPatternCond *C,
				bool *OrPredicateResult)
{
	// Feature tests are special, they don't consume operands.
	if (C->Kind == AliasPatternCond_K_Feature)
		return testFeatureBits(MI, C->Value);
	if (C->Kind == AliasPatternCond_K_NegFeature)
		return !testFeatureBits(MI, C->Value);
	// For feature tests where just one feature is required in a list, set the
	// predicate result bit to whether the expression will return true, and only
	// return the real result at the end of list marker.
	if (C->Kind == AliasPatternCond_K_OrFeature) {
		*OrPredicateResult |= testFeatureBits(MI, C->Value);
		return true;
	}
	if (C->Kind == AliasPatternCond_K_OrNegFeature) {
		*OrPredicateResult |= !(testFeatureBits(MI, C->Value));
		return true;
	}
	if (C->Kind == AliasPatternCond_K_EndOrFeatures) {
		bool Res = *OrPredicateResult;
		*OrPredicateResult = false;
		return Res;
	}

	// Get and consume an operand.
	MCOperand *Opnd = MCInst_getOperand(MI, *OpIdx);
	++(*OpIdx);

	// Check the specific condition for the operand.
	switch (C->Kind) {
	case AliasPatternCond_K_Imm:
		// Operand must be a specific immediate.
		return MCOperand_isImm(Opnd) &&
		       MCOperand_getImm(Opnd) == (int32_t)C->Value;
	case AliasPatternCond_K_Reg:
		// Operand must be a specific register.
		return MCOperand_isReg(Opnd) && MCOperand_getReg(Opnd) == C->Value;
	case AliasPatternCond_K_TiedReg:
		// Operand must match the register of another operand.
		return MCOperand_isReg(Opnd) &&
		       MCOperand_getReg(Opnd) ==
			       MCOperand_getReg(MCInst_getOperand(MI, C->Value));
	case AliasPatternCond_K_RegClass:
		// Operand must be a register in this class. Value is a register class
		// id.
		return MCOperand_isReg(Opnd) &&
		       MCRegisterClass_contains(
			       MCRegisterInfo_getRegClass(MRI, C->Value),
			       MCOperand_getReg(Opnd));
	case AliasPatternCond_K_Custom:
		// Operand must match some custom criteria.
		assert(M->ValidateMCOperand && "A custom validator should be set but isn't.");
		return M->ValidateMCOperand(Opnd, C->Value);
	case AliasPatternCond_K_Ignore:
		// Operand can be anything.
		return true;
	case AliasPatternCond_K_Feature:
	case AliasPatternCond_K_NegFeature:
	case AliasPatternCond_K_OrFeature:
	case AliasPatternCond_K_OrNegFeature:
	case AliasPatternCond_K_EndOrFeatures:
		assert(0 && "handled earlier");
		// Defined result when assertions are compiled out.
		return false;
	}
	assert(0 && "invalid kind");
	// Defined result when assertions are compiled out: an unknown condition
	// kind never matches.
	return false;
}

/// Returns true if at least one of the Opcode, PatternStart and NumPatterns
/// fields of @P is non-zero. An entry with all three fields zero yields false.
static inline bool validOpToPatter(const PatternsForOpcode *P)
{
	return P->Opcode != 0 || P->PatternStart != 0 || P->NumPatterns != 0;
}

/// Searches the alias tables in @M for an alias of the instruction @MI.
///
/// Returns a pointer to the null terminated alias string inside
/// M->AsmStrings for the first pattern of the opcode whose conditions all
/// match. Returns NULL if the opcode has no entry in M->OpToPatterns, if a
/// pattern expects a different number of operands than @MI has, or if no
/// pattern matches.
const char *matchAliasPatterns(MCInst *MI, const AliasMatchingData *M)
{
	// Linear scan over the opcode table. Advance as long as the opcode of the
	// entry is smaller than the wanted one and the entry is not all zero.
	const unsigned WantedOpcode = MI->Opcode;
	const PatternsForOpcode *Entry = M->OpToPatterns;
	while (Entry->Opcode < WantedOpcode && validOpToPatter(Entry))
		++Entry;

	// No aliases for this opcode.
	if (!validOpToPatter(Entry) || Entry->Opcode != WantedOpcode)
		return NULL;

	// Try all patterns of this opcode in table order.
	const AliasPattern *const FirstPattern = M->Patterns + Entry->PatternStart;
	const AliasPattern *const PatternsEnd = FirstPattern + Entry->NumPatterns;
	for (const AliasPattern *Pat = FirstPattern; Pat != PatternsEnd; ++Pat) {
		// Check operand count first. A mismatch ends the whole search.
		if (MCInst_getNumOperands(MI) != Pat->NumOperands)
			return NULL;

		// Walk the conditions of this pattern until one fails or all passed.
		const AliasPatternCond *Cond = M->PatternConds + Pat->AliasCondStart;
		const AliasPatternCond *const CondsEnd = Cond + Pat->NumConds;
		unsigned OpIdx = 0;
		bool OrPredicateResult = false;
		while (Cond != CondsEnd &&
		       matchAliasCondition(MI, MI->MRI, &OpIdx, M, Cond,
					   &OrPredicateResult))
			++Cond;

		if (Cond != CondsEnd)
			continue; // A condition failed, try the next pattern.

		// First full match wins. An offset of ~0U means "no alias".
		const uint32_t AsmStrOffset = Pat->AsmStrOffset;
		if (AsmStrOffset == ~0U)
			return NULL;

		// The offset should point to the beginning of an alias string, so it
		// should either be zero or be preceded by a null byte.
		return M->AsmStrings + AsmStrOffset;
	}

	// If no alias matched, don't print an alias.
	return NULL;
}

// TODO Add functionality to toggle the flag.
/// Reports whether markup should be used. Currently always false.
bool getUseMarkup(void)
{
	const bool use_markup = false;
	return use_markup;
}

/// Utility function to make adding mark ups simpler.
/// Returns @s if getUseMarkup() reports true, otherwise an empty string.
const char *markup(const char *s)
{
	return getUseMarkup() ? s : "";
}

// Binary search for @encoding in the IndexType array @index.
// @index must hold @size entries sorted by ascending encoding.
// Returns the index of the matching entry, or -1 if not found or if the
// array is empty. Note: the return type is unsigned, so -1 is delivered
// to the caller as UINT_MAX.
unsigned int binsearch_IndexTypeEncoding(const struct IndexType *index, size_t size, uint16_t encoding)
{
	size_t left, right, m;

	// An empty table has no first/last entry to compare against.
	if (size == 0)
		return -1;

	// Quick reject: outside of the range covered by the sorted table.
	if (encoding < index[0].encoding || encoding > index[size - 1].encoding)
		return -1;

	// Half-open search window [left, right). Unlike a closed window this
	// never computes `m - 1`, which would wrap around for m == 0.
	left = 0;
	right = size;

	while (left < right) {
		m = left + (right - left) / 2;
		if (encoding == index[m].encoding)
			return (unsigned int)m;

		if (encoding < index[m].encoding)
			right = m;
		else
			left = m + 1;
	}

	// not found
	return -1;
}

// Binary search for @name in the IndexTypeStr array @index.
// @index must hold @size entries sorted by name in ascending strcmp() order.
// Returns the index of the matching entry, or -1 if not found or if the
// array is empty. Note: the return type is unsigned, so -1 is delivered
// to the caller as UINT_MAX.
unsigned int binsearch_IndexTypeStrEncoding(const struct IndexTypeStr *index, size_t size, const char *name)
{
	size_t left, right, m;
	int cmp;

	// An empty table has no first/last entry to compare against.
	if (size == 0)
		return -1;

	// Quick reject: name sorts before the first or after the last entry.
	// The strcmp() results must stay signed. Stored in an unsigned type a
	// negative result would never be `< 0` and would always be `> 0`.
	if (strcmp(name, index[0].name) < 0 ||
	    strcmp(name, index[size - 1].name) > 0)
		return -1;

	// Half-open search window [left, right). Unlike a closed window this
	// never computes `m - 1`, which would wrap around for m == 0.
	left = 0;
	right = size;

	while (left < right) {
		m = left + (right - left) / 2;
		// Compare once per step and reuse the result.
		cmp = strcmp(name, index[m].name);
		if (cmp == 0)
			return (unsigned int)m;

		if (cmp < 0)
			right = m;
		else
			left = m + 1;
	}

	// not found
	return -1;
}
Loading

0 comments on commit 104f693

Please sign in to comment.