forked from nmlgc/ReC98
-
Notifications
You must be signed in to change notification settings - Fork 0
/
decomp.hpp
187 lines (163 loc) · 7 KB
/
decomp.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/* ReC98
* -----
* Declarations to help decompiling the seemingly impossible
*/
#define DECOMP_HPP
// Flag comparisons
// ----------------
// When used inside a conditional expression like
// if(FLAGS_*) { goto some_label; | return; }
// these assemble into the single given instruction. Apply the ! operator to
// get the N versions.
#define FLAGS_CARRY (_FLAGS & 0x01) /* JC / JAE / JB */
#define FLAGS_ZERO (_FLAGS & 0x40) /* JZ */
#define FLAGS_SIGN (_FLAGS & 0x80) /* JS */
// ----------------
// Alternate version that doesn't spill the port number to DX
#define outportb2(port, val) _asm { \
mov al, val; \
out port, al; \
}
// Alternate version that sets the value first
#define outport2(port, val) _asm { \
mov ax, val; \
mov dx, port; \
out dx, ax; \
}
// Should just be unwrapped wherever it appears. Code that directly uses T
// would be much cleaner.
template <class T> union StupidBytewiseWrapperAround {
T t;
int8_t byte[sizeof(T)];
uint8_t ubyte[sizeof(T)];
};
// Does exactly the same as Turbo C++'s __memcpy__() intrinsic, just with
// stupidly reordered instructions. Can be replaced with regular copy
// assignment wherever it appears.
#if (GAME == 5)
#define copy_near_struct_member(dst, src, src_type, member) { \
_CX = (sizeof(dst) / sizeof(uint16_t)); \
asm { push ds; pop es; } \
_DI = FP_OFF(&dst); \
_SI = FP_OFF(&src); \
_SI += offsetof(src_type, member); \
asm { rep movsw; } \
}
#else
#define copy_near_struct_member(dst, src, src_type, member) { \
asm { push ds; pop es; } \
_DI = FP_OFF(&dst); \
__memcpy__(MK_FP(_ES, _DI), &src.member, sizeof(dst)); \
}
#endif
// Trying to assign a `near` function to a nearfunc_t_near* outside the group
// of the function will cause a fixup overflow error at link time. The only
// known workaround involves lying to the compiler about the true distance of
// the function, removing any declaration of the function from global scope to
// prevent a redefinition error, and casting away its segment.
// TODO: Might no longer be necessary once we can fully rely on segment names
// for code layout, and don't have to mess with groups anymore.
#define set_nearfunc_ptr_to_farfunc(ptr, func) { \
void pascal far func(void); \
ptr = reinterpret_cast<nearfunc_t_near>(func); \
}
// poke() versions that actually inline with pseudoregisters
// ---------------------------------------------------------
#define pokew(sgm, off, val) { *(uint16_t far *)(MK_FP(sgm, off)) = val; }
// Turbo C++ 4.0 generates wrong segment prefix opcodes for the _FS and _GS
// pseudoregisters - 0x46 (INC SI) and 0x4E (DEC SI) rather than the correct
// 0x64 and 0x65, respectively. These prefixes are also not supported in
// inline assembly, which is limited to pre-386 anyway. Compiling via assembly
// (`#pragma inline`) would work and generate the correct instructions here,
// but that would incur yet another dependency on a 16-bit TASM, for something
// honestly quite insignificant.
//
// So, can we somehow work around this issue while retaining the readability
// of the usage code and pretending that this bug doesn't exist? Comparisons
// with segment registers unfortunately don't inline, so something like
// if(sgm == _FS)
// wouldn't work, even inside a macro that replaces [sgm] with _FS. But since
// __emit__() *does* inline, we can use function templates! The default
// versions provide the regularly intended C code for all other registers,
// while explicit specializations for _FS and _GS __emit__() the correct
// instruction opcodes for all offset registers needed. Then, we only need to
// somehow move the pseudoregisters up into the type system... which can
// simply be done by turning them into class names via preprocessor token
// pasting. Sure, this limits this approach to raw registers with no immediate
// offsets, but let's hope we won't ever need those...
//
// Also, hey, no need for the MK_FP() macro if we directly return the correct
// types.
#if defined(__TURBOC__) && defined(__MSDOS__)
// Declared in <dos.h> in these compilers.
void __emit__(uint8_t __byte, ...);
#endif
struct Decomp_ES { void __seg* value() { return (void __seg *)(_ES); } };
struct Decomp_FS { void __seg* value() { return (void __seg *)(_FS); } };
struct Decomp_GS { void __seg* value() { return (void __seg *)(_GS); } };
struct Decomp_DI { void __near* value() { return (void __near *)(_DI); } };
// Removing [val] from the parameter lists of the template functions below
// perfects the inlining.
#define poked(sgm, off, val) \
_EAX = val; \
poked_eax((Decomp##sgm *)nullptr, (Decomp##off *)nullptr, (uint8_t)(0x89));
#define poke_or_d(sgm, off, val) \
_EAX = val; \
poked_eax((Decomp##sgm *)nullptr, (Decomp##off *)nullptr, (uint8_t)(0x09));
template <class Segment, class Offset> inline void poked_eax(
Segment *sgm, Offset *off, uint8_t op
) {
if(op == 0x89) {
*(uint32_t far *)(sgm->value() + off->value()) = _EAX;
} else if(op == 0x09) {
*(uint32_t far *)(sgm->value() + off->value()) |= _EAX;
}
}
inline void poked_eax(Decomp_FS *sgm, Decomp_DI *off, uint8_t op) {
__emit__(0x66, 0x64, op, 0x05); // [op] FS:[DI], EAX
}
inline void poked_eax(Decomp_GS *sgm, Decomp_DI *off, uint8_t op) {
__emit__(0x66, 0x65, op, 0x05); // [op] GS:[DI], EAX
}
// ---------------------------------------------------------
// Circumventing compiler optimizations
// ------------------------------------
// If you don't want to recreate the code layout of the original PC-98
// binaries, these can be safely deleted. They just make the code worse.
#if defined(__TURBOC__) && defined(__MSDOS__)
// Use this function wherever the original code used a immediate 0 literal
// that Turbo C++ would optimize away, e.g. in register assignments
// (_AX = 0 → XOR AX, AX) or comparisons (_AX == 0 → OR AX, AX). This way,
// the compiler is forced to leave space for any potential offset, with the
// literal 0 then being spelled out by the linker.
template <class T> inline T keep_0(T x) {
if(x == 0) {
extern void *near address_0;
return reinterpret_cast<T>(&address_0);
}
return x;
}
// Bypasses the -Z -3 function parameter optimization, where [x] would be
// combined with any potential subsequent 16-bit parameter adjacent in
// memory to form a 32-bit PUSH.
// (Interestingly, using a template function inlines either too well or
// too badly. Only this macro guarantees the intended 16-bit PUSH to be
// consistently emitted.)
#define inhibit_Z3(x) \
*reinterpret_cast<int16_t near *>(reinterpret_cast<uint16_t>(&x))
// Calling an empty inlined function prevents certain jump optimizations.
inline void optimization_barrier(void) {
}
#else
#define keep_0(x) x
#define inhibit_Z3(x) x
#define optimization_barrier()
#endif
// ------------------------------------
// Neither WAIT nor FWAIT emit the emulated WAIT we want...
#define FWAIT_EMU db 0xCD, 0x3D;
// 32-bit ASM instructions not supported by Turbo C++ 4.0J's built-in
// assembler. Makes no sense to compile with `#pragma inline` (and thus,
// require a 16-bit TASM) just for those.
#define MOVSD __emit__(0x66, 0xA5);
#define REP __emit__(0xF3);