forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
x_unfl.S
268 lines (252 loc) · 6.8 KB
/
x_unfl.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
|
| x_unfl.sa 3.4 7/1/91
|
| fpsp_unfl --- FPSP handler for underflow exception
|
| Trap disabled results
| For 881/2 compatibility, sw must denormalize the intermediate
| result, then store the result. Denormalization is accomplished
| by taking the intermediate result (which is always normalized) and
| shifting the mantissa right while incrementing the exponent until
| it is equal to the denormalized exponent for the destination
| format. After denormalization, the result is rounded to the
| destination format.
|
| Trap enabled results
| All trap disabled code applies. In addition, the exceptional
| operand needs to be made available to the user with a bias of $6000
| added to the exponent.
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| For details on the license for this file, please see the
| file, README, in this same directory.
X_UNFL: |idnt 2,1 | Motorola 040 Floating Point Software Package
|section 8
#include "fpsp.h"
|xref denorm
|xref round
|xref store
|xref g_rndpr
|xref g_opcls
|xref g_dfmtou
|xref real_unfl
|xref real_inex
|xref fpsp_done
|xref b1238_fix
.global fpsp_unfl
|
| fpsp_unfl --- entry point for the underflow exception
|
| Sets up a local frame on a6, pushes the FPU state frame with fsave
| and saves d0-d1/a0-a1, fp0-fp3 and fpcr/fpsr/fpiar into the local
| frame. unf_res is then called to denormalize, round and store the
| intermediate result.
|
| If the underflow enable bit is set in FPCR_ENABLE, the saved state
| is restored and control passes to real_unfl; otherwise execution
| continues at ck_inex.
|
fpsp_unfl:
link %a6,#-LOCAL_SIZE
fsave -(%a7)
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
fmovemx %fp0-%fp3,USER_FP0(%a6)
fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
|
bsrl unf_res |denormalize, round & store interm op
|
| If underflow exceptions are not enabled, check for inexact
| exception
|
btstb #unfl_bit,FPCR_ENABLE(%a6)
beqs ck_inex
|
| Underflow is enabled. The fix-up below only runs when the E3 bit
| is set in E_BYTE; otherwise go straight to the restore sequence.
|
btstb #E3,E_BYTE(%a6)
beqs no_e3_1
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix |test for bug1238 case
|
| Copy the saved FPSR to FPSR_SHADOW and or sx_mask into the frame
| at E_BYTE.
| NOTE(review): the or is long-sized although the field name suggests
| a byte; presumably sx_mask is positioned accordingly in fpsp.h --
| confirm.
|
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
|
| Restore the registers saved on entry, reload the FPU state frame,
| release the local frame and continue in real_unfl.
|
no_e3_1:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_unfl
|
| It is possible to have either inex2 or inex1 exceptions with the
| unfl. If the inex enable bit is set in the FPCR, and either
| inex2 or inex1 occurred, we must clean up and branch to the
| real inex handler.
|
| Reached from fpsp_unfl when the underflow enable bit is clear.
| d0 = FPCR_ENABLE & FPSR_EXCEPT & 0x3; a zero result means no
| enabled inexact exception was reported and the handler finishes
| at unfl_done.
|
ck_inex:
moveb FPCR_ENABLE(%a6),%d0
andb FPSR_EXCEPT(%a6),%d0
andib #0x3,%d0
beqs unfl_done
|
| Inexact enabled and reported, and we must take an inexact exception
|
take_inex:
btstb #E3,E_BYTE(%a6)
beqs no_e3_2
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix |test for bug1238 case
|
| Copy the saved FPSR to FPSR_SHADOW and or sx_mask into the frame
| at E_BYTE (long-sized or; see fpsp.h for the mask layout).
|
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
|
| Store INEX_VEC into the byte at EXC_VEC+1 (presumably so the frame
| reports the inexact vector to real_inex -- confirm against the
| frame layout in fpsp.h), then restore the saved registers, reload
| the FPU state frame, release the local frame and continue in
| real_inex.
|
no_e3_2:
moveb #INEX_VEC,EXC_VEC+1(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_inex
|
| unfl_done --- exit path when neither an underflow nor an inexact
| trap has to be taken. Reached from ck_inex.
|
| bclrb tests E3 and clears it in one step; the Z flag reflects the
| state of the bit before it was cleared, so beqs is taken when E3
| was NOT set.
|
unfl_done:
bclrb #E3,E_BYTE(%a6)
beqs e1_set |branch if E3 was clear (E1 case)
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix |test for bug1238 case
|
| Copy the saved FPSR to FPSR_SHADOW and or sx_mask into the frame
| at E_BYTE (long-sized or; see fpsp.h for the mask layout).
|
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
|
| E3 case: restore the saved registers, reload the FPU state frame,
| release the local frame and continue in fpsp_done.
|
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral fpsp_done
|
| E3 was clear: restore the saved registers only. There is no
| frestore on this path; unlk reloads a7 from a6, so the state frame
| pushed by fsave in fpsp_unfl is dropped instead of being reloaded
| into the FPU.
|
e1_set:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
unlk %a6
bral fpsp_done
|
| unf_res --- underflow result calculation
|
| Called from fpsp_unfl with a6 addressing the local frame.
| Selects the exceptional operand (WBTEMP when E3 is set in E_BYTE,
| otherwise FPTEMP), moves its sign into LOCAL_SGN, calls denorm and
| round on it and then falls through to unf_store, which stores the
| result and returns with rts.
|
| A longword with the rounding precision in its upper word and zero
| in its lower word is kept on the stack across the call to denorm
| and popped into d1 just before the call to round.
|
unf_res:
bsrl g_rndpr |returns RND_PREC in d0 0=ext,
| ;1=sgl, 2=dbl
| ;we need the RND_PREC in the
| ;upper word for round
|
| Two word pushes build one longword: the zero pushed first ends up
| as the lower word, RND_PREC pushed second as the upper word.
|
movew #0,-(%a7)
movew %d0,-(%a7) |copy RND_PREC to stack
|
|
| If the exception bit set is E3, the exceptional operand from the
| fpu is in WBTEMP; else it is in FPTEMP.
|
btstb #E3,E_BYTE(%a6)
beqs unf_E1
unf_E3:
lea WBTEMP(%a6),%a0 |a0 now points to operand
|
| Test for fsgldiv and fsglmul. If the inst was one of these, then
| force the precision to extended (d0 = 0) for the denorm routine
| and force single precision (1) for the round routine by
| overwriting the precision word saved on the stack.
|
movew CMDREG3B(%a6),%d1 |check for fsgldiv or fsglmul
andiw #0x7f,%d1
cmpiw #0x30,%d1 |check for sgldiv
beqs unf_sgl
cmpiw #0x33,%d1 |check for sglmul
bnes unf_cont |if not, use fpcr prec in round
unf_sgl:
clrl %d0
movew #0x1,(%a7) |override g_rndpr precision
| ;force single
bras unf_cont
unf_E1:
lea FPTEMP(%a6),%a0 |a0 now points to operand
|
| bclrb clears the sign bit in the exponent word and leaves Z set
| when the bit was already zero; sne therefore writes $ff to
| LOCAL_SGN for a negative operand and $00 for a positive one.
|
unf_cont:
bclrb #sign_bit,LOCAL_EX(%a0) |clear sign bit
sne LOCAL_SGN(%a0) |store sign
bsrl denorm |returns denorm, a0 points to it
|
| WARNING:
| ;d0 has guard,round sticky bit
| ;make sure that it is not corrupted
| ;before it reaches the round subroutine
| ;also ensure that a0 isn't corrupted
|
| Set up d1 for round subroutine d1 contains the PREC/MODE
| information respectively on upper/lower register halves.
|
bfextu FPCR_MODE(%a6){#2:#2},%d1 |get mode from FPCR
| ;mode in lower d1
|
| Pop the precision longword pushed at the top of unf_res; its lower
| word is zero, so the add only fills the upper half of d1.
|
addl (%a7)+,%d1 |merge PREC/MODE
|
| WARNING: a0 and d0 are assumed to be intact between the denorm and
| round subroutines. All code between these two subroutines
| must not corrupt a0 and d0.
|
|
| Perform Round
| Input: a0 points to input operand
| d0{31:29} has guard, round, sticky
| d1{01:00} has rounding mode
| d1{17:16} has rounding precision
| Output: a0 points to rounded operand
|
bsrl round |returns rounded denorm at (a0)
|
| Differentiate between store to memory vs. store to register
|
| unf_store --- store the rounded result and update the FPSR image
|
| Entered by fall-through from unf_res with a0 pointing to the
| rounded operand. Opclass 3 (binary 011) is handled as a store to
| memory; every other opclass is handled as a store to a float
| register, after which the z and neg condition code bits are set
| in FPSR_CC. Both paths end at unf_done, which returns with rts.
|
unf_store:
bsrl g_opcls |returns opclass in d0{2:0}
cmpib #0x3,%d0
bnes not_opc011
|
| At this point, a store to memory is pending
|
opc011:
bsrl g_dfmtou
tstb %d0
beqs ext_opc011 |If extended, do not subtract
| ;If destination format is sgl/dbl,
tstb LOCAL_HI(%a0) |If rounded result is normal,don't
| ;subtract
bmis ext_opc011
subqw #1,LOCAL_EX(%a0) |account for denorm bias vs.
| ;normalized bias
| ; normalized denormalized
| ;single $7f $7e
| ;double $3ff $3fe
|
ext_opc011:
bsrl store |stores to memory
bras unf_done |finish up
|
| At this point, a store to a float register is pending
|
not_opc011:
bsrl store |stores to float register
| ;a0 is not corrupted on a store to a
| ;float register.
|
| Set the condition codes according to result
|
| z_bit is set only when both mantissa longwords are zero; neg_bit
| is set when the sign bit in the exponent word is set.
|
tstl LOCAL_HI(%a0) |check upper mantissa
bnes ck_sgn
tstl LOCAL_LO(%a0) |check lower mantissa
bnes ck_sgn
bsetb #z_bit,FPSR_CC(%a6) |set condition codes if zero
ck_sgn:
btstb #sign_bit,LOCAL_EX(%a0) |check the sign bit
beqs unf_done
bsetb #neg_bit,FPSR_CC(%a6)
|
| Finish.
|
| If inex2 is set in FPSR_EXCEPT, also set aunfl in FPSR_AEXCEPT;
| then return to the caller of unf_res.
|
unf_done:
btstb #inex2_bit,FPSR_EXCEPT(%a6)
beqs no_aunfl
bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)
no_aunfl:
rts
|end