Implementing figFORTH on SH3 assembler
Revision | e1e44e82d09c4a090d82195ddd07e19bbe97036e (tree) |
---|---|
Time | 2014-03-07 14:08:02 |
Author | Joel Matthew Rees <reiisi@user...> |
Commiter | Joel Matthew Rees |
U*, U/ (buggy)
@@ -33,11 +33,61 @@ | ||
33 | 33 | |
34 | 34 | .cpu sh3 |
35 | 35 | |
36 | +; For huge things, like U/ (USLASH) | |
37 | +PRIORITY_SIZE: .DEFINE "1" | |
38 | + | |
39 | + | |
36 | 40 | NATURAL_SIZE: .equ 4 ; 4 byte word |
37 | 41 | HALF_SIZE: .equ ( NATURAL_SIZE / 2 ) |
38 | 42 | ALIGN_MASK: .equ ( H'FF & ~(NATURAL_SIZE - 1) ) |
39 | 43 | ALL_BITS8: .equ ( H'FF & -1 ) |
40 | 44 | |
45 | +; byte offsets for various parts of significance | |
46 | +; In the SH3, natural width is 32 bits, half-width is 16, byte is 8. | |
47 | +; | |
48 | + .AIFDEF _LIT ; (least significant byte first) | |
49 | +LSHALF: .equ 0 | |
50 | +LSBYTEinNAT: .equ 0 | |
51 | +LSBYTEinHALF: .equ 0 | |
52 | +MSHALF: .equ HALF_SIZE | |
53 | +MSBYTEinNAT: .equ ( NATURAL_SIZE - 1 ) | |
54 | +MSBYTEinHALF: .equ ( HALF_SIZE - 1 ) | |
55 | + .AELSE ; _BIG is default ; (most significant byte first) | |
56 | +LSHALF: .equ HALF_SIZE | |
57 | +LSBYTEinNAT: .equ ( NATURAL_SIZE - 1 ) | |
58 | +LSBYTEinHALF: .equ ( HALF_SIZE - 1 ) | |
59 | +MSHALF: .equ 0 | |
60 | +MSBYTEinNAT: .equ 0 | |
61 | +MSBYTEinHALF: .equ 0 | |
62 | + .AENDI | |
63 | +; | |
64 | +; Convenience definitions? -- Or just confusing? | |
65 | +BYTE_hLO_bLO: .equ LSBYTEinNAT | |
66 | +BYTE_hLO_bHI: .equ ( LSHALF + MSBYTEinHALF ) ; high byte of low half | |
67 | +BYTE_hHI_bLO: .equ ( MSHALF + LSBYTEinHALF ) ; low byte of high half | |
68 | +BYTE_hHI_bHI: .equ MSBYTEinNAT | |
69 | +; | |
70 | +; The word "word" is soooooo confusing! | |
71 | +; (Word == half in many people's minds, but "word" has other meanings, especially here.) | |
72 | +; (Long == natural in many people's minds, but what happens when 64 bits happen?) | |
73 | +; | |
74 | +; ****************************************************************************** | |
75 | +; * ***** VERY IMPORTANT NOTE on byte/word order ***** | |
76 | +; * | |
77 | +; * FORTH allows the CPU's natural byte order. | |
78 | +; * | |
79 | +; * ****** BUT! ****** | |
80 | +; * | |
81 | +; * fig-FORTH assumes that | |
82 | +; * DOUBLE natural width integers | |
83 | +; * will be Most-Significant-word-First in memory. | |
84 | +; * | |
85 | +; * So, even on CPUs running least-significant-byte-first, | |
86 | +; * DOUBLE words will still be stored most-significant-natural-word-first. | |
87 | +; * | |
88 | +; * (Don't play games with this. It's bad mojo.) | |
89 | +; ****************************************************************************** | |
90 | + | |
41 | 91 | |
42 | 92 | ; The SH-3 has modern dev tools, so we don't have to pay much attention |
43 | 93 | ; to memory layout, |
@@ -214,9 +264,9 @@ _s\characteristic: .sdata .substr("\name", 0, .len("\name")-1) | ||
214 | 264 | _PREVNAME: .assign _s\characteristic |
215 | 265 | ; Point to the characteristic code for this Word (symbol) to execute. |
216 | 266 | \characteristic .equ $ |
217 | - .data.l _p\characteristic | |
267 | + .data.l _f\characteristic | |
218 | 268 | ; Point to the "parameter" area of the symbol. |
219 | -_p\characteristic .equ $ | |
269 | +_f\characteristic .equ $ | |
220 | 270 | ; This area will contain executable code for primitive (leaf) definitions. |
221 | 271 | ; It will contain a list of virtual instructions for non-primitive (non-leaf) definitions. |
222 | 272 | ; For language global/static constants and variables, it will contain the actual value. |
@@ -240,6 +290,13 @@ exit\@: | ||
240 | 290 | .endm |
241 | 291 | |
242 | 292 | |
293 | +; Utility macros: | |
294 | +; These should help reduce errors. | |
295 | + .macro mTARGET target | |
296 | + .data.l \target-$-NATURAL_SIZE | |
297 | + .endm | |
298 | + | |
299 | + | |
243 | 300 | ; Convenience macros: |
244 | 301 | ; *** Use these instead of stealing code. |
245 | 302 | ; Branch to address is limited to +- 4k, |
@@ -41,31 +41,161 @@ | ||
41 | 41 | ; U* ( u1 u2 --- ud ) |
42 | 42 | ; Multiplies the top two unsigned integers, yielding a double |
43 | 43 | ; integer product. |
44 | -; SH3 MAC is a signed multiply/add, so we can't cheat on U*. | |
45 | -; If we cheat and use memory access to grab half words, | |
46 | -; we have to know whether we are LSB or MSB first. | |
44 | +; | |
45 | +; Rejoice, there is a double unsigned multiply! | |
46 | +; | |
47 | +; ***** FORTH order for double wide is most-significant-first! | |
47 | 48 | ; |
48 | 49 | HEADER U*, USTAR |
49 | - .AIFDEF _LIT | |
50 | - mov.w @fSP, r0 | |
51 | - mov.w @(NATURAL_SIZE,fSP), r1 | |
52 | - .etc | |
53 | - .AELSE ; _BIG is default | |
54 | - mov.w @(HALF_SIZE,fSP), r0 | |
55 | - mov.w @(NATURAL_SIZE+HALF_SIZE,fSP), r1 | |
50 | + mov.l @fSP+, r1 | |
51 | + mov.l @fSP+, r0 | |
52 | + dmulu.l r1, r0 | |
53 | + sts.l macl, @-fSP | |
54 | + rts | |
55 | + sts.l mach, @-fSP | |
56 | 56 | |
57 | - .AENDI | |
57 | + | |
58 | +; Put this close to the test, so that we don't worry about the .AREPEAT | |
59 | +PUDIVover: | |
60 | + mov.b #-1, r0 ; Or we could trap this, if we take the time to define traps. | |
61 | + mov.l r0, @fSP | |
62 | + rts | |
63 | + mov.l r0, @(NATURAL_SIZE,fSP) | |
64 | +; | |
65 | +; (UDIV) ( ud u --- uquotient ) | |
66 | +; Divides the top unsigned integer into the second and third words | |
67 | +; on the stack as a single unsigned double integer, | |
68 | +; leaving only the quotient as an unsigned integer. | |
69 | +; | |
70 | +; The smaller the divisor, the more likely dropping the high word | |
71 | +; of the quotient loses significant bits. | |
72 | +; | |
73 | +; The SH3 manual seems to indicate that we can't trust the remainder | |
74 | +; to remain a true remainder to the end. | |
75 | +; It strongly recommends using multiply-subtract instead, | |
76 | +; to get the remainder. | |
77 | +; | |
78 | +; ***** FORTH order for double wide is most-significant-first! | |
79 | +; | |
80 | + .AIFDEF PRIORITY_SIZE | |
81 | +DIVIDELENGTH: .DEFINE "16" ; repeat count * 2 cycles * count in r3 | |
82 | + .AELSE | |
83 | +DIVIDELENGTH: .DEFINE "32" ; repeat count * 2 cycles | |
84 | + .AENDI | |
85 | +; | |
86 | + HEADER (UDIV), PUDIV | |
87 | + mov.l @fSP+, r2 ; divisor | |
88 | + mov.l @fSP+, r0 ; dividend high part | |
89 | + cmp/hs r2, r0 ; zero divide or overflow? | |
90 | + bt PUDIVover | |
91 | + mov.l @fSP, r1 ; dividend low part | |
92 | + .AIFDEF PRIORITY_SIZE | |
93 | + mov.b #2, r3 ; Trade speed for size | |
94 | + .AENDI | |
95 | + div0u ; Get the flags ready | |
96 | +PUDIVloop: | |
97 | + .AREPEAT DIVIDELENGTH | |
98 | + rotcl r1 | |
99 | + div1 r2, r0 | |
100 | + .AENDR | |
101 | + .AIFDEF PRIORITY_SIZE | |
102 | + dt r3 ; + 4 cycles * count in r3 | |
103 | + bf PUDIVloop | |
104 | + .AENDI | |
105 | + rotcl r1 | |
106 | + rts | |
107 | + mov.l r1, @fSP | |
108 | + | |
109 | + | |
110 | +; U/ ( ud u --- uremainder uquotient ) | |
111 | +; Divides the top unsigned integer into the second and third words | |
112 | +; on the stack as a single unsigned double integer, leaving the | |
113 | +; remainder and quotient (quotient on top) as unsigned integers. | |
114 | +; | |
115 | +; The smaller the divisor, the more likely dropping the high word | |
116 | +; of the quotient loses significant bits. | |
117 | +; | |
118 | +; ***** FORTH order for double wide is most-significant-first! | |
119 | +; | |
120 | + HEADER U/, USLASH | |
121 | + mov.l @(2*NATURAL_SIZE,fSP), r0 | |
122 | + mov.l r0, @-fSP | |
123 | + mov.l @(2*NATURAL_SIZE,fSP), r0 | |
124 | + mov.l r0, @-fSP | |
125 | + mov.l @(2*NATURAL_SIZE,fSP), r0 | |
126 | + bsr _fPUDIV | |
127 | + mov.l r0, @-fSP ; Save the divisor as we go. | |
128 | +; | |
129 | + mov.l @fSP+, fW ; grab the quotient | |
130 | + mov fW, r2 | |
131 | + mov.l @fSP+, r1 ; grab the divisor | |
132 | + dmulu.l r1, r2 ; multiply quotient * divisor | |
133 | + sts.l macl, @-fSP | |
134 | + bsr _fDSUB | |
135 | + sts.l mach, @-fSP ; Store most significant as we go. | |
136 | +; | |
137 | + mov.l @fSP, r0 ; remainder | |
138 | + mov.l r0, @(NATURAL_SIZE,fSP) | |
139 | + rts | |
140 | + mov.l fW, @fSP | |
141 | + | |
58 | 142 | |
59 | 143 | ; + ( n1 n2 --- n1+n2 ) |
60 | 144 | ; Add top two words. |
61 | 145 | ; |
62 | 146 | HEADER +, PLUS |
63 | - mov.l @fSP+, r0 | |
64 | - mov.l @fSP, r1 | |
147 | + mov.l @fSP+, r1 | |
148 | + mov.l @fSP, r0 | |
65 | 149 | add r1, r0 |
66 | 150 | rts |
67 | 151 | mov.l r0, @fSP |
68 | 152 | |
69 | 153 | |
154 | +; D+ ( d1 d2 --- d1+d2 ) | |
155 | +; Add top two double words, leaving the double sum. | |
156 | +; | |
157 | +; ***** FORTH order for double wide is most-significant-first! | |
158 | +; | |
159 | + HEADER D+, DPLUS | |
160 | + mov.l @fSP+, r2 ; high part | |
161 | + mov.l @fSP+, r3 ; low part | |
162 | + mov.l @(NATURAL_SIZE,fSP), r1 ; high part | |
163 | + mov.l @fSP, r0 ; low part | |
164 | + clrt | |
165 | + addc r3, r1 | |
166 | + addc r2, r0 | |
167 | + mov.l r1, @(NATURAL_SIZE,fSP) | |
168 | + rts | |
169 | + mov.l r0, @fSP | |
170 | + | |
70 | 171 | |
172 | +; - ( n1 n2 --- n1-n2 ) | |
173 | +; Subtract top word from second, leaving the difference. | |
174 | +; | |
175 | + HEADER -, SUB | |
176 | + mov.l @fSP+, r1 | |
177 | + mov.l @fSP, r0 | |
178 | + sub r1, r0 | |
179 | + rts | |
180 | + mov.l r0, @fSP | |
181 | + | |
182 | + | |
183 | +; D- ( d1 d2 --- d1+d2 ) | |
184 | +; Subtract top double from second, leaving the double difference. | |
185 | +; | |
186 | +; ***** FORTH order for double wide is most-significant-first! | |
187 | +; | |
188 | + HEADER D-, DSUB | |
189 | + mov.l @fSP+, r2 ; high part | |
190 | + mov.l @fSP+, r3 ; low part | |
191 | + mov.l @(NATURAL_SIZE,fSP), r1 ; high part | |
192 | + mov.l @fSP, r0 ; low part | |
193 | + clrt | |
194 | + subc r3, r1 | |
195 | + subc r2, r0 | |
196 | + mov.l r1, @(NATURAL_SIZE,fSP) | |
197 | + rts | |
198 | + mov.l r0, @fSP | |
199 | + | |
200 | + | |
71 | 201 |
@@ -28,12 +28,14 @@ | ||
28 | 28 | ; Increment the OUT per USER variable. |
29 | 29 | ; |
30 | 30 | HEADER EMIT, EMIT |
31 | - mov.l #PEMIT, r1 ; May be within range of absolute call? | |
31 | + sts.l PR, @-fRP | |
32 | + mov.l #_fPEMIT, r1 ; May be within range of absolute call? | |
32 | 33 | jsr @r1 |
33 | 34 | nop |
34 | 35 | mov.l #XOUT, r0 ; We defined XOUT as the offset itself. |
35 | 36 | mov.l @(r0,fUP), r1 |
36 | 37 | add #1, r1 |
38 | + lds.l @fRP+, PR | |
37 | 39 | rts |
38 | 40 | mov.l r1, @(r0,fUP) |
39 | 41 |
@@ -42,12 +44,14 @@ | ||
42 | 44 | ; Leave the ascii value of the next terminal key struck. |
43 | 45 | ; |
44 | 46 | HEADER KEY, KEY |
45 | - mov.l #PKEY, r1 ; May be within range of absolute call? | |
47 | + sts.l PR, @-fRP | |
48 | + mov.l #_fPKEY, r1 ; May be within range of absolute call? | |
46 | 49 | jsr @r1 |
47 | 50 | nop |
48 | 51 | mov.l @fSP, r1 |
49 | 52 | mov.l #H'000000ff, r0 |
50 | 53 | and r1, r0 |
54 | + lds.l @fRP+, PR | |
51 | 55 | rts |
52 | 56 | mov.l r0, @fSP |
53 | 57 |
@@ -62,9 +66,11 @@ | ||
62 | 66 | ; and may not give exactly these results. |
63 | 67 | ; |
64 | 68 | HEADER ?TERMINAL, QTERM |
65 | - mov.l #PQTER, r1 ; May be within range of absolute call? | |
69 | + sts.l PR, @-fRP | |
70 | + mov.l #_fPQTER, r1 ; May be within range of absolute call? | |
66 | 71 | jsr @r1 |
67 | 72 | nop ; Might need to filter results? |
73 | + lds.l @fRP+, PR | |
68 | 74 | rts |
69 | 75 | nop |
70 | 76 |
@@ -74,9 +80,11 @@ | ||
74 | 80 | ; device. |
75 | 81 | ; |
76 | 82 | HEADER CR, CR |
77 | - mov.l #PCR, r1 ; May be within range of absolute call? | |
83 | + sts.l PR, @-fRP | |
84 | + mov.l #_fPCR, r1 ; May be within range of absolute call? | |
78 | 85 | jsr @r1 |
79 | 86 | nop ; Might push a CR and EMIT, then a LF and EMIT? |
87 | + lds.l @fRP+, PR | |
80 | 88 | rts |
81 | 89 | nop |
82 | 90 |
@@ -5,9 +5,10 @@ | ||
5 | 5 | ; 2014.02.28 |
6 | 6 | |
7 | 7 | .include "context.inc" |
8 | - | |
8 | + | |
9 | 9 | |
10 | 10 | .section main, code, locate=h'8c000000 |
11 | + | |
11 | 12 | .org $ |
12 | 13 | COLD: |
13 | 14 | mov.l #PER_USER, fUP |
@@ -19,13 +20,39 @@ COLD: | ||
19 | 20 | nop |
20 | 21 | |
21 | 22 | TEST_THINGY: |
22 | - .data.l LIT | |
23 | - .data.l 1 | |
24 | - .data.l LIT | |
25 | - .data.l -1 | |
23 | + .data.l LIT, 1 | |
24 | + .data.l LIT, -1 | |
25 | + .data.l BRAN | |
26 | + mTARGET BRAN_THINGY | |
27 | + .data.l 4, 3, 2, 1, 0 ; should branch over these | |
28 | +BRAN_THINGY: | |
26 | 29 | .data.l PLUS |
30 | + .data.l ZBRAN | |
31 | + mTARGET ZBRAN_THINGY0 | |
32 | + .data.l 0, 1, 2, 3 ; should branch over these | |
33 | +ZBRAN_THINGY0: | |
34 | + .data.l LIT, 20 | |
35 | + .data.l LIT, 19 | |
36 | + .data.l SUB | |
37 | + .data.l ZBRAN | |
38 | + mTARGET ZBRAN_THINGY0 | |
39 | + .data.l LIT, 15 | |
40 | + .data.l LIT, 10 | |
41 | + .data.l XDO | |
42 | +LOOP_THINGY: | |
43 | + .data.l LIT, "*" | |
44 | + .data.l EMIT | |
45 | + .data.l XLOOP | |
46 | + mTARGET LOOP_THINGY | |
47 | + .data.l LIT, h'f0f0f0f0 | |
48 | + .data.l LIT, h'0f0f0f0f | |
49 | + .data.l USTAR | |
50 | + .data.l LIT, h'10010000 | |
51 | + .data.l LIT, h'10011001 | |
52 | + .data.l LIT, h'10010 | |
53 | + .data.l USLASH | |
27 | 54 | .data.l BRAN |
28 | - .data.l $+NATURAL_SIZE-TEST_THINGY | |
55 | + mTARGET TEST_THINGY | |
29 | 56 | |
30 | 57 | |
31 | 58 | ; For various reasons, including the above "locate" declaration, |
@@ -48,12 +75,12 @@ PER_USER: .equ $ | ||
48 | 75 | |
49 | 76 | .section pstack, stack, locate=PER_USER+h'E000 |
50 | 77 | fSP_LIMIT: .equ $ |
51 | - .res.b h'1800 | |
78 | + .res.b h'1F00 | |
52 | 79 | fSP_BASE: .equ $ |
53 | 80 | |
54 | - .section rstack, stack, locate=h'8c01F800 | |
81 | + .section rstack, stack, locate=h'8c01FF00 | |
55 | 82 | fRP_LIMIT: .equ $ |
56 | - .res.b h'800 | |
83 | + .res.b h'100 | |
57 | 84 | fRP_BASE: .equ $ |
58 | 85 | |
59 | 86 | .section thevoid, dummy, locate=h'8c020000 |
@@ -171,17 +171,18 @@ XPLOOPminus: | ||
171 | 171 | ; |
172 | 172 | HEADER CMOVE, CMOVE |
173 | 173 | mov.l @fSP, r0 ; count |
174 | + cmp/eq #0, r0 | |
175 | + bt CMOVEdone | |
174 | 176 | mov.l @(NATURAL_SIZE,fSP), r2 ; target |
175 | - bra CMOVEenter | |
176 | - mov.l @(2*NATURAL_SIZE,fSP), r1 ; source (as we jump) | |
177 | + mov.l @(2*NATURAL_SIZE,fSP), r1 ; source | |
177 | 178 | CMOVEloop: |
178 | 179 | mov.b @r1+, r3 |
179 | - mov.b r3, @r2+ | |
180 | - add #-1, r0 | |
181 | -CMOVEenter: | |
182 | - cmp/eq #0, r0 | |
183 | - bf CMOVEloop | |
180 | + mov.b r3, @r2 | |
181 | + dt r0 | |
182 | + bf/s CMOVEloop | |
183 | + add #1, r2 ; Inc as we loop, since there is no auto-inc store. | |
184 | 184 | ; |
185 | +CMOVEdone: | |
185 | 186 | rts |
186 | 187 | add #3*NATURAL_SIZE, fSP ; Drop the parameters as we go. |
187 | 188 |
@@ -62,7 +62,7 @@ PCHKNAMEloop: | ||
62 | 62 | PCHKNAMEno: |
63 | 63 | mov #0, r3 ; r3 is not touched by xNAMESCAN |
64 | 64 | PCHKNAMEret: |
65 | - bsr PNAMESCAN | |
65 | + bsr _fPNAMESCAN | |
66 | 66 | mov.l r2, @fSP ; save it as we go |
67 | 67 | lds.l @fRP+, pr ; Gotta have that return address! |
68 | 68 | rts |
@@ -90,7 +90,7 @@ PCHKNAMElast: | ||
90 | 90 | mov.l @fSP, r0 |
91 | 91 | PFINDloop: |
92 | 92 | mov.b @r0, fW ; We aren't using fW anyway, and it doesn't get walked in. |
93 | - bsr PCHKNAME | |
93 | + bsr _fPCHKNAME | |
94 | 94 | mov.l @fSP+, r0 ; Did we find it? |
95 | 95 | cmp/eq #0, r0 |
96 | 96 | bf/s PFINDfound ; Use the true flag in r0 |