Initial revision
[ssr] / StraySrc / Libraries / Sapphire / sail / _s / tokenise
1 ;
2 ; tokenise.s
3 ;
4 ; Tokenise a Termite script
5 ;
6 ; © 1995 Straylight
7 ;
8
9 ;----- Standard Header ------------------------------------------------------
10
11 GET libs:header
12 GET libs:swis
13
14 GET libs:stream
15
16 ;----- External dependencies ------------------------------------------------
17
18 GET sh.anchor
19 GET sh.tokens
20 GET sh.var
21
22 ;----- Main code ------------------------------------------------------------
23
24 AREA |TermScript$$Code|,CODE,READONLY
25
26 ; --- tokenise ---
27 ;
28 ; On entry: R0 == pointer to source buffer
29 ; R1 == size of source text
30 ; R2 == pointer to destination buffer
31 ; R3 == flags:
32 ; bit 0 == start reading a statement
33 ; R11 == pointer to Termite upcall block
34 ; R12 == pointer to script anchor
35 ;
36 ; On exit: V clear on success, with R0-R10 preserved
; V set on error (return made through tok__error), with
; R0 == pointer to error block and R1-R10 preserved
37 ;
38 ; Use: Tokenises a Termite script into an output buffer.
; The output is terminated with a byte of 255.
;
; Registers while lexing:
; R3 == tFlag__* flags (the entry flags are consumed first)
; R6 == current state (tState__*), indexes the branch table
; R7 == input pointer, R8 == input limit
; R9 == scratch buffer pointer (tsc_misc)
; R10 == output pointer
;
; Stack while lexing:
; [R13,#0] == line number (0 if bit 0 of R3 was clear)
; [R13,#4] == state saved by tok__dunno when it sees a `/'
; [R13,#8] onwards == saved R0-R10, R14
;
; Each state handler is called once per input character
; with the character in R0. One extra call is made with
; R0 == -1 when the input pointer reaches the limit.
39
40 EXPORT tokenise
41 tokenise ROUT
42
43 STMFD R13!,{R0-R10,R14} ;Save some registers
44 MOV R10,R2 ;Point to output buffer
45 MOV R7,R0 ;Point to input buffer
46 ADD R8,R7,R1 ;Point to input buffer limit
47 ADR R9,tsc_misc ;Point to temporary buffer
48 ANDS R14,R3,#1 ;Awaiting a statement?
49 MOVNE R6,#tState__stmt ;Yes -- select that state
50 MOVEQ R6,#tState__dunno ;No -- normal scanning state
51 MOVNE R14,#1 ;We start on line 1
52 STR R14,[R13,#-8]! ;Save line no., reserve a word
53
54 ; --- Ok, start lexing ---
;
; The byte at the limit is never loaded: it lies outside the
; source buffer. R7 is still stepped past the limit at EOF so
; that the loop terminates, and so that a handler which
; backtracks one character is given the EOF again.
55
56 MOV R3,#0 ;No flags set yet
57 00tokenise CMP R7,R8 ;Finished yet?
58 BHI %10tokenise ;Yes -- stop then
59 LDRNEB R0,[R7],#1 ;No -- load the next byte
ADDEQ R7,R7,#1 ;At the limit -- step past it
60 MOVEQ R0,#-1 ;...and get an EOF character
61
; --- Dispatch on the state ---
;
; R14 is pointed at the `B %00tokenise' below, so a handler's
; return carries on round the loop. The ADD reads PC as the
; address of the first table entry and indexes it by R6.

62 MOV R14,PC
63 ADD PC,PC,R6,LSL #2 ;And leap off into oblivion
64 B %00tokenise
65
66 B tok__stmt
67 B tok__dunno
68 B tok__string
69 B tok__dblQte
70 B tok__star
71 B tok__decimal
72 B tok__hex
73 B tok__ident
74 B tok__keyWord
75 B tok__label
76 B tok__expLab
77 B tok__lineCmt
78 B tok__blkCmt
79 B tok__cmtStar
80
81 ; --- Tidy up if we've finished ---
82
83 10tokenise MOV R14,#255 ;Mark the end of the script
84 STRB R14,[R10],#1 ;Save that on the end
85 ADD R13,R13,#8 ;Drop line no. and state words
86 LDMFD R13!,{R0-R10,R14} ;And return to caller
87 BICS PC,R14,#V_flag
88
89 LTORG
90
91 ; --- tok__error ---
92 ;
93 ; On entry: R0 == pointer to error block
; R13 == as left by tokenise's entry code, i.e. two
; words followed by the saved R0-R10, R14. A handler
; must unstack anything of its own before branching here.
94 ;
95 ; On exit: Returns through the R14 saved by tokenise, with V set,
; R0 untouched and R1-R10 restored
96 ;
97 ; Use: Reports an error during tokenising.
98
99 tok__error ROUT
100
101 ADD R13,R13,#12 ;Skip two words and saved R0
102 LDMFD R13!,{R1-R10,R14} ;Restore registers too
103 ORRS PC,R14,#V_flag ;And return the error
104
105 LTORG
106
107 ; --- tok__incLineNo ---
108 ;
109 ; On entry: [R13,#0] == current line number, or 0 if lines are not
; being counted. The caller must not have stacked
; anything since tokenise set up its frame.
110 ;
111 ; On exit: R1 corrupted
112 ;
113 ; Use: Increments the current line number. A line number of 0
; is left alone. Callers follow the BL with further
; conditional instructions, relying on the return by
; MOVS PC,R14 to bring their flags back.
114
115 tok__incLineNo ROUT
116
117 LDR R1,[R13,#0] ;Load the line number
118 CMP R1,#0 ;Are we scanning a file?
119 ADDNE R1,R1,#1 ;Yes -- increment it
120 STRNE R1,[R13,#0] ;And store it back
121 MOVS PC,R14
122
123 LTORG
124
125 ; --- tok__stmt ---
;
; State handler for the start of a statement.
;
; On entry: R0 == current character, R14 == return address
;
; A `*' is stored and switches to the star-command state. A `.'
; sets tFlag__readDot and switches to the label state without
; storing anything. Any other character is handled by tok__dunno,
; which follows directly.
126
127 tok__stmt ROUT
128
129 CMP R0,#'*' ;Is this a *command?
130 STREQB R0,[R10],#1 ;Yes -- store the * character
131 MOVEQ R6,#tState__star ;And read a star command
132 MOVEQS PC,R14 ;And return to caller
133
134 CMP R0,#'.' ;Is this a label
135 ORREQ R3,R3,#tFlag__readDot ;We've just read a dot
136 MOVEQ R6,#tState__label ;Read a label, please
137 MOVEQ R4,#0 ;No character read yet
138 MOVEQS PC,R14 ;And return to caller
139
140 ; Drop through to dunno
141
142 ; --- tok__dunno ---
;
; General state handler: looks at one character and picks the state
; which will read the rest of the item.
;
; On entry: R0 == current character, R14 == return address
; (also entered by falling through from tok__stmt)
;
; On exit: R6 == new state; R1 corrupted; R2 corrupted (zeroed
; when the keyword state is selected)
143
144 tok__dunno ROUT
145
146 ; --- Ignore whitespace ---
147 ;
148 ; Other states will have dealt with this as necessary already
149 ; so we don't need to bother.
;
; A newline is stored and starts a new statement. The flags are
; still EQ after the BLEQ, so the MOVEQ and MOVEQS are obeyed for
; a newline as well as for a space or tab.
150
151 MOV R2,R14 ;Preserve the link
152 CMP R0,#10 ;Is it a newline
153 STREQB R0,[R10],#1 ;Yes -- store it in buffer
154 BLEQ tok__incLineNo ;Increment the line number
155 MOVEQ R6,#tState__stmt ;And start a new statement
156 CMPNE R0,#' ' ;Is it a space
157 CMPNE R0,#9 ;Or a tab?
158 MOVEQS PC,R2 ;Yes -- don't do anything
159 MOV R14,R2 ;Put the link back
160
161 ; --- Now find an appropriate state ---
;
; The state in force is saved when a `/' is seen; tok__cmtStar
; reloads it from the same stack slot when a block comment ends.
162
163 CMP R0,#'/' ;Is this a slash?
164 STREQ R6,[R13,#4] ;Yes -- save old state away
165
; Each SUBNES leaves R1 == 0 on a match and skips the rest of the
; chain, so R1 is below 26 for any of these symbols as well as for
; an uppercase letter. The character is re-read by tok__keyWord.

166 SUBS R1,R0,#'+' ;Is it a plus?
167 SUBNES R1,R0,#'-' ;Or a minus?
168 SUBNES R1,R0,#'*' ;Or a star?
169 SUBNES R1,R0,#'/' ;Or a slash?
170 SUBNES R1,R0,#'<' ;Or a less-than?
171 SUBNES R1,R0,#'>' ;Or a greater-than?
172 SUBNE R1,R0,#'A' ;Otherwise check uppercase
173 CMP R1,#26 ;Is it in the right range?
174 SUBCC R7,R7,#1 ;Yes -- backtrack one char
175 MOVCC R6,#tState__keyWord ;And read a keyword
176 MOVCC R5,#0 ;Entry in state table
177 MOVCC R4,#0 ;No characters read yet
178 MOVCC R2,#0 ;No token discovered yet
179 MOVCCS PC,R14 ;And return to caller
180
181 MOV R6,#tState__dunno ;Read char, so not new stmt
182
183 CMP R0,#'"' ;Is it a string?
184 STREQB R0,[R10],#1 ;Yes -- store opening quote
185 MOVEQ R6,#tState__string ;And read a string literal
186 MOVEQS PC,R14 ;And return to caller
187
188 SUB R1,R0,#'0' ;Check if it's a digit
189 CMP R1,#10 ;Is it in the right range?
190 STRCCB R0,[R10],#1 ;Yes -- store digit
191 MOVCC R6,#tState__decimal ;And read a decimal number
192 MOVCCS PC,R14 ;And return to caller
193
194 CMP R0,#'&' ;Also check for hex numbers
195 STREQB R0,[R10],#1 ;Yes -- store the ampersand
196 MOVEQ R6,#tState__hex ;And read a hex number
197 MOVEQS PC,R14 ;And return to caller
198
199 SUBS R1,R0,#'_' ;Is this an underscore?
200 SUBNE R1,R0,#'a' ;Check if it's a lowercase
201 CMP R1,#26 ;Is it in the right range?
202 STRCCB R0,[R10],#1 ;Yes -- store the character
203 MOVCC R6,#tState__ident ;And read an identifier
204 MOVCCS PC,R14 ;And return to caller
205
206 CMP R0,#':' ;Is this a colon?
207 STREQB R0,[R10],#1 ;Yes -- store the character
208 MOVEQ R6,#tState__stmt ;And start a new statement
209 MOVEQS PC,R14 ;And return to caller
210
; Anything else is copied to the output unchanged

211 STRB R0,[R10],#1
212 MOVS PC,R14 ;And return to caller
213
214 LTORG
215
216 ; --- tok__star ---
;
; State handler for the body of a *command. Every character is
; copied to the output; a newline (which is copied too) ends the
; command and starts a new statement.
;
; On entry: R0 == current character, R14 == return address
;
; On exit: R2 corrupted; R1 corrupted after a newline
217
218 tok__star ROUT
219
220 MOV R2,R14 ;Preserve the link
221 STRB R0,[R10],#1 ;Save the character
222 CMP R0,#10 ;Is this a newline?
223 MOVEQ R6,#tState__stmt ;Start a new statement
224 BLEQ tok__incLineNo ;Increment the line number
225 MOVS PC,R2 ;And return to caller
226
227 LTORG
228
229 ; --- tok__string ---
;
; State handler for the inside of a string literal. Every character
; is copied to the output. A quote moves to tok__dblQte, which
; decides whether it closed the string. A newline abandons the
; string and starts a new statement.
;
; On entry: R0 == current character, R14 == return address
;
; On exit: R2 corrupted unless the character was a quote;
; R1 corrupted after a newline
230
231 tok__string ROUT
232
233 STRB R0,[R10],#1 ;Save the character
234 CMP R0,#'"' ;Is it another quote?
235 MOVEQ R6,#tState__dblQte ;Yes -- change state
236 MOVEQS PC,R14 ;And return
237 MOV R2,R14 ;Preserve the link
238 CMP R0,#10 ;Is it newline?
239 MOVEQ R6,#tState__stmt ;Yes -- change state
240 BLEQ tok__incLineNo ;...increment line number
241 MOVS PC,R2 ;And return
242
243 LTORG
244
245 ; --- tok__dblQte ---
;
; State handler for the character after a quote inside a string
; (that quote has already been stored by tok__string). A second
; quote is stored and the string continues. Anything else means
; the string has ended, so the character is handed back to the
; dunno state.
;
; On entry: R0 == current character, R14 == return address
246
247 tok__dblQte ROUT
248
249 CMP R0,#'"' ;Is this a 2nd quote?
250 MOVEQ R6,#tState__string ;Yes -- go back to string
251 STREQB R0,[R10],#1 ;..and store it away
252 SUBNE R7,R7,#1 ;Otherwise backtrack
253 MOVNE R6,#tState__dunno ;..and enter dunno state
254 MOVS PC,R14
255
256 LTORG
257
258 ; --- tok__decimal ---
;
; State handler for the digits of a decimal number. Digits are
; copied to the output; anything else ends the number.
;
; On entry: R0 == current character, R14 == return address
; R7 == pointer to next unread input byte
; R8 == input buffer limit
;
; On exit: R1 corrupted
259
260 tok__decimal ROUT
261
262 SUB R1,R0,#'0' ;Set up for range check
263 CMP R1,#10 ;Are we in range?
264 STRCCB R0,[R10],#1 ;Yes -- store the number
265 MOVCCS PC,R14 ;And return
266
267 ; --- A bit of bodgery now ---
268 ;
269 ; This hackery introduces a space between two numbers, which
270 ; would otherwise severely upset something like
271 ;
272 ; DIM a%!24 64
;
; tok__hex branches here too. The byte after the space is only
; examined if it lies inside the source buffer; a space which is
; the very last input byte is treated as an ordinary terminator.
; The terminating character is always handed back to the dunno
; state by backtracking.
273
274 tok__numHack CMP R0,#&20 ;Is this a space?
275 BNE %f00 ;No -- just stop normally
CMP R7,R8 ;Is there a byte after it?
BCS %f00 ;No -- don't peek past the end
276 LDRB R1,[R7,#0] ;Get the next byte
277 SUB R1,R1,#'0' ;Is it a digit?
278 CMP R1,#10 ;Quick check
279 STRCCB R0,[R10],#1 ;Yes -- store the space
280 00 SUB R7,R7,#1 ;Backtrack a little
281 MOV R6,#tState__dunno ;...and change state
282 MOVS PC,R14 ;Return to caller
283
284 LTORG
285
286 ; --- tok__hex ---
;
; State handler for the digits of a hex number (after the `&').
; Accepts 0-9, A-F and a-f; lowercase digits are stored as
; uppercase. Anything else ends the number.
;
; On entry: R0 == current character, R14 == return address
;
; On exit: R1 corrupted
287
288 tok__hex ROUT
289
; The chain ends with C clear if the character is a hex digit:
; either the first test found a-f, or one of the two later tests
; (made only while C is still set) found 0-9 or A-F.

290 SUB R1,R0,#'a' ;Set up for range check
291 CMP R1,#6 ;Are we in range?
292 SUBCC R0,R0,#'a'-'A' ;Force to uppercase
293 SUBCS R1,R0,#'0' ;Otherwise is it 0-9?
294 CMPCS R1,#10
295 SUBCS R1,R0,#'A' ;Otherwise is it A-F?
296 CMPCS R1,#6
297 STRCCB R0,[R10],#1 ;Yes -- store the number
298 MOVCCS PC,R14 ;And return
299
300 ; --- Hack as above ---
301
302 B tok__numHack ;Use hacking code above
303
304 LTORG
305
306 ; --- tok__ident ---
;
; State handler for the rest of an identifier. Letters, digits and
; underscores are copied to the output. A `$' or `%' is copied and
; ends the identifier. Any other character ends it too: a space is
; written to the output in its place and the character is handed
; back to the dunno state.
;
; On entry: R0 == current character, R14 == return address
;
; On exit: R1 corrupted
307
308 tok__ident ROUT
309
310 CMP R0,#'$' ;Is it a dollar sign?
311 CMPNE R0,#'%' ;Or a percentage?
312 STREQB R0,[R10],#1 ;Yes -- store it then
313 MOVEQ R6,#tState__dunno ;Change state
314 MOVEQS PC,R14 ;And return to caller
315
; C ends up clear if the character may appear in an identifier.
; An underscore leaves R1 == 0, which passes the digit test.

316 SUBS R1,R0,#'_' ;Is it an underscore?
317 SUBNE R1,R0,#'0' ;Or a number?
318 CMP R1,#10
319 SUBCS R1,R0,#'A' ;Or a capital letter?
320 CMPCS R1,#26
321 SUBCS R1,R0,#'a' ;Or a lowercase letter?
322 CMPCS R1,#26
323 MOVCS R1,#' ' ;If not valid, append space
324 MOVCC R1,R0 ;Otherwise write character
325 STRB R1,[R10],#1 ;Store a character
326 SUBCS R7,R7,#1 ;No -- backtrack a little
327 MOVCS R6,#tState__dunno ;...and change state
328 MOVS PC,R14 ;Return to caller
329
330 ; --- tok__label ---
;
; State handler for a label name or a PROC/FN name.
;
; If tFlag__readDot or tFlag__readDEF is set the name is being
; defined: its characters are collected in the scratch buffer
; through R9 and are not written to the output. When the name ends
; it is zero-terminated and var_create is called, provided the line
; number is nonzero. Otherwise the name is a reference and is
; copied to the output.
;
; Either way the character which ended the name is handed back to
; the dunno state.
;
; On entry: R0 == current character, R14 == return address
; R3 == flags
; R9 == next free byte of scratch buffer
;
; On exit: R1 corrupted. After a definition R0 and R2 are also
; corrupted, R9 points at tsc_misc again, and
; tFlag__readDot and tFlag__readDEF are cleared.
; If var_create returns V set, control passes to
; tok__error and does not come back.
;
; NOTE(review): nothing here limits how many bytes are written
; through R9, and the size of tsc_misc is not visible in this file.
; Confirm that a long name cannot overrun it.
331
332 tok__label ROUT
333
334 SUBS R1,R0,#'_' ;Is it an underscore?
335 SUBNE R1,R0,#'0' ;Or a number?
336 CMP R1,#10
337 SUBCS R1,R0,#'A' ;Or a capital letter?
338 CMPCS R1,#26
339 SUBCS R1,R0,#'a' ;Or a lowercase letter?
340 CMPCS R1,#26
341 BCS %05tok__label ;No -- do other things then
342
343 TST R3,#tFlag__readDot + tFlag__readDEF
344 STREQB R0,[R10],#1 ;Not defining -- copy to output
345 STRNEB R0,[R9],#1 ;Otherwise store in scratch
346 MOVS PC,R14 ;...and return
347
348 ; --- Are we defining this label? ---
349
350 05tok__label TST R3,#tFlag__readDot + tFlag__readDEF
351 SUBEQ R7,R7,#1 ;No -- backtrack a little
352 MOVEQ R6,#tState__dunno ;...change state
353 MOVEQS PC,R14 ;...and return
354
355 ; --- Create the variable then ---
;
; Two words are stacked here, so the line number kept by tokenise
; is at [R13,#8] rather than [R13,#0]. If the line number is zero
; the BLNE is skipped and the CMP has left V clear, so the error
; path is not taken.
356
357 STMFD R13!,{R3,R14} ;Preserve R3 and link
358 MOV R14,#0 ;Terminate scratch buffer
359 STRB R14,[R9],#1 ;To make things nice
360 TST R3,#tFlag__readDot ;Have we just read a dot?
361 MOVNE R0,#vType_label ;Yes -- create a label
362 BNE %10tok__label ;...and jump ahead
363 TST R3,#tFlag__readFN ;Is this a DEFFN?
364 MOVNE R0,#vType_fn ;Yes -- define one of these
365 MOVEQ R0,#vType_proc ;No -- define a DEFPROC then
366 10tok__label ADR R9,tsc_misc ;Point to scratch start
367 MOV R1,R9 ;Point to label name
368 MOV R2,R10 ;Get the file address
369 LDR R3,[R13,#8] ;Load the line number
370 CMP R3,#0 ;Are we scanning the file?
371 BLNE var_create ;Create the variable
372 LDMVSFD R13!,{R3,R14} ;If it failed, unstack...
373 BVS tok__error ;...and die horridly
374
375 SUB R7,R7,#1 ;Backtrack a little
376 MOV R6,#tState__dunno ;...change state
377 LDMFD R13!,{R3,R14} ;Restore flags word
378 BIC R3,R3,#tFlag__readDot + tFlag__readDEF
379 MOVS PC,R14 ;Return to caller
380
381 LTORG
382
383 ; --- tok__expLab ---
;
; State handler used when a label is expected next (tok__keyWord
; selects it after GOTO, GOSUB and RESTORE). Spaces and tabs are
; skipped without being stored. The first other character is
; handed to the label state.
;
; On entry: R0 == current character, R14 == return address
384
385 ROUT
386
387 tok__expLab CMP R0,#' ' ;Is it a space?
388 CMPNE R0,#9 ;Or a TAB char?
389 SUBNE R7,R7,#1 ;No -- backtrack a little
390 MOVNE R6,#tState__label ;...we are reading a label
391 MOVS PC,R14 ;Return
392
393 LTORG
394
395 ; --- tok__lineCmt ---
;
; State handler for a line comment (tok__keyWord selects it for
; tok_rem and tok_DD). Everything is discarded up to the newline,
; which is stored and starts a new statement.
;
; On entry: R0 == current character, R14 == return address
;
; On exit: R2 corrupted; R1 corrupted after a newline
396
397 tok__lineCmt ROUT
398
399 MOV R2,R14 ;Preserve the link
400 CMP R0,#10 ;Is this a newline?
401 STREQB R0,[R10],#1 ;Save the newline character
402 MOVEQ R6,#tState__stmt ;Start a new statement
403 BLEQ tok__incLineNo ;Increment the line number
404 MOVS PC,R2 ;And return to caller
405
406 LTORG
407
408 ; --- tok__blkCmt ---
;
; State handler for the inside of a block comment. The text is
; discarded, except that each newline is replaced in the output by
; a byte of 31 and still counts towards the line number. A `*'
; moves to tok__cmtStar to look for the closing `/'.
;
; On entry: R0 == current character, R14 == return address
;
; On exit: R2 corrupted; R0 and R1 corrupted after a newline
409
410 tok__blkCmt ROUT
411
412 MOV R2,R14 ;Preserve the link
413 CMP R0,#10 ;Is this a newline?
414 MOVEQ R0,#31 ;Yes -- insert a weird char
415 STREQB R0,[R10],#1 ;Put it in the buffer
416 BLEQ tok__incLineNo ;Increment the line number
417 MOVEQS PC,R2 ;And return
418 CMP R0,#'*' ;Is it a star?
419 MOVEQ R6,#tState__cmtStar ;Yes -- change mode then
420 MOVS PC,R2 ;Return to caller
421
422 LTORG
423
424 ; --- tok__cmtStar ---
;
; State handler for the character after a `*' inside a block
; comment. A `/' ends the comment and puts back the state which
; tok__dunno saved at [R13,#4]. Another `*' stays in this state.
; Anything else goes back to tok__blkCmt. A newline is replaced in
; the output by a byte of 31, as in tok__blkCmt.
;
; On entry: R0 == current character, R14 == return address
;
; On exit: R2 corrupted; R1 corrupted after a newline
425
426 tok__cmtStar ROUT
427
428 MOV R2,R14 ;Preserve the link
429 CMP R0,#10 ;Is this a newline?
430 MOVEQ R1,#31 ;Yes -- insert a weird char
431 STREQB R1,[R10],#1 ;Put it in the buffer
432 BLEQ tok__incLineNo ;Increment the line number
433 CMP R0,#'/' ;Is the comment over now?
434 LDREQ R6,[R13,#4] ;Yes -- load previous state
435 CMPNE R0,#'*' ;Is it still a star?
436 MOVNE R6,#tState__blkCmt ;No -- change state back
437 MOVS PC,R2 ;And return to caller
438
439 LTORG
440
441 ; --- tok__keyWord ---
;
; State handler which matches keywords and symbols against the
; table at tokTable, one character per call.
;
; As used here, the table is made of lists of words, each list
; ended by a zero word. In each entry:
; bits 24-31 == character to match
; bits 16-23 == token completed by this character (0 if none)
; bits 0-15 == offset from tokTable of the list for the next
; character (0 if there is none)
;
; On entry: R0 == current character, R14 == return address
; R2 == last complete token matched so far (0 if none)
; R4 == characters read since that token was matched
; R5 == offset of current list, in the top 16 bits
; (tok__dunno zeroes R2, R4 and R5 on selecting this state)
;
; On exit: While matching continues, R2, R4 and R5 are updated
; and the state is unchanged. R0 and R1 are corrupted.
;
; When nothing further matches, the unmatched characters are handed
; back by backtracking. If no token was found the characters are
; re-read as an identifier. Otherwise the token is stored (except
; for the comment introducers) and the next state is chosen from it.
;
; A `.' abbreviates a keyword: the first entry of each list is
; followed from the current position until an entry with no
; next-character list is reached, and that entry's token is used.
442
443 tok__keyWord ROUT
444
445 STMFD R13!,{R14}
446 ADR R1,tokTable ;Point to the token table
447 ADD R1,R1,R5,LSR #16 ;Point into the table
448 CMP R0,#'.' ;Is this a dot?
449 BEQ %18tok__keyWord ;Yes -- jump ahead then
450 ADD R4,R4,#1 ;Increment char count
451 10tok__keyWord LDR R14,[R1],#4 ;Load the next table entry
452 CMP R14,#0 ;Is this the end?
453 BEQ %15tok__keyWord ;Yes -- jump ahead
454 CMP R0,R14,LSR #24 ;Is this a match?
455 BNE %10tok__keyWord ;No -- keep looking
456
457 BIC R14,R14,#&FF000000 ;Clear char to match byte
458 MOVS R0,R14,LSR #16 ;Get the token byte
459 MOVNE R2,R0 ;This is a token
460 MOVNE R4,#0 ;So clear backtrack count
461 MOVS R5,R14,LSL #16 ;Keep offset of next list
462 LDMNEFD R13!,{PC}^ ;More to match -- return
463
464 ; --- Come to the end of the line ---
465
466 15tok__keyWord SUB R7,R7,R4 ;Do the backtracking
467 CMP R2,#0 ;Did we find a token?
468 MOVEQ R6,#tState__ident ;No -- read an identifier
469 LDMEQFD R13!,{PC}^ ;Bad luck then
470
471 ; --- We have found a match ---
472
473 11tok__keyWord LDMFD R13!,{R14} ;Restore return address
474 MOV R5,R2 ;Get the matched token
475
476 ; --- Skip over REMS ---
;
; The comment introducers are not written to the output.
477
478 CMP R5,#tok_rem ;Check for REM statements
479 CMPNE R5,#tok_DD ;Or a // comment
480 MOVEQ R6,#tState__lineCmt ;Introduces line comments
481 MOVEQS PC,R14 ;Return if it was one
482
483 CMP R5,#tok_DT ;Is it a /* comment?
484 MOVEQ R6,#tState__blkCmt ;Yes -- it's a block comment
485 MOVEQS PC,R14 ;And return to caller
486
487 ; --- Set up various flags and things ---
;
; The return after PROC or FN comes before tFlag__readDEF is
; cleared, so a DEF seen just before is still remembered when
; tok__label reads the name.
488
489 17tok__keyWord STRB R5,[R10],#1 ;Store in the block
490
491 BIC R3,R3,#tFlag__readFN+tFlag__readPROC
492 CMP R5,#tok_proc ;Is this a PROC?
493 ORREQ R3,R3,#tFlag__readPROC ;Yes -- remember this
494 CMP R5,#tok_fn ;Or a FN?
495 ORREQ R3,R3,#tFlag__readFN ;Yes -- remember this
496 TST R3,#tFlag__readPROC+tFlag__readFN
497 MOVNE R6,#tState__label ;If either -- change state
498 MOVNES PC,R14 ;...and return
499
500 BIC R3,R3,#tFlag__readDEF ;No -- clear DEF flag
501 CMP R5,#tok_def ;Was it a DEF then?
502 ORREQ R3,R3,#tFlag__readDEF ;Yes -- set the def flag
503
504 ; --- Are we expecting a label next? ---
505
506 CMP R5,#tok_goto ;Is there a label next?
507 CMPNE R5,#tok_gosub
508 CMPNE R5,#tok_restore
509 MOVEQ R6,#tState__expLab ;Yes -- change state
510 MOVEQ R4,#0 ;...No characters read yet
511 BIC R3,R3,#tFlag__readDot ;We are not expecting a dot
512 MOVEQS PC,R14 ;...and return
513
514 ; --- Return to caller ---
515
516 MOV R6,#tState__dunno ;Change state back again
517 MOVS PC,R14 ;And return to caller
518
519 ; --- User has abbreviated key word ---
;
; R1 points at the current list on entry here. R4 is zero when the
; loop ends, and R14 is still on the stack for the code at 11.
520
521 18tok__keyWord ADR R0,tokTable ;Point to the table
522 19tok__keyWord LDR R5,[R1,#0] ;Load first entry of this list
523 MOVS R4,R5,LSL #16 ;Isolate its next-list offset
524 ADDNE R1,R0,R4,LSR #16 ;If more to go -- point
525 BNE %19tok__keyWord ;...and keep on looping
526 BIC R2,R5,#&FF000000 ;Clear the match char
527 MOV R2,R2,LSR #16 ;And get the final token
528 B %11tok__keyWord ;Deal with the key word
529
530 GET sh.tokTable
531
532 LTORG
533
534 ; --- States for the tokeniser ---
;
; The state number is used by tokenise to index its table of
; branches to the tok__* handlers, so the order of these
; definitions must match the order of that table.
535
536 ^ 0
537 tState__stmt # 1 ;Start of a new statement
538 tState__dunno # 1 ;Not sure what to expect
539 tState__string # 1 ;Tokenising a string
540 tState__dblQte # 1 ;Checking for double quotes
541 tState__star # 1 ;Processing a *command
542 tState__decimal # 1 ;Reading a decimal/bin number
543 tState__hex # 1 ;Reading a hex number
544 tState__ident # 1 ;Processing an identifier
545 tState__keyWord # 1 ;Checking for keywords
546 tState__label # 1 ;Reading a label
547 tState__expLab # 1 ;Waiting for a label
548 tState__lineCmt # 1 ;Skipping a line comment
549 tState__blkCmt # 1 ;Skipping a block comment
550 tState__cmtStar # 1 ;Found star in block comment
551
552 ; --- Flags ---
;
; Bits of the flags word kept in R3 while lexing.
553
554 tFlag__readDot EQU (1<<0) ;Creating a label
555 tFlag__readDEF EQU (1<<1) ;We're doing a def
556 tFlag__readFN EQU (1<<5) ;Just read a FN
557 tFlag__readPROC EQU (1<<6) ;Just read a PROC
558
559 ;----- Workspace ------------------------------------------------------------
560
561 ;----- That's all, folks ----------------------------------------------------
562
563 END