4 ; Tokenise a Termite script
9 ;----- Standard Header ------------------------------------------------------
16 ;----- External dependencies ------------------------------------------------
22 ;----- Main code ------------------------------------------------------------
24 AREA |TermScript$$Code|,CODE,READONLY
28 ; On entry: R0 == pointer to source buffer
29 ; R1 == size of source text (added to R0 to form the input limit)
30 ; R2 == pointer to destination buffer
32 ; bit 0 == start reading a statement (bit 0 of R3, tested below)
33 ; R11 == pointer to Termite upcall block
34 ; R12 == pointer to script anchor
36 ; On exit: May return an error
38 ; Use: Tokenises a Termite script into an output buffer.
;
; Registers while lexing, as set up by the code below:
;   R3  == tFlag__ flags word (cleared just before the loop)
;   R6  == current tState__ value, used to index the state dispatch
;   R7  == input read pointer; R8 == input limit (R0 + R1)
;   R9  == pointer into the sail_misc scratch buffer
;   R10 == output write pointer
;
; Stack frame: two words are pushed below the saved registers.
;   [R13,#0] == current line number (0 if bit 0 of R3 was clear on entry)
;   [R13,#4] == spare word; the '/' handling elsewhere in this file
;               saves the lexer state here
43 STMFD R13!,{R0-R10,R14} ;Save some registers
44 MOV R10,R2 ;Point to output buffer
45 MOV R7,R0 ;Point to input buffer
46 ADD R8,R7,R1 ;Point to input buffer limit
47 ADR R9,sail_misc ;Point to temporary buffer
48 ANDS R14,R3,#1 ;Awaiting a statement? (R14 = 0 or 1)
49 MOVNE R6,#tState__stmt ;Yes -- select that state
50 MOVEQ R6,#tState__dunno ;No -- normal scanning state
51 MOVNE R14,#1 ;We start on line 1 (R14 is already 1 from the ANDS)
52 STR R14,[R13,#-8]! ;Save this initial value; reserves 2 words
54 ; --- Ok, start lexing ---
56 MOV R3,#0 ;No flags set yet
57 00tokenise CMP R7,R8 ;Finished yet?
58 BHI %10tokenise ;Yes -- stop then (pointer is past the limit)
; NOTE(review): when R7 == R8 the LDRB below still executes, so the
; byte at the limit address is read before R0 is replaced by -1.
; Confirm that address is always readable.
59 LDRB R0,[R7],#1 ;No -- load the next byte
60 MOVEQ R0,#-1 ;At the limit -- get an EOF character instead
; Dispatch on the state in R6, one word per state. The table this jumps
; into is not visible in this view -- presumably it follows directly.
63 ADD PC,PC,R6,LSL #2 ;And leap off into oblivion
81 ; --- Tidy up if we've finished ---
83 10tokenise MOV R14,#255 ;Mark the end of the script
84 STRB R14,[R10],#1 ;Save that on the end
85 ADD R13,R13,#8 ;Drop the two local words
; This LDMFD reloads R14 but not PC, so the actual return instruction
; must follow it -- not visible in this view.
86 LDMFD R13!,{R0-R10,R14} ;Restore registers saved on entry
93 ; On entry: R0 == pointer to error block
97 ; Use: Reports an error during tokenising.
;
; Unwinds the tokeniser's stack frame and returns to the tokeniser's
; caller with V_flag set, leaving R0 pointing at the error block.
; R13 must be at the tokeniser's own frame level on entry: tok__label
; pops its extra {R3,R14} before branching here.
; Skip 12 bytes: the two local words (8) plus the saved R0 (4), so the
; error pointer in R0 is not overwritten by the entry value.
101 ADD R13,R13,#12 ;Restore the stack pointer
102 LDMFD R13!,{R1-R10,R14} ;Restore registers too (all but R0)
103 ORRS PC,R14,#V_flag ;And return the error (V_flag ORed into R14)
107 ; --- tok__incLineNo ---
113 ; Use: Increments the current line number.
;
; The line number lives at [R13,#0], so the caller must have nothing
; extra pushed on the stack. A value of zero is left untouched.
; Corrupts R1.
;
; NOTE(review): the return instruction is not visible in this view.
; Some callers execute EQ-conditional instructions straight after
; `BLEQ tok__incLineNo`, so the return presumably restores the caller's
; flags (e.g. MOVS PC,R14) -- confirm.
117 LDR R1,[R13,#0] ;Load line number
118 CMP R1,#0 ;Are we scanning a file? (zero means no)
119 ADDNE R1,R1,#1 ;Yes -- increment it
120 STRNE R1,[R13,#0] ;And store it back
; State handler for the start of a statement (label not visible in this
; view; presumably the tState__stmt handler).
;
; On entry: R0 == character just read, R14 == return address
;
; '*' -- stored in the output; switch to the star-command state.
; '.' -- not stored; sets tFlag__readDot, zeroes R4 and switches to the
;        label state.
; Anything else falls through into the code that follows.
129 CMP R0,#'*' ;Is this a *command?
130 STREQB R0,[R10],#1 ;Yes -- store the * character
131 MOVEQ R6,#tState__star ;And read a star command
132 MOVEQS PC,R14 ;And return to caller
134 CMP R0,#'.' ;Is this a label
135 ORREQ R3,R3,#tFlag__readDot ;We've just read a dot
136 MOVEQ R6,#tState__label ;Read a label, please
137 MOVEQ R4,#0 ;No character read yet
138 MOVEQS PC,R14 ;And return to caller
140 ; Drop through to dunno
; General scanning state (label not visible in this view; presumably the
; tState__dunno handler, also reached by falling through from the
; statement-start code above).
;
; On entry: R0 == character just read, R14 == return address
;
; Classifies the character and picks the next state in R6.
; Corrupts R1 and R2.
146 ; --- Ignore whitespace ---
148 ; Other states will have dealt with this as necessary already
149 ; so we don't need to bother.
151 MOV R2,R14 ;Preserve the link (the BLEQ below overwrites R14)
152 CMP R0,#10 ;Is it a newline
153 STREQB R0,[R10],#1 ;Yes -- store it in buffer
154 BLEQ tok__incLineNo ;Increment the line number
; The next instruction relies on EQ still holding after the call.
155 MOVEQ R6,#tState__stmt ;And start a new statement
156 CMPNE R0,#' ' ;Is it a space
157 CMPNE R0,#9 ;Or a tab?
158 MOVEQS PC,R2 ;Newline, space or tab -- nothing more to do
159 MOV R14,R2 ;Put the link back
161 ; --- Now find an appropriate state ---
; At a '/' the current state is parked in the spare stack word; the
; comment-star handler reloads it from [R13,#4] when a comment closes.
163 CMP R0,#'/' ;Is this a slash?
164 STREQ R6,[R13,#4] ;Yes -- save old state away
; Each SUBS gives R1 == 0 when R0 is that character, which stops the
; chain. If none match, R1 == R0 - 'A'. So the unsigned compare with 26
; accepts + - * / < > and 'A'..'Z'.
166 SUBS R1,R0,#'+' ;Check for strange chars
167 SUBNES R1,R0,#'-' ;Check for strange chars
168 SUBNES R1,R0,#'*' ;Check for strange chars
169 SUBNES R1,R0,#'/' ;Check for strange chars
170 SUBNES R1,R0,#'<' ;Check for strange chars
171 SUBNES R1,R0,#'>' ;Check for strange chars
172 SUBNE R1,R0,#'A' ;Otherwise check uppercase
173 CMP R1,#26 ;Is it in the right range?
174 SUBCC R7,R7,#1 ;Yes -- backtrack so the keyword state re-reads it
175 MOVCC R6,#tState__keyWord ;And read a keyword
176 MOVCC R5,#0 ;Entry in state table (start at the first table)
177 MOVCC R4,#0 ;No characters read yet
178 MOVCC R2,#0 ;No token discovered yet
179 MOVCCS PC,R14 ;And return to caller
181 MOV R6,#tState__dunno ;Read char, so not new stmt
183 CMP R0,#'"' ;Is it a string?
184 STREQB R0,[R10],#1 ;Yes -- store opening quote
185 MOVEQ R6,#tState__string ;And read a string literal
186 MOVEQS PC,R14 ;And return to caller
188 SUB R1,R0,#'0' ;Check if it's a digit
189 CMP R1,#10 ;Is it in the right range?
190 STRCCB R0,[R10],#1 ;Yes -- store digit
191 MOVCC R6,#tState__decimal ;And read a decimal number
192 MOVCCS PC,R14 ;And return to caller
194 CMP R0,#'&' ;Also check for hex numbers
195 STREQB R0,[R10],#1 ;Yes -- store the ampersand
196 MOVEQ R6,#tState__hex ;And read a hex number
197 MOVEQS PC,R14 ;And return to caller
; Same zero-result trick as above: '_' gives R1 == 0, which passes the
; range check along with 'a'..'z'.
199 SUBS R1,R0,#'_' ;Is this an underscore?
200 SUBNE R1,R0,#'a' ;Check if it's a lowercase
201 CMP R1,#26 ;Is it in the right range?
202 STRCCB R0,[R10],#1 ;Yes -- store the character
203 MOVCC R6,#tState__ident ;And read an identifier
204 MOVCCS PC,R14 ;And return to caller
206 CMP R0,#':' ;Is this a colon?
207 STREQB R0,[R10],#1 ;Yes -- store the character
208 MOVEQ R6,#tState__stmt ;And start a new statement
209 MOVEQS PC,R14 ;And return to caller
; Any other character: as visible here, return in the dunno state.
212 MOVS PC,R14 ;And return to caller
; State handler that copies characters straight to the output (label not
; visible in this view; presumably the tState__star handler for
; *commands).
;
; On entry: R0 == character just read, R14 == return address
;
; Every character, including the newline, is stored. A newline also
; starts a new statement and bumps the line number. Corrupts R1 (via
; tok__incLineNo) and R2.
220 MOV R2,R14 ;Preserve the link (BLEQ overwrites R14)
221 STRB R0,[R10],#1 ;Save the character
222 CMP R0,#10 ;Is this a newline?
223 MOVEQ R6,#tState__stmt ;Start a new statement
224 BLEQ tok__incLineNo ;Increment the line number
225 MOVS PC,R2 ;And return to caller
229 ; --- tok__string ---
;
; State handler for the inside of a string literal.
;
; On entry: R0 == character just read, R14 == return address
;
; Every character is copied to the output. A quote moves to the
; double-quote check state. A newline ends the string by starting a new
; statement, and the line number is bumped. Corrupts R1 (via
; tok__incLineNo) and R2.
233 STRB R0,[R10],#1 ;Save the character
234 CMP R0,#'"' ;Is it another quote?
235 MOVEQ R6,#tState__dblQte ;Yes -- change state
236 MOVEQS PC,R14 ;And return
237 MOV R2,R14 ;Preserve the link (BLEQ overwrites R14)
238 CMP R0,#10 ;Is it newline?
239 MOVEQ R6,#tState__stmt ;Yes -- change state
240 BLEQ tok__incLineNo ;...increment line number
241 MOVS PC,R2 ;And return
245 ; --- tok__dblQte ---
;
; State handler for the character that follows a quote inside a string.
;
; On entry: R0 == character just read
;
; A second quote is stored and the string continues. Any other
; character is pushed back for the dunno state to re-read.
; The return instruction is not visible in this view.
249 CMP R0,#'"' ;Is this a 2nd quote?
250 MOVEQ R6,#tState__string ;Yes -- go back to string
251 STREQB R0,[R10],#1 ;..and store it away
252 SUBNE R7,R7,#1 ;Otherwise backtrack
253 MOVNE R6,#tState__dunno ;..and enter dunno state
258 ; --- tok__decimal ---
;
; State handler for the digits of a decimal number.
;
; On entry: R0 == character just read, R14 == return address
;
; Digits are copied to the output. Anything else ends the number via
; tok__numHack. Corrupts R1.
262 SUB R1,R0,#'0' ;Set up for range check
263 CMP R1,#10 ;Are we in range?
264 STRCCB R0,[R10],#1 ;Yes -- store the number
265 MOVCCS PC,R14 ;And return
267 ; --- A bit of bodgery now ---
269 ; This hackery introduces a space between two numbers, which
270 ; would otherwise be run together, as the dunno state drops spaces.
;
; tok__numHack is shared with the hex handler. If the terminating
; character is a space and the byte after it is a digit, the space is
; written to the output. In every case the terminating character is
; pushed back and the state returns to dunno.
274 tok__numHack CMP R0,#&20 ;Is this a space?
275 BNE %f00 ;No -- just stop normally
; NOTE(review): this peeks at [R7] with no check against the input
; limit in R8 -- confirm a read at the limit address is safe.
276 LDRB R1,[R7,#0] ;Get the next byte (peek; R7 not advanced)
277 SUB R1,R1,#'0' ;Is it a digit?
278 CMP R1,#10 ;Quick check
279 STRCCB R0,[R10],#1 ;Yes -- store the space
280 00 SUB R7,R7,#1 ;Backtrack a little
281 MOV R6,#tState__dunno ;...and change state
282 MOVS PC,R14 ;Return to caller
; State handler for the digits of a hex number (label not visible in
; this view; presumably the tState__hex handler entered after '&').
;
; On entry: R0 == character just read, R14 == return address
;
; Lowercase 'a'..'f' are folded to uppercase before being stored.
; NOTE(review): as visible here only 'a'..'f' reach the store. The
; range checks for '0'..'9' and 'A'..'F' are presumably in lines not
; visible in this view -- confirm against the full file.
290 SUB R1,R0,#'a' ;Set up for range check
291 CMP R1,#6 ;Are we in range?
292 SUBCC R0,R0,#'a'-'A' ;Force to uppercase
297 STRCCB R0,[R10],#1 ;Yes -- store the number
298 MOVCCS PC,R14 ;And return
300 ; --- Hack as above ---
; Not a hex digit: finish the number the same way as a decimal one.
302 B tok__numHack ;Use hacking code above
; State handler for the body of an identifier (label not visible in this
; view; presumably the tState__ident handler).
;
; On entry: R0 == character just read, R14 == return address
;
; '$' or '%' is stored and ends the identifier. Otherwise the character
; is tested against '_', digits, capitals and lowercase. A valid one is
; stored. An invalid one causes a space to be stored instead, and the
; character is pushed back for the dunno state. Corrupts R1.
310 CMP R0,#'$' ;Is it a dollar sign?
311 CMPNE R0,#'%' ;Or a percentage?
312 STREQB R0,[R10],#1 ;Yes -- store it then
313 MOVEQ R6,#tState__dunno ;Change state
314 MOVEQS PC,R14 ;And return to caller
; NOTE(review): the range compares that set the carry flag between the
; SUBs below are not visible in this view. Presumably each SUB is
; followed by a CMP against the size of its range -- confirm against
; the full file.
316 SUBS R1,R0,#'_' ;Is it an underscore?
317 SUBNE R1,R0,#'0' ;Or a number?
319 SUBCS R1,R0,#'A' ;Or a capital letter?
321 SUBCS R1,R0,#'a' ;Or a lowercase letter?
323 MOVCS R1,#' ' ;If not valid, append space
324 MOVCC R1,R0 ;Otherwise write character
325 STRB R1,[R10],#1 ;Store a character (the char, or a space)
326 SUBCS R7,R7,#1 ;Not valid -- backtrack a little
327 MOVCS R6,#tState__dunno ;...and change state
328 MOVS PC,R14 ;Return to caller
; State handler for a label or PROC/FN name (label not visible in this
; view; the local labels below name it tok__label).
;
; On entry: R0 == character just read, R14 == return address
;           R3 == flags word, R9 == scratch buffer pointer
;
; Valid name characters go to the scratch buffer when a definition is
; in progress (tFlag__readDot or tFlag__readDEF set), and to the output
; otherwise. At the first invalid character, a definition in progress
; has its name zero-terminated and handed to var_create. In both cases
; the character is pushed back and the state returns to dunno.
;
; NOTE(review): the range compares that set the carry flag between the
; SUBs below are not visible in this view -- confirm against the full
; file. Also, nothing visible bounds the writes through R9.
334 SUBS R1,R0,#'_' ;Is it an underscore?
335 SUBNE R1,R0,#'0' ;Or a number?
337 SUBCS R1,R0,#'A' ;Or a capital letter?
339 SUBCS R1,R0,#'a' ;Or a lowercase letter?
341 BCS %05tok__label ;No -- do other things then
343 TST R3,#tFlag__readDot + tFlag__readDEF
344 STREQB R0,[R10],#1 ;Not defining -- store the character in the output
345 STRNEB R0,[R9],#1 ;Otherwise store in scratch
346 MOVS PC,R14 ;...and return
348 ; --- Are we defining this label? ---
350 05tok__label TST R3,#tFlag__readDot + tFlag__readDEF
351 SUBEQ R7,R7,#1 ;No -- backtrack a little
352 MOVEQ R6,#tState__dunno ;...change state
353 MOVEQS PC,R14 ;...and return
355 ; --- Create the variable then ---
357 STMFD R13!,{R3,R14} ;Preserve R3 and link
358 MOV R14,#0 ;Terminate scratch buffer
359 STRB R14,[R9],#1 ;To make things nice
; Choose the type passed in R0: label after a dot, otherwise FN or PROC.
360 TST R3,#tFlag__readDot ;Have we just read a dot?
361 MOVNE R0,#vType_label ;Yes -- create a label
362 BNE %10tok__label ;...and jump ahead
363 TST R3,#tFlag__readFN ;Is this a DEFFN?
364 MOVNE R0,#vType_fn ;Yes -- define one of these
365 MOVEQ R0,#vType_proc ;No -- define a DEFPROC then
; The ADR also rewinds R9, so the next name starts at the scratch start.
366 10tok__label ADR R9,sail_misc ;Point to scratch start
367 MOV R1,R9 ;Point to label name
368 MOV R2,R10 ;Get the file address (current output pointer)
; The line number is at offset 8 because {R3,R14} were pushed above.
369 LDR R3,[R13,#8] ;Load the line number
370 CMP R3,#0 ;Are we scanning the file?
; var_create is external and its contract is not visible here. It is
; skipped when the line number is zero. In that case the CMP leaves V
; clear, so the error path below is not taken.
371 BLNE var_create ;Create the variable
; The error path pops the extra words first, because tok__error expects
; R13 at the tokeniser's own frame level.
372 LDMVSFD R13!,{R3,R14} ;If it failed, unstack...
373 BVS tok__error ;...and die horridly
375 SUB R7,R7,#1 ;Backtrack over the terminating character
376 MOV R6,#tState__dunno ;...change state
377 LDMFD R13!,{R3,R14} ;Restore flags word
378 BIC R3,R3,#tFlag__readDot + tFlag__readDEF
379 MOVS PC,R14 ;Return to caller
383 ; --- tok__expLab ---
;
; State handler used while a label is expected.
;
; On entry: R0 == character just read
;
; Spaces and tabs are skipped. The first other character is pushed
; back and the state changes to label, which re-reads it.
; The return instruction is not visible in this view.
387 tok__expLab CMP R0,#' ' ;Is it a space?
388 CMPNE R0,#9 ;Or a TAB char?
389 SUBNE R7,R7,#1 ;No -- backtrack a little
390 MOVNE R6,#tState__label ;...we are reading a label
395 ; --- tok__lineCmt ---
;
; State handler for a comment that runs to the end of the line.
;
; On entry: R0 == character just read, R14 == return address
;
; Everything up to the newline is discarded. The newline itself is
; stored, a new statement begins and the line number is bumped.
; Corrupts R1 (via tok__incLineNo) and R2.
399 MOV R2,R14 ;Preserve the link (BLEQ overwrites R14)
400 CMP R0,#10 ;Is this a newline?
401 STREQB R0,[R10],#1 ;Save the newline character
402 MOVEQ R6,#tState__stmt ;Start a new statement
403 BLEQ tok__incLineNo ;Increment the line number
404 MOVS PC,R2 ;And return to caller
408 ; --- tok__blkCmt ---
;
; State handler for the inside of a block comment.
;
; On entry: R0 == character just read, R14 == return address
;
; Comment text is discarded. For each newline a byte of value 31 is
; written to the output in place of the newline, and the line number is
; bumped. A '*' moves to the comment-star state to look for the closing
; '/'. Corrupts R0 (on newline), R1 (via tok__incLineNo) and R2.
412 MOV R2,R14 ;Preserve the link (BLEQ overwrites R14)
413 CMP R0,#10 ;Is this a newline?
414 MOVEQ R0,#31 ;Yes -- insert a weird char
415 STREQB R0,[R10],#1 ;Put it in the buffer
416 BLEQ tok__incLineNo ;Increment the line number
; The conditional return relies on EQ still holding after the call.
417 MOVEQS PC,R2 ;And return
418 CMP R0,#'*' ;Is it a star?
419 MOVEQ R6,#tState__cmtStar ;Yes -- change mode then
420 MOVS PC,R2 ;Return to caller
424 ; --- tok__cmtStar ---
;
; State handler for the character after a '*' inside a block comment.
;
; On entry: R0 == character just read, R14 == return address
;
; '/' closes the comment and reloads the state parked at [R13,#4].
; Another '*' stays in this state. Anything else returns to the block
; comment state. A newline is recorded as a byte of value 31 and bumps
; the line number, as in the block comment state. Corrupts R1 and R2.
428 MOV R2,R14 ;Preserve the link (BLEQ overwrites R14)
429 CMP R0,#10 ;Is this a newline?
430 MOVEQ R1,#31 ;Yes -- insert a weird char
431 STREQB R1,[R10],#1 ;Put it in the buffer
432 BLEQ tok__incLineNo ;Increment the line number
433 CMP R0,#'/' ;Is the comment over now?
434 LDREQ R6,[R13,#4] ;Yes -- load previous state
435 CMPNE R0,#'*' ;Is it still a star?
436 MOVNE R6,#tState__blkCmt ;No -- change state back
437 MOVS PC,R2 ;And return to caller
441 ; --- tok__keyWord ---
;
; State handler that matches keywords one character at a time against
; the table at tokTable.
;
; On entry: R0 == character just read
;           R2 == last token matched so far (0 if none)
;           R4 == characters read since the last token match
;           R5 == offset of the current table from tokTable, in the
;                 top 16 bits
;
; Table layout, as this code decodes it. A table is a list of words
; ended by a zero word. In each word:
;   bits 31-24 == character to match
;   bits 23-16 == token byte completed by this character (0 if none)
;   bits 15-0  == offset from tokTable of the table for the next
;                 character (0 if there is none)
;
; NOTE(review): the routine returns with LDM R13!,{PC}^, so the link
; must have been pushed at entry. That push is not visible in this
; view -- confirm against the full file.
446 ADR R1,tokTable ;Point to the token table
447 ADD R1,R1,R5,LSR #16 ;Point into the table
448 CMP R0,#'.' ;Is this a dot?
449 BEQ %18tok__keyWord ;Yes -- jump ahead then
450 ADD R4,R4,#1 ;Increment char count
451 10tok__keyWord LDR R14,[R1],#4 ;Load the next table word
452 CMP R14,#0 ;Is this the end?
453 BEQ %15tok__keyWord ;Yes -- jump ahead
454 CMP R0,R14,LSR #24 ;Is this a match?
455 BNE %10tok__keyWord ;No -- keep looking
457 BIC R14,R14,#&FF000000 ;Clear char to match byte
458 MOVS R0,R14,LSR #16 ;Get the token byte
459 MOVNE R2,R0 ;This is a token
460 MOVNE R4,#0 ;So clear backtrack count
461 MOVS R5,R14,LSL #16 ;Next table offset into the top half of R5
462 LDMNEFD R13!,{PC}^ ;More to match -- return to caller
464 ; --- Come to the end of the line ---
; Reached when no entry matched, or when the matched entry has no
; further table. The input is rewound by R4, the characters read since
; the last token match.
466 15tok__keyWord SUB R7,R7,R4 ;Do the backtracking
467 CMP R2,#0 ;Did we find a token?
468 MOVEQ R6,#tState__ident ;No -- read an identifier
469 LDMEQFD R13!,{PC}^ ;Bad luck then
471 ; --- We have found a match ---
473 11tok__keyWord LDMFD R13!,{R14} ;Restore return address
474 MOV R5,R2 ;Get the matched token
476 ; --- Skip over REMS ---
; Comment tokens are not written to the output.
478 CMP R5,#tok_rem ;Check for REM statements
479 CMPNE R5,#tok_DD ;Or a // comment
480 MOVEQ R6,#tState__lineCmt ;Introduces line comments
481 MOVEQS PC,R14 ;Return if it was one
483 CMP R5,#tok_DT ;Is it a /* comment?
484 MOVEQ R6,#tState__blkCmt ;Yes -- it's a block comment
485 MOVEQS PC,R14 ;And return to caller
487 ; --- Set up various flags and things ---
489 17tok__keyWord STRB R5,[R10],#1 ;Store in the block
491 BIC R3,R3,#tFlag__readFN+tFlag__readPROC
492 CMP R5,#tok_proc ;Is this a PROC?
493 ORREQ R3,R3,#tFlag__readPROC ;Yes -- remember this
494 CMP R5,#tok_fn ;Or a FN?
495 ORREQ R3,R3,#tFlag__readFN ;Yes -- remember this
496 TST R3,#tFlag__readPROC+tFlag__readFN
497 MOVNE R6,#tState__label ;If either -- change state
498 MOVNES PC,R14 ;...and return
500 BIC R3,R3,#tFlag__readDEF ;No -- clear DEF flag
501 CMP R5,#tok_def ;Was it a DEF then?
502 ORREQ R3,R3,#tFlag__readDEF ;Yes -- set the def flag
504 ; --- Are we expecting a label next? ---
506 CMP R5,#tok_goto ;Is there a label next?
508 CMPNE R5,#tok_restore
509 MOVEQ R6,#tState__expLab ;Yes -- change state
510 MOVEQ R4,#0 ;...No characters read yet
; BIC leaves the flags alone, so the MOVEQS below still sees the result
; of the compares above.
511 BIC R3,R3,#tFlag__readDot ;We are not expecting a dot
512 MOVEQS PC,R14 ;...and return
514 ; --- Return to caller ---
516 MOV R6,#tState__dunno ;Change state back again
517 MOVS PC,R14 ;And return to caller
519 ; --- User has abbreviated key word ---
; From the current table, follow the first entry of each successive
; table until one has no further table. The token byte of that final
; entry is the keyword. R4 is zero when the loop ends.
521 18tok__keyWord ADR R0,tokTable ;Point to the table
522 19tok__keyWord LDR R5,[R1,#0] ;Load the first entry of this table
523 MOVS R4,R5,LSL #16 ;Isolate the next-table offset
524 ADDNE R1,R0,R4,LSR #16 ;If more to go -- point
525 BNE %19tok__keyWord ;...and keep on looping
526 BIC R2,R5,#&FF000000 ;Clear the match char
527 MOV R2,R2,LSR #16 ;And get the final token
528 B %11tok__keyWord ;Deal with the key word
534 ; --- States for the tokeniser ---
;
; The state value is held in R6 and indexes the tokeniser's dispatch,
; one word per state. The order here presumably has to match the order
; of that dispatch table, which is not visible in this view -- confirm
; before reordering or inserting states. The directive that sets the
; starting value for these '#' fields is also not visible.
537 tState__stmt # 1 ;Start of a new statement
538 tState__dunno # 1 ;Not sure what to expect
539 tState__string # 1 ;Tokenising a string
540 tState__dblQte # 1 ;Checking for double quotes
541 tState__star # 1 ;Processing a *command
542 tState__decimal # 1 ;Reading a decimal/bin number
543 tState__hex # 1 ;Reading a hex number
544 tState__ident # 1 ;Processing an identifier
545 tState__keyWord # 1 ;Checking for keywords
546 tState__label # 1 ;Reading a label
547 tState__expLab # 1 ;Waiting for a label
548 tState__lineCmt # 1 ;Skipping a line comment
549 tState__blkCmt # 1 ;Skipping a block comment
550 tState__cmtStar # 1 ;Found star in block comment
; Bits of the flags word held in R3 while lexing.
554 tFlag__readDot EQU (1<<0) ;Creating a label (set after a leading '.')
555 tFlag__readDEF EQU (1<<1) ;We're doing a def
556 tFlag__readFN EQU (1<<5) ;Just read a FN
557 tFlag__readPROC EQU (1<<6) ;Just read a PROC
559 ;----- Workspace ------------------------------------------------------------
561 ;----- That's all, folks ----------------------------------------------------