X-Git-Url: https://thelambdalab.xyz/gitweb/index.cgi?p=jars.git;a=blobdiff_plain;f=parser.scm;fp=parser.scm;h=11491b7b65bd9bb5715a585d59cee5c009e32c14;hp=0000000000000000000000000000000000000000;hb=6fa8a83fb4cf917fabe4c2bb930b8b092c9c7519;hpb=28a3308e193e60e376fe9f171513ef541bb08385 diff --git a/parser.scm b/parser.scm new file mode 100644 index 0000000..11491b7 --- /dev/null +++ b/parser.scm @@ -0,0 +1,174 @@ +(module parser + (string->prog) + + (import scheme + (chicken base) + (chicken irregex) + (chicken io) + (chicken string) + mars) + + (define (string->prog str) + (let ((idx 0) + (l (string-length str)) + (whitespace-irx (irregex "^[ \t]+")) + (newline-irx (irregex "^\n")) + (comma-irx (irregex "^,")) + (period-irx (irregex "^\\.")) + (redcode-irx (irregex "^;redcode\n")) + (name-start-irx (irregex "^;[ \t]*name ")) + (name-irx (irregex "^[a-zA-Z0-9]+")) + (comment-irx (irregex "^(;[^\n]*)?\n")) + (org-irx (irregex "^ORG")) + (opcode-DAT-irx (irregex "^DAT")) + (opcode-MOV-irx (irregex "^MOV")) + (opcode-ADD-irx (irregex "^ADD")) + (opcode-SUB-irx (irregex "^SUB")) + (opcode-MUL-irx (irregex "^MUL")) + (opcode-DIV-irx (irregex "^DIV")) + (opcode-MOD-irx (irregex "^MOD")) + (opcode-JMP-irx (irregex "^JMP")) + (opcode-JMZ-irx (irregex "^JMZ")) + (opcode-JMN-irx (irregex "^JMN")) + (opcode-DMN-irx (irregex "^JMN")) + (opcode-DJN-irx (irregex "^DJN")) + (opcode-CMP-irx (irregex "^CMP")) + (opcode-SEQ-irx (irregex "^SEQ")) + (opcode-SNE-irx (irregex "^SNE")) + (opcode-SLT-irx (irregex "^SLT")) + (opcode-SPL-irx (irregex "^SPL")) + (opcode-NOP-irx (irregex "^NOP")) + (modifier-A-irx (irregex "^A")) + (modifier-B-irx (irregex "^B")) + (modifier-AB-irx (irregex "^AB")) + (modifier-BA-irx (irregex "^BA")) + (modifier-F-irx (irregex "^F")) + (modifier-X-irx (irregex "^X")) + (modifier-I-irx (irregex "^I")) + (mode-immediate-irx (irregex "^#")) + (mode-direct-irx (irregex "^\\$")) + (mode-indirect-A-irx (irregex "^\\*")) + (mode-indirect-B-irx (irregex "^@")) + (mode-pre-indirect-A-irx (irregex "^\\{")) + (mode-pre-indirect-B-irx (irregex "^<")) + (mode-post-indirect-A-irx (irregex "^\\}")) + (mode-post-indirect-B-irx (irregex "^>")) + (number-irx (irregex "^(\\+|-)?[0-9]+"))) + (define (accept-token irx . rest) + (let ((wsmatch (irregex-search whitespace-irx (substring str idx)))) + (if wsmatch + (set! idx (+ idx (irregex-match-end-index wsmatch))))) ;Skip leading whitespace + (let ((mandatory (and (= (length rest) 1) (car rest))) + (res (irregex-search irx (substring str idx)))) + (if res + (begin + (set! idx (+ idx (irregex-match-end-index res))) + (irregex-match-substring res)) + (if mandatory + (error "Unexpected token at input string index" idx) + #f)))) + (define (load-file) + (accept-token redcode-irx #t) + (let loop ((instrs '()) + (offset 0) + (name '())) + (let ((this-line (line))) + (if this-line + (case (car this-line) + ((name) (loop instrs offset (cdr this-line))) + ((comment) (loop instrs offset name)) + ((org) (loop instrs (cdr this-line) name)) + ((instr) (loop (cons (cdr this-line) instrs) offset name))) + (make-prog name (reverse instrs) offset))))) + (define (line) + (or (name-line) + (comment-line) + (org-line) + (instruction-line))) + (define (name-line) + (if (accept-token name-start-irx) + (cons 'name (string->symbol (accept-token name-irx #t))) + #f)) + (define (comment-line) + (if (accept-token comment-irx) + '(comment) + #f)) + (define (org-line) + (if (accept-token org-irx) + (cons 'org (string->number (accept-token number-irx #t))) + #f)) + (define (instruction-line) + (let ((oc (opcode))) + (if oc + (let ((x (accept-token period-irx #t)) + (modif (modifier)) + (A-mode (mode)) + (A-num (accept-token number-irx #t)) + (y (accept-token comma-irx #t)) + (B-mode (mode)) + (B-num (accept-token number-irx #t)) + (z (accept-token comment-irx #t))) + (cons 'instr (make-instr oc modif A-mode A-num B-mode B-num))) + #f))) + (define (opcode) + (let ((res (or (accept-token opcode-DAT-irx) + (accept-token opcode-MOV-irx) + (accept-token opcode-ADD-irx) + (accept-token opcode-SUB-irx) + (accept-token opcode-MUL-irx) + (accept-token opcode-DIV-irx) + (accept-token opcode-MOD-irx) + (accept-token opcode-JMP-irx) + (accept-token opcode-JMZ-irx) + (accept-token opcode-JMN-irx) + (accept-token opcode-DJN-irx) + (accept-token opcode-CMP-irx) + (accept-token opcode-SEQ-irx) + (accept-token opcode-SNE-irx) + (accept-token opcode-SLT-irx) + (accept-token opcode-SPL-irx) + (accept-token opcode-NOP-irx)))) + (if res (string->symbol res) #f))) + (define (modifier) + (string->symbol + (or (accept-token modifier-AB-irx) + (accept-token modifier-BA-irx) + (accept-token modifier-A-irx) + (accept-token modifier-B-irx) + (accept-token modifier-F-irx) + (accept-token modifier-X-irx) + (accept-token modifier-I-irx)))) + (define (mode) + (or (mode-immediate) + (mode-direct) + (mode-indirect-A) + (mode-indirect-B) + (mode-pre-indirect-A) + (mode-pre-indirect-B) + (mode-post-indirect-A) + (mode-post-indirect-B))) + (define (mode-immediate) + (and (accept-token mode-immediate-irx) + 'immediate)) + (define (mode-direct) + (and (accept-token mode-direct-irx) + 'direct)) + (define (mode-indirect-A) + (and (accept-token mode-indirect-A-irx) + 'indirect-A)) + (define (mode-indirect-B) + (and (accept-token mode-indirect-B-irx) + 'indirect-B)) + (define (mode-pre-indirect-A) + (and (accept-token mode-pre-indirect-A-irx) + 'pre-indirect-A)) + (define (mode-pre-indirect-B) + (and (accept-token mode-pre-indirect-B-irx) + 'pre-indirect-B)) + (define (mode-post-indirect-A) + (and (accept-token mode-post-indirect-A-irx) + 'post-indirect-A)) + (define (mode-post-indirect-B) + (and (accept-token mode-post-indirect-B-irx) + 'post-indirect-B)) + (load-file))))