(module parser (string->prog) (import scheme (chicken base) (chicken irregex) (chicken io) (chicken string) srfi-13 mars) (define (string->prog str) (let ((idx 0) (l (string-length str)) (whitespace-irx (irregex "^[ \t]+")) (newline-irx (irregex "^\n")) (comma-irx (irregex "^,")) (period-irx (irregex "^\\.")) (redcode-irx (irregex "^;redcode\n")) (name-start-irx (irregex "^;[ \t]*name ")) (name-irx (irregex "^[^\n]*")) (author-start-irx (irregex "^;[ \t]*author ")) (author-irx (irregex "^[^\n]*")) (comment-irx (irregex "^(;[^\n]*)?\n")) (org-irx (irregex "^ORG")) (opcode-DAT-irx (irregex "^DAT")) (opcode-MOV-irx (irregex "^MOV")) (opcode-ADD-irx (irregex "^ADD")) (opcode-SUB-irx (irregex "^SUB")) (opcode-MUL-irx (irregex "^MUL")) (opcode-DIV-irx (irregex "^DIV")) (opcode-MOD-irx (irregex "^MOD")) (opcode-JMP-irx (irregex "^JMP")) (opcode-JMZ-irx (irregex "^JMZ")) (opcode-JMN-irx (irregex "^JMN")) (opcode-DMN-irx (irregex "^JMN")) (opcode-DJN-irx (irregex "^DJN")) (opcode-CMP-irx (irregex "^CMP")) (opcode-SEQ-irx (irregex "^SEQ")) (opcode-SNE-irx (irregex "^SNE")) (opcode-SLT-irx (irregex "^SLT")) (opcode-SPL-irx (irregex "^SPL")) (opcode-NOP-irx (irregex "^NOP")) (modifier-A-irx (irregex "^A")) (modifier-B-irx (irregex "^B")) (modifier-AB-irx (irregex "^AB")) (modifier-BA-irx (irregex "^BA")) (modifier-F-irx (irregex "^F")) (modifier-X-irx (irregex "^X")) (modifier-I-irx (irregex "^I")) (mode-immediate-irx (irregex "^#")) (mode-direct-irx (irregex "^\\$")) (mode-indirect-A-irx (irregex "^\\*")) (mode-indirect-B-irx (irregex "^@")) (mode-pre-indirect-A-irx (irregex "^\\{")) (mode-pre-indirect-B-irx (irregex "^<")) (mode-post-indirect-A-irx (irregex "^\\}")) (mode-post-indirect-B-irx (irregex "^>")) (number-irx (irregex "^(\\+|-)?[0-9]+"))) (define (accept-token irx . rest) (let ((wsmatch (irregex-search whitespace-irx (substring str idx)))) (if wsmatch (set! idx (+ idx (irregex-match-end-index wsmatch))))) ;Skip leading whitespace (let ((mandatory (and (= (length rest) 1) (car rest))) (res (irregex-search irx (substring str idx)))) (if res (begin (set! idx (+ idx (irregex-match-end-index res))) (irregex-match-substring res)) (if mandatory (error "Unexpected token at input string index" idx) #f)))) (define (load-file) (accept-token redcode-irx #t) (let loop ((instrs '()) (offset 0) (name '()) (author '())) (let ((this-line (line))) (if this-line (case (car this-line) ((name) (loop instrs offset (cdr this-line) author)) ((author) (loop instrs offset name (cdr this-line))) ((comment) (loop instrs offset name author)) ((org) (loop instrs (cdr this-line) name author)) ((instr) (loop (cons (cdr this-line) instrs) offset name author))) (make-prog name author (reverse instrs) offset))))) (define (line) (or (name-line) (author-line) (comment-line) (org-line) (instruction-line))) (define (name-line) (if (accept-token name-start-irx) (cons 'name (string-trim (accept-token name-irx #t))) #f)) (define (author-line) (if (accept-token author-start-irx) (cons 'author (string-trim (accept-token author-irx #t))) #f)) (define (comment-line) (if (accept-token comment-irx) '(comment) #f)) (define (org-line) (if (accept-token org-irx) (cons 'org (string->number (accept-token number-irx #t))) #f)) (define (instruction-line) (let ((oc (opcode))) (if oc (let ((x (accept-token period-irx #t)) (modif (modifier)) (A-mode (mode)) (A-num (string->number (accept-token number-irx #t))) (y (accept-token comma-irx #t)) (B-mode (mode)) (B-num (string->number (accept-token number-irx #t))) (z (accept-token comment-irx #t))) (cons 'instr (make-instr oc modif A-mode A-num B-mode B-num))) #f))) (define (opcode) (let ((res (or (accept-token opcode-DAT-irx) (accept-token opcode-MOV-irx) (accept-token opcode-ADD-irx) (accept-token opcode-SUB-irx) (accept-token opcode-MUL-irx) (accept-token opcode-DIV-irx) (accept-token opcode-MOD-irx) (accept-token opcode-JMP-irx) (accept-token opcode-JMZ-irx) (accept-token opcode-JMN-irx) (accept-token opcode-DJN-irx) (accept-token opcode-CMP-irx) (accept-token opcode-SEQ-irx) (accept-token opcode-SNE-irx) (accept-token opcode-SLT-irx) (accept-token opcode-SPL-irx) (accept-token opcode-NOP-irx)))) (if res (string->symbol res) #f))) (define (modifier) (string->symbol (or (accept-token modifier-AB-irx) (accept-token modifier-BA-irx) (accept-token modifier-A-irx) (accept-token modifier-B-irx) (accept-token modifier-F-irx) (accept-token modifier-X-irx) (accept-token modifier-I-irx)))) (define (mode) (or (mode-immediate) (mode-direct) (mode-indirect-A) (mode-indirect-B) (mode-pre-indirect-A) (mode-pre-indirect-B) (mode-post-indirect-A) (mode-post-indirect-B))) (define (mode-immediate) (and (accept-token mode-immediate-irx) 'immediate)) (define (mode-direct) (and (accept-token mode-direct-irx) 'direct)) (define (mode-indirect-A) (and (accept-token mode-indirect-A-irx) 'indirect-A)) (define (mode-indirect-B) (and (accept-token mode-indirect-B-irx) 'indirect-B)) (define (mode-pre-indirect-A) (and (accept-token mode-pre-indirect-A-irx) 'pre-indirect-A)) (define (mode-pre-indirect-B) (and (accept-token mode-pre-indirect-B-irx) 'pre-indirect-B)) (define (mode-post-indirect-A) (and (accept-token mode-post-indirect-A-irx) 'post-indirect-A)) (define (mode-post-indirect-B) (and (accept-token mode-post-indirect-B-irx) 'post-indirect-B)) (load-file))))