X-Git-Url: https://thelambdalab.xyz/gitweb/index.cgi?p=ez.git;a=blobdiff_plain;f=ez.el;h=af348a982f49420db40b407bda96f4819116140c;hp=f4f698ecd33ef8f18b5830bee982a3fbf41f80ea;hb=HEAD;hpb=7f9ce877293321348e0ce0eaf20162199b39e2ad

Patch summary (ez.el):
  * Add list/sequence memory accessors: ez-mem-ref-bytes, ez-mem-set-bytes,
    ez-mem-ref-words and ez-mem-ref-string; ez-mem-ref-word now uses
    logior/lsh instead of multiply/add.
  * Add ez-make-zstring-word, which packs three 5-bit Z-characters.
  * ez-encode now returns two packed words (bit 15 set on the second).
  * Rename ez-parse-dictionary to ez-parse-dictionary-header and stop
    building the entry list; add ez-is-separator and ez-lookup-dictionary,
    which read the dictionary straight from memory.
  * ez-op-read2 closes its loop before terminating the text and returns
    `run'; ez--tokenize (with helper ez--add-token) fills the parse buffer.

Reviewer notes:
  * Line breaks, indentation and blank context lines were reconstructed
    from a whitespace-collapsed copy of this patch; check them against the
    repository before applying.
  * New-side hunk positions and counts were recomputed for the edited
    lines.  The `index' line below still names the original blobs.

diff --git a/ez.el b/ez.el
index f4f698e..a6f1f9c 100644
--- a/ez.el
+++ b/ez.el
@@ -93,12 +93,27 @@
 (defun ez-mem-ref-byte (addr)
   (aref ez-memory addr))
 
+(defun ez-mem-ref-bytes (addr count)
+  "Return a list of the COUNT bytes stored from ADDR onwards."
+  (mapcar (lambda (offset) (ez-mem-ref-byte (+ addr offset)))
+          (number-sequence 0 (- count 1))))
+
 (defun ez-mem-set-byte (addr val)
   (aset ez-memory addr val))
 
+(defun ez-mem-set-bytes (addr vals)
+  "Store the elements of sequence VALS as consecutive bytes from ADDR."
+  (dotimes (i (length vals))
+    (ez-mem-set-byte (+ addr i) (elt vals i))))
+
 (defun ez-mem-ref-word (addr)
-  (+ (* 256 (aref ez-memory addr))
-     (aref ez-memory (+ addr 1))))
+  (logior (lsh (aref ez-memory addr) 8)
+          (aref ez-memory (+ addr 1))))
+
+(defun ez-mem-ref-words (addr count)
+  "Return a list of COUNT big-endian words, the first one at ADDR."
+  (mapcar (lambda (offset) (ez-mem-ref-word (+ addr offset)))
+          (number-sequence 0 (- (* 2 count) 1) 2)))
 
 (defun ez-mem-set-word (addr val)
   (let ((byte-high (/ val 256))
@@ -106,6 +121,10 @@
     (aset ez-memory addr byte-high)
     (aset ez-memory (+ addr 1) byte-low)))
 
+(defun ez-mem-ref-string (addr1 addr2)
+  "Return the elements of `ez-memory' from ADDR1 up to, not including, ADDR2."
+  (substring ez-memory addr1 addr2))
+
 (defun ez-parse-header ()
   (setq ez-version (ez-mem-ref-byte #x0))
   (setq ez-himem-addr (ez-mem-ref-word #x4))
@@ -280,6 +299,13 @@
         (logand (lsh word -5) #b11111)
         (logand word #b11111)))
 
+(defun ez-make-zstring-word (zchar-triple)
+  "Pack the three 5-bit Z-characters in ZCHAR-TRIPLE into one word."
+  (let ((c1 (elt zchar-triple 0))
+        (c2 (elt zchar-triple 1))
+        (c3 (elt zchar-triple 2)))
+    (logior (lsh c1 10) (lsh c2 5) c3)))
+
 (defun ez-get-zstring-chars-and-length (base-addr)
   (let ((addr base-addr)
         (chars nil)
@@ -328,7 +354,8 @@
 
 (defun ez-encode (string)
   "Encodes a string as a 2-word 5-padded Z-string.
-Used for matching input with dictionary entries when tokenizing."
+Used for matching input with dictionary entries when tokenizing.
+Returns a length-2 list of words for comparison with a dictionary entry."
   (let ((chars
          (mapcan
           (lambda (c)
@@ -342,13 +369,28 @@ Used for matching input with dictionary entries when tokenizing."
              (t (list 5 6 (lsh c -5)
                       (logand c #b11111)))))
           string)))
-    (if (< (length chars) 6)
-        (append chars (make-list (- 6 (length chars)) 5))
-      (take 6 chars))))
+    (let* ((zstring-chars
+            (if (< (length chars) 6)
+                (append chars (make-list (- 6 (length chars)) 5))
+              (take 6 chars)))
+           (c1 (elt zstring-chars 0))
+           (c2 (elt zstring-chars 1))
+           (c3 (elt zstring-chars 2))
+           (c4 (elt zstring-chars 3))
+           (c5 (elt zstring-chars 4))
+           (c6 (elt zstring-chars 5)))
+      (list
+       (logior (lsh c1 10)
+               (lsh c2 5)
+               c3)
+       (logior (lsh 1 15)
+               (lsh c4 10)
+               (lsh c5 5)
+               c6)))))
 
 ;; Dictionary
 
-(defun ez-parse-dictionary ()
+(defun ez-parse-dictionary-header ()
   (let* ((nseps (ez-mem-ref-byte ez-dict-base))
          (separators
           (mapcar (lambda (i) (ez-mem-ref-byte (+ ez-dict-base i)))
@@ -357,14 +399,43 @@ Used for matching input with dictionary entries when tokenizing."
          (nentries (ez-mem-ref-word (+ ez-dict-base 2 nseps)))
          (entries-base (+ ez-dict-base nseps 4))
          (entries nil))
-    (dotimes (i nentries)
-      (let ((this-base (+ entries-base (* 7 i))))
-        (setq entries (cons (cons (ez-get-zstring this-base)
-                                  this-base)
-                            entries))))
-    (setq ez-dict-entries (reverse entries))
+    ;; (dotimes (i nentries)
+    ;;   (let ((this-base (+ entries-base (* bytes-per-entry i))))
+    ;;     (setq entries (cons (cons (ez-get-zstring this-base)
+    ;;                               this-base)
+    ;;                         entries))))
+    ;; (setq ez-dict-entries (reverse entries))
     (setq ez-dict-separators separators)))
 
+(defun ez-is-separator (char)
+  "Return non-nil if CHAR is one of the separator bytes of the dictionary."
+  (let* ((nseps (ez-mem-ref-byte ez-dict-base)))
+    (while (and (> nseps 0)
+                (not (= (ez-mem-ref-byte (+ ez-dict-base nseps))
+                        char)))
+      (setq nseps (- nseps 1)))
+    (> nseps 0)))
+
+(defun ez-lookup-dictionary (text)
+  "Return the address of the dictionary entry matching TEXT, or 0 if none."
+  (let* ((encoded-text (ez-encode text))
+         (nseps (ez-mem-ref-byte ez-dict-base))
+         (bytes-per-entry (ez-mem-ref-byte (+ ez-dict-base 1 nseps)))
+         (nentries (ez-mem-ref-word (+ ez-dict-base 2 nseps)))
+         (entries-seen 0)
+         (this-entry (+ ez-dict-base nseps 4)))
+
+    (while (and (< entries-seen nentries)
+                (not (equal
+                      (ez-mem-ref-words this-entry 2)
+                      encoded-text)))
+      (setq entries-seen (+ entries-seen 1))
+      (setq this-entry (+ this-entry bytes-per-entry)))
+
+    (if (< entries-seen nentries)
+        this-entry
+      0)))
+
 ;; Call stack
 
 (defvar ez-call-stack nil)
@@ -895,31 +966,70 @@ Used for matching input with dictionary entries when tokenizing."
 
 (defun ez-op-read2 (input-string)
+  "Copy INPUT-STRING into the text buffer, tokenize it and return `run'."
   (let* ((baddr1 (car ez--next-read-args))
-         (baddr2 (cadr ez--next-read-args))
-         (dict (ez-get-dictionary))
-         (separators (car dict))
-         (wordlist (cdr dict))
-         (token-start 0))
+         (baddr2 (cadr ez--next-read-args)))
 
     (dotimes (i (length input-string))
       (let ((char (elt input-string i)))
-        (ez-mem-set-byte (+ baddr1 1 i) char)
+        (ez-mem-set-byte (+ baddr1 1 i) char)))
 
     (ez-mem-set-byte (+ baddr1 1 (length input-string)) 0)
 
-    (ez--tokenize baddr1 baddr2)))))
+    (ez--tokenize baddr1 baddr2))
+  'run)
 
-(defun ez--tokenize (taddr baddr)
-  (let ((unfinished t)
-        (token-start 0)
-        (token-end 0)
-        (token-string ""))
-    (while unfinished
-      (let ((char (ez-mem-ref-byte (+ taddr 1 token-end))))
-        (cond
-         ((eq char ?\s))
-         ((memq char ez-dict-separators))
-         )
-        ))))
+(defun ez--add-token (tb-baddr pb-baddr count start end)
+  "Record the token at text offsets START (inclusive) to END (exclusive).
+Offsets count from the first character, which is stored at TB-BADDR + 1.
+COUNT is the number of tokens already recorded in the parse buffer at
+PB-BADDR.  Return the updated count: empty tokens are skipped, and so
+are tokens beyond the capacity given by the byte at PB-BADDR."
+  (if (or (>= start end)
+          (>= count (ez-mem-ref-byte pb-baddr)))
+      count
+    (let ((text (ez-mem-ref-string (+ tb-baddr 1 start)
+                                   (+ tb-baddr 1 end)))
+          (block-addr (+ pb-baddr 2 (* count 4))))
+      (ez-mem-set-word block-addr (ez-lookup-dictionary text))
+      ;; Token length, then the position of its first letter counted
+      ;; from TB-BADDR itself, hence the extra 1.
+      (ez-mem-set-bytes (+ block-addr 2)
+                        (list (length text) (+ start 1)))
+      (+ count 1))))
+
+(defun ez--tokenize (tb-baddr pb-baddr)
+  "Tokenize the text buffer at TB-BADDR into the parse buffer at PB-BADDR.
+Characters are read from TB-BADDR + 1 up to a terminating zero byte.
+Spaces end a token; a dictionary separator ends a token and is itself
+recorded as a one-character token.  The token count is stored in the
+byte at PB-BADDR + 1."
+  (let ((unfinished t)
+        (token-start 0)
+        (token-end 0)
+        (token-count 0))
+
+    (while unfinished
+      (let ((next-char (ez-mem-ref-byte (+ tb-baddr 1 token-end))))
+        (if (not (or (eq next-char 0)
+                     (eq next-char ?\s)
+                     (ez-is-separator next-char)))
+            ;; Ordinary character: extend the current token.
+            (setq token-end (+ token-end 1))
+          ;; Delimiter: add the token that ends here, if there is one.
+          (setq token-count
+                (ez--add-token tb-baddr pb-baddr token-count
+                               token-start token-end))
+          (cond
+           ((eq next-char 0)
+            (setq unfinished nil))
+           ((not (eq next-char ?\s))
+            ;; Add separator token
+            (setq token-count
+                  (ez--add-token tb-baddr pb-baddr token-count
+                                 token-end (+ token-end 1)))))
+          (setq token-end (+ token-end 1))
+          (setq token-start token-end))))
+
+    (ez-mem-set-byte (+ pb-baddr 1) token-count)))
 
 ;; Execution loop
@@ -927,7 +1037,7 @@ Used for matching input with dictionary entries when tokenizing."
 (defun ez-load-and-run (filename)
+  "Load FILENAME with `ez-load-file', parse its header and enter `ez-run'."
   (ez-load-file filename)
   (ez-parse-header)
-  (ez-parse-dictionary)
   (setq ez-call-stack (list (ez-make-call-stack-frame ez-start-pc)))
   (ez-run))
 