+(defun ez-encode (string)
+  "Encode STRING as a 2-word Z-string padded with Z-character 5.
+Used for matching input with dictionary entries when tokenizing.
+
+Each character of STRING is looked up in `ez-zstring-alphabets':
+a hit in alphabet 0 yields its position, a hit in alphabet 1 yields
+4 followed by its position, a hit in alphabet 2 yields 5 followed by
+its position, and any other character yields 5 6 followed by the
+character code split into two 5-bit halves.  The resulting sequence
+is truncated or padded to exactly six Z-characters.
+
+Returns a length-2 list of words for comparison with a dictionary
+entry.  Bit 15 is set on the second word."
+  (let* ((a0 (elt ez-zstring-alphabets 0))
+         (a1 (elt ez-zstring-alphabets 1))
+         (a2 (elt ez-zstring-alphabets 2))
+         (zchars
+          (mapcan
+           (lambda (c)
+             ;; Scan each alphabet once: `seq-position' returns nil
+             ;; when C is absent, and 0 is non-nil in Lisp.
+             (let (pos)
+               (cond
+                ((setq pos (seq-position a0 c)) (list pos))
+                ((setq pos (seq-position a1 c)) (list 4 pos))
+                ((setq pos (seq-position a2 c)) (list 5 pos))
+                (t
+                 ;; Escape sequence.  Both halves are masked to 5 bits
+                 ;; so a code above 1023 cannot spill into the
+                 ;; neighbouring fields of the packed word.
+                 (list 5 6
+                       (logand (ash c -5) #b11111)
+                       (logand c #b11111))))))
+           string))
+         ;; Appending six pad characters and keeping the first six
+         ;; both pads short input and truncates long input.
+         (z (take 6 (append zchars (make-list 6 5)))))
+    (list
+     (logior (ash (nth 0 z) 10)
+             (ash (nth 1 z) 5)
+             (nth 2 z))
+     (logior (ash 1 15)
+             (ash (nth 3 z) 10)
+             (ash (nth 4 z) 5)
+             (nth 5 z)))))
+
+;; Dictionary
+
+(defun ez-parse-dictionary-header ()
+  "Read the separator list from the dictionary header.
+The byte at `ez-dict-base' gives the number of separators, and the
+bytes immediately after it are the separator codes.  The codes are
+collected in order into a list, which is stored in
+`ez-dict-separators' and returned."
+  (let* ((nseps (ez-mem-ref-byte ez-dict-base))
+         (separators
+          (mapcar (lambda (i) (ez-mem-ref-byte (+ ez-dict-base i)))
+                  (number-sequence 1 nseps))))
+    ;; Dictionary entries are not cached here.  After the separators
+    ;; the header holds the entry length (a byte at offset 1 + NSEPS)
+    ;; and the entry count (a word at offset 2 + NSEPS); the entries
+    ;; themselves start at offset NSEPS + 4.
+    (setq ez-dict-separators separators)))
+
+(defun ez-is-separator (char)
+  "Return t if CHAR is one of the dictionary's separator codes, else nil.
+The separator count is the byte at `ez-dict-base'; the codes are the
+bytes that follow it.  They are checked from the last one back to the
+first, stopping at the first match."
+  (let ((count (ez-mem-ref-byte ez-dict-base)))
+    (catch 'found
+      (dotimes (k count)
+        ;; K runs 0..COUNT-1, so the offset runs COUNT down to 1.
+        (when (= (ez-mem-ref-byte (+ ez-dict-base (- count k)))
+                 char)
+          (throw 'found t)))
+      nil)))
+
+(defun ez-lookup-dictionary (text)
+  "Return the address of the dictionary entry matching TEXT, or 0.
+TEXT is encoded with `ez-encode' and compared against the first two
+words of each entry in turn, starting at the first entry after the
+dictionary header.  The scan stops at the first match."
+  (let* ((target (ez-encode text))
+         (sep-count (ez-mem-ref-byte ez-dict-base))
+         (entry-size (ez-mem-ref-byte (+ ez-dict-base 1 sep-count)))
+         (entry-count (ez-mem-ref-word (+ ez-dict-base 2 sep-count)))
+         (addr (+ ez-dict-base sep-count 4)))
+    (catch 'match
+      (dotimes (_ entry-count)
+        (when (equal (ez-mem-ref-words addr 2) target)
+          (throw 'match addr))
+        ;; No match here; step to the next fixed-size entry.
+        (setq addr (+ addr entry-size)))
+      ;; Every entry was examined without a match.
+      0)))