Files
gulie/test/test-tokenizer.scm
2026-04-01 23:35:50 +02:00

128 lines
4.0 KiB
Scheme

;;; Tests for (gulie tokenizer)
(use-modules (srfi srfi-1)
(srfi srfi-64)
(ice-9 rdelim)
(gulie tokenizer))
(test-begin "tokenizer")
;;; Roundtrip invariant — the most critical test
(test-group "roundtrip"
(define (roundtrip-ok? input)
(let* ((tokens (tokenize input "test.scm"))
(result (string-concatenate (map token-text tokens))))
(string=? input result)))
(test-assert "empty input"
(roundtrip-ok? ""))
(test-assert "simple expression"
(roundtrip-ok? "(define x 42)"))
(test-assert "nested expressions"
(roundtrip-ok? "(define (foo x)\n (+ x 1))\n"))
(test-assert "string with escapes"
(roundtrip-ok? "(define s \"hello \\\"world\\\"\")"))
(test-assert "line comment"
(roundtrip-ok? ";; a comment\n(define x 1)\n"))
(test-assert "block comment"
(roundtrip-ok? "#| block\ncomment |#\n(define x 1)"))
(test-assert "nested block comment"
(roundtrip-ok? "#| outer #| inner |# outer |#"))
(test-assert "datum comment"
(roundtrip-ok? "#;(skip this) (keep this)"))
(test-assert "character literals"
(roundtrip-ok? "(list #\\space #\\newline #\\a #\\x41)"))
(test-assert "keywords"
(roundtrip-ok? "(foo #:bar #:baz)"))
(test-assert "booleans"
(roundtrip-ok? "(list #t #f #true #false)"))
(test-assert "vectors"
(roundtrip-ok? "#(1 2 3)"))
(test-assert "quasiquote and unquote"
(roundtrip-ok? "`(a ,b ,@c)"))
(test-assert "syntax shorthands"
(roundtrip-ok? "#'x #`x #,x #,@x"))
(test-assert "dot notation"
(roundtrip-ok? "(a . b)"))
(test-assert "numbers"
(roundtrip-ok? "(+ 1 2.5 -3 +4 1/3 #xff)"))
(test-assert "square brackets"
(roundtrip-ok? "(let ([x 1] [y 2]) (+ x y))"))
(test-assert "multiline with mixed content"
(roundtrip-ok? "(define-module (foo bar)
#:use-module (ice-9 format)
#:export (baz))
;;; Section header
(define (baz x)
;; body comment
(format #t \"value: ~a\\n\" x))
"))
;; Real-world file roundtrip
(test-assert "real guile source file"
(let ((text (call-with-input-file "refs/guile/module/ice-9/pretty-print.scm"
(lambda (port)
(let lp ((acc '()))
(let ((ch (read-char port)))
(if (eof-object? ch)
(list->string (reverse acc))
(lp (cons ch acc)))))))))
(roundtrip-ok? text))))
;;; Token type classification
(test-group "token-types"
(define (first-token-type input)
(token-type (car (tokenize input "test.scm"))))
(test-eq "symbol" 'symbol (first-token-type "foo"))
(test-eq "number" 'number (first-token-type "42"))
(test-eq "string" 'string (first-token-type "\"hello\""))
(test-eq "open-paren" 'open-paren (first-token-type "("))
(test-eq "close-paren" 'close-paren (first-token-type ")"))
(test-eq "boolean-true" 'boolean (first-token-type "#t"))
(test-eq "boolean-false" 'boolean (first-token-type "#f"))
(test-eq "keyword" 'keyword (first-token-type "#:foo"))
(test-eq "character" 'character (first-token-type "#\\a"))
(test-eq "line-comment" 'line-comment (first-token-type ";; hi"))
(test-eq "prefix-quote" 'prefix (first-token-type "'"))
(test-eq "prefix-quasiquote" 'prefix (first-token-type "`"))
(test-eq "dot" 'dot (first-token-type ". "))
(test-eq "newline" 'newline (first-token-type "\n")))
;;; Source location tracking
(test-group "source-locations"
(let ((tokens (tokenize "(define\n x\n 42)" "test.scm")))
(test-equal "first token line" 1 (token-line (car tokens)))
(test-equal "first token col" 0 (token-column (car tokens)))
;; Find 'x' token
(let ((x-tok (find (lambda (t) (and (eq? (token-type t) 'symbol)
(string=? (token-text t) "x")))
tokens)))
(test-assert "found x token" x-tok)
(test-equal "x on line 2" 2 (token-line x-tok))
(test-equal "x at column 2" 2 (token-column x-tok)))))
(test-end "tokenizer")