;;; Tests for (gulie tokenizer) (use-modules (srfi srfi-1) (srfi srfi-64) (ice-9 rdelim) (gulie tokenizer)) (test-begin "tokenizer") ;;; Roundtrip invariant — the most critical test (test-group "roundtrip" (define (roundtrip-ok? input) (let* ((tokens (tokenize input "test.scm")) (result (string-concatenate (map token-text tokens)))) (string=? input result))) (test-assert "empty input" (roundtrip-ok? "")) (test-assert "simple expression" (roundtrip-ok? "(define x 42)")) (test-assert "nested expressions" (roundtrip-ok? "(define (foo x)\n (+ x 1))\n")) (test-assert "string with escapes" (roundtrip-ok? "(define s \"hello \\\"world\\\"\")")) (test-assert "line comment" (roundtrip-ok? ";; a comment\n(define x 1)\n")) (test-assert "block comment" (roundtrip-ok? "#| block\ncomment |#\n(define x 1)")) (test-assert "nested block comment" (roundtrip-ok? "#| outer #| inner |# outer |#")) (test-assert "datum comment" (roundtrip-ok? "#;(skip this) (keep this)")) (test-assert "character literals" (roundtrip-ok? "(list #\\space #\\newline #\\a #\\x41)")) (test-assert "keywords" (roundtrip-ok? "(foo #:bar #:baz)")) (test-assert "booleans" (roundtrip-ok? "(list #t #f #true #false)")) (test-assert "vectors" (roundtrip-ok? "#(1 2 3)")) (test-assert "quasiquote and unquote" (roundtrip-ok? "`(a ,b ,@c)")) (test-assert "syntax shorthands" (roundtrip-ok? "#'x #`x #,x #,@x")) (test-assert "dot notation" (roundtrip-ok? "(a . b)")) (test-assert "numbers" (roundtrip-ok? "(+ 1 2.5 -3 +4 1/3 #xff)")) (test-assert "square brackets" (roundtrip-ok? "(let ([x 1] [y 2]) (+ x y))")) (test-assert "multiline with mixed content" (roundtrip-ok? "(define-module (foo bar) #:use-module (ice-9 format) #:export (baz)) ;;; Section header (define (baz x) ;; body comment (format #t \"value: ~a\\n\" x)) ")) ;; Real-world file roundtrip (test-assert "real guile source file" (let ((text (call-with-input-file "refs/guile/module/ice-9/pretty-print.scm" (lambda (port) (let lp ((acc '())) (let ((ch (read-char port))) (if (eof-object? ch) (list->string (reverse acc)) (lp (cons ch acc))))))))) (roundtrip-ok? text)))) ;;; Token type classification (test-group "token-types" (define (first-token-type input) (token-type (car (tokenize input "test.scm")))) (test-eq "symbol" 'symbol (first-token-type "foo")) (test-eq "number" 'number (first-token-type "42")) (test-eq "string" 'string (first-token-type "\"hello\"")) (test-eq "open-paren" 'open-paren (first-token-type "(")) (test-eq "close-paren" 'close-paren (first-token-type ")")) (test-eq "boolean-true" 'boolean (first-token-type "#t")) (test-eq "boolean-false" 'boolean (first-token-type "#f")) (test-eq "keyword" 'keyword (first-token-type "#:foo")) (test-eq "character" 'character (first-token-type "#\\a")) (test-eq "line-comment" 'line-comment (first-token-type ";; hi")) (test-eq "prefix-quote" 'prefix (first-token-type "'")) (test-eq "prefix-quasiquote" 'prefix (first-token-type "`")) (test-eq "dot" 'dot (first-token-type ". ")) (test-eq "newline" 'newline (first-token-type "\n"))) ;;; Source location tracking (test-group "source-locations" (let ((tokens (tokenize "(define\n x\n 42)" "test.scm"))) (test-equal "first token line" 1 (token-line (car tokens))) (test-equal "first token col" 0 (token-column (car tokens))) ;; Find 'x' token (let ((x-tok (find (lambda (t) (and (eq? (token-type t) 'symbol) (string=? (token-text t) "x"))) tokens))) (test-assert "found x token" x-tok) (test-equal "x on line 2" 2 (token-line x-tok)) (test-equal "x at column 2" 2 (token-column x-tok))))) (test-end "tokenizer")