;;; test/test-tokenizer.scm — first iteration (new file, 127 lines)
;;; Tests for (gulie tokenizer)

(use-modules (srfi srfi-1)
             (srfi srfi-64)
             (ice-9 rdelim)
             (gulie tokenizer))
;; Open the top-level SRFI-64 suite; closed by (test-end "tokenizer") below.
(test-begin "tokenizer")
;;; Roundtrip invariant — the most critical test
;;;
;;; The tokenizer must be lossless: concatenating the text of every
;;; token in order must reproduce the input string byte-for-byte.

(test-group "roundtrip"
  ;; Tokenize INPUT (under the dummy filename "test.scm") and check
  ;; that the concatenated token texts reproduce INPUT exactly.
  (define (roundtrip-ok? input)
    (let* ((tokens (tokenize input "test.scm"))
           (result (string-concatenate (map token-text tokens))))
      (string=? input result)))

  (test-assert "empty input"
    (roundtrip-ok? ""))

  (test-assert "simple expression"
    (roundtrip-ok? "(define x 42)"))

  (test-assert "nested expressions"
    (roundtrip-ok? "(define (foo x)\n  (+ x 1))\n"))

  (test-assert "string with escapes"
    (roundtrip-ok? "(define s \"hello \\\"world\\\"\")"))

  (test-assert "line comment"
    (roundtrip-ok? ";; a comment\n(define x 1)\n"))

  (test-assert "block comment"
    (roundtrip-ok? "#| block\ncomment |#\n(define x 1)"))

  (test-assert "nested block comment"
    (roundtrip-ok? "#| outer #| inner |# outer |#"))

  (test-assert "datum comment"
    (roundtrip-ok? "#;(skip this) (keep this)"))

  (test-assert "character literals"
    (roundtrip-ok? "(list #\\space #\\newline #\\a #\\x41)"))

  (test-assert "keywords"
    (roundtrip-ok? "(foo #:bar #:baz)"))

  (test-assert "booleans"
    (roundtrip-ok? "(list #t #f #true #false)"))

  (test-assert "vectors"
    (roundtrip-ok? "#(1 2 3)"))

  (test-assert "quasiquote and unquote"
    (roundtrip-ok? "`(a ,b ,@c)"))

  (test-assert "syntax shorthands"
    (roundtrip-ok? "#'x #`x #,x #,@x"))

  (test-assert "dot notation"
    (roundtrip-ok? "(a . b)"))

  (test-assert "numbers"
    (roundtrip-ok? "(+ 1 2.5 -3 +4 1/3 #xff)"))

  (test-assert "square brackets"
    (roundtrip-ok? "(let ([x 1] [y 2]) (+ x y))"))

  (test-assert "multiline with mixed content"
    (roundtrip-ok? "(define-module (foo bar)
  #:use-module (ice-9 format)
  #:export (baz))

;;; Section header

(define (baz x)
  ;; body comment
  (format #t \"value: ~a\\n\" x))
"))

  ;; Roundtrip a real-world source file.  Slurp the file with
  ;; READ-DELIMITED from (ice-9 rdelim) — with an empty delimiter set it
  ;; reads to end-of-stream — instead of a hand-rolled read-char /
  ;; cons / reverse / list->string loop.  READ-DELIMITED returns the
  ;; EOF object for an empty file, so normalise that case to "".
  (test-assert "real guile source file"
    (let ((text (call-with-input-file
                    "refs/guile/module/ice-9/pretty-print.scm"
                  (lambda (port)
                    (let ((s (read-delimited "" port)))
                      (if (eof-object? s) "" s))))))
      (roundtrip-ok? text))))
;;; Token type classification
(test-group "token-types"
  ;; Tokenize INPUT and return the type tag of its leading token.
  (define (first-token-type input)
    (token-type (first (tokenize input "test.scm"))))

  ;; One probe per token class the tokenizer distinguishes.
  (test-eq "symbol" 'symbol (first-token-type "foo"))
  (test-eq "number" 'number (first-token-type "42"))
  (test-eq "string" 'string (first-token-type "\"hello\""))
  (test-eq "open-paren" 'open-paren (first-token-type "("))
  (test-eq "close-paren" 'close-paren (first-token-type ")"))
  (test-eq "boolean-true" 'boolean (first-token-type "#t"))
  (test-eq "boolean-false" 'boolean (first-token-type "#f"))
  (test-eq "keyword" 'keyword (first-token-type "#:foo"))
  (test-eq "character" 'character (first-token-type "#\\a"))
  (test-eq "line-comment" 'line-comment (first-token-type ";; hi"))
  (test-eq "prefix-quote" 'prefix (first-token-type "'"))
  (test-eq "prefix-quasiquote" 'prefix (first-token-type "`"))
  (test-eq "dot" 'dot (first-token-type ". "))
  (test-eq "newline" 'newline (first-token-type "\n")))
;;; Source location tracking
(test-group "source-locations"
  ;; Tokenize a three-line snippet and probe the recorded positions.
  ;; Lines are 1-based, columns 0-based (per the assertions below).
  (let* ((tokens (tokenize "(define\n  x\n  42)" "test.scm"))
         ;; Locate the symbol token whose text is exactly "x".
         (x-token (find (lambda (tok)
                          (and (eq? 'symbol (token-type tok))
                               (string=? "x" (token-text tok))))
                        tokens)))
    (test-equal "first token line" 1 (token-line (car tokens)))
    (test-equal "first token col" 0 (token-column (car tokens)))
    (test-assert "found x token" x-token)
    (test-equal "x on line 2" 2 (token-line x-token))
    (test-equal "x at column 2" 2 (token-column x-token))))
;; Close the suite opened by (test-begin "tokenizer") above.
(test-end "tokenizer")
;;; end of test/test-tokenizer.scm