Make surface rules token-aware

This commit is contained in:
2026-04-04 16:11:46 +02:00
parent bd89fb476e
commit 8488cab0ac
4 changed files with 197 additions and 98 deletions

View File

@@ -9,6 +9,7 @@
#:use-module (ice-9 textual-ports) #:use-module (ice-9 textual-ports)
#:use-module (ice-9 threads) #:use-module (ice-9 threads)
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
#:use-module (gulie tokenizer)
#:use-module (gulie diagnostic) #:use-module (gulie diagnostic)
#:use-module (gulie rule) #:use-module (gulie rule)
#:use-module (gulie suppression) #:use-module (gulie suppression)
@@ -97,7 +98,47 @@ This applies to semantic diagnostics as well as surface diagnostics."
(string-every char-whitespace? line-text)) (string-every char-whitespace? line-text))
(not (page-break-line? line-text)))) (not (page-break-line? line-text))))
(define (run-line-rules file lines config) (define (build-line-token-segments tokens line-count)
"Build a vector mapping line numbers to token segments.
Each segment is a list: (TYPE START-COL END-COL TEXT)."
(let ((segments (make-vector (1+ line-count) '())))
(define (add-segment! line type start end text)
(when (and (<= 1 line line-count)
(< start end))
(vector-set! segments line
(cons (list type start end text)
(vector-ref segments line)))))
(for-each
(lambda (tok)
(let ((type (token-type tok))
(text (token-text tok)))
(let ((len (string-length text)))
(let lp ((i 0)
(line (token-line tok))
(col (token-column tok))
(segment-start-i 0)
(segment-start-col (token-column tok)))
(if (= i len)
(when (< segment-start-i i)
(add-segment! line type segment-start-col col
(substring text segment-start-i i)))
(let ((ch (string-ref text i)))
(if (char=? ch #\newline)
(begin
(when (< segment-start-i i)
(add-segment! line type segment-start-col col
(substring text segment-start-i i)))
(lp (1+ i) (1+ line) 0 (1+ i) 0))
(lp (1+ i) line (1+ col)
segment-start-i segment-start-col))))))))
tokens)
(let lp ((i 1))
(when (<= i line-count)
(vector-set! segments i (reverse (vector-ref segments i)))
(lp (1+ i))))
segments))
(define (run-line-rules file lines config line-token-segments)
"Run all line-type rules against LINES. Returns list of diagnostics." "Run all line-type rules against LINES. Returns list of diagnostics."
(let ((line-rules (filter-rules-by-config (rules-of-type 'line) config)) (let ((line-rules (filter-rules-by-config (rules-of-type 'line) config))
(diagnostics '()) (diagnostics '())
@@ -106,12 +147,17 @@ This applies to semantic diagnostics as well as surface diagnostics."
(let lp ((remaining lines) (line-num 1)) (let lp ((remaining lines) (line-num 1))
(when (not (null? remaining)) (when (not (null? remaining))
(let* ((line-text (car remaining)) (let* ((line-text (car remaining))
(is-blank (surface-blank-line? line-text))) (is-blank (surface-blank-line? line-text))
(line-segments (if (and line-token-segments
(< line-num (vector-length line-token-segments)))
(vector-ref line-token-segments line-num)
'())))
(set! consecutive-blanks (set! consecutive-blanks
(if is-blank (1+ consecutive-blanks) 0)) (if is-blank (1+ consecutive-blanks) 0))
(let ((augmented-config (let ((augmented-config
(cons (cons '%consecutive-blanks consecutive-blanks) (cons (cons '%line-token-segments line-segments)
config))) (cons (cons '%consecutive-blanks consecutive-blanks)
config))))
(for-each (for-each
(lambda (rule) (lambda (rule)
(let ((results ((rule-check-proc rule) (let ((results ((rule-check-proc rule)
@@ -139,25 +185,27 @@ and '%min-severity with value 'error, 'warning, or 'info (default)."
(lines (string-split text #\newline)) (lines (string-split text #\newline))
(pass (or (assq-ref config '%pass) 'all)) (pass (or (assq-ref config '%pass) 'all))
(min-severity (or (assq-ref config '%min-severity) 'info)) (min-severity (or (assq-ref config '%min-severity) 'info))
(tok-mod (and (memq pass '(all surface))
(resolve-module '(gulie tokenizer) #:ensure #f)))
(tokenize (and tok-mod (module-ref tok-mod 'tokenize)))
(tokens (and tokenize (tokenize text file)))
(line-token-segments (and tokens
(build-line-token-segments tokens (length lines))))
(diagnostics '())) (diagnostics '()))
;; Pass 1: line-based surface rules ;; Pass 1: line-based surface rules
(when (memq pass '(all surface)) (when (memq pass '(all surface))
(set! diagnostics (append (run-line-rules file lines config) (set! diagnostics (append (run-line-rules file lines config line-token-segments)
diagnostics)) diagnostics))
;; Pass 1b: CST rules — only tokenize if there are active CST rules ;; Pass 1b: CST rules — only parse if there are active CST rules
(let ((cst-rules (filter-rules-by-config (rules-of-type 'cst) config))) (let ((cst-rules (filter-rules-by-config (rules-of-type 'cst) config)))
(when (not (null? cst-rules)) (when (and (not (null? cst-rules)) tokens)
(let ((tok-mod (resolve-module '(gulie tokenizer) #:ensure #f))) (let ((cst-mod (resolve-module '(gulie cst) #:ensure #f)))
(when tok-mod (when cst-mod
(let ((tokenize (module-ref tok-mod 'tokenize)) (let* ((parse-cst (module-ref cst-mod 'parse-cst))
(cst-mod (resolve-module '(gulie cst) #:ensure #f))) (cst (parse-cst tokens)))
(when cst-mod (set! diagnostics
(let* ((parse-cst (module-ref cst-mod 'parse-cst)) (append (run-cst-rules file cst config)
(tokens (tokenize text file)) diagnostics))))))))
(cst (parse-cst tokens)))
(set! diagnostics
(append (run-cst-rules file cst config)
diagnostics))))))))))
;; Pass 2: semantic rules (if compiler module is loaded) ;; Pass 2: semantic rules (if compiler module is loaded)
(when (memq pass '(all semantic)) (when (memq pass '(all semantic))
(let ((comp-mod (resolve-module '(gulie compiler) #:ensure #f))) (let ((comp-mod (resolve-module '(gulie compiler) #:ensure #f)))

View File

@@ -7,6 +7,7 @@
;;; ;;;; — file headers ;;; ;;;; — file headers
(define-module (gulie rules comments) (define-module (gulie rules comments)
#:use-module (srfi srfi-1)
#:use-module (gulie rule) #:use-module (gulie rule)
#:use-module (gulie diagnostic)) #:use-module (gulie diagnostic))
@@ -26,58 +27,75 @@
((char-whitespace? (string-ref line-text i)) (lp (1+ i))) ((char-whitespace? (string-ref line-text i)) (lp (1+ i)))
(else #f)))) (else #f))))
(define (line-comment-segment config)
(find (lambda (seg)
(eq? (car seg) 'line-comment))
(or (assq-ref config '%line-token-segments) '())))
(register-rule! (register-rule!
(make-rule (make-rule
'comment-semicolons 'comment-semicolons
"Check comment semicolon count follows conventions" "Check comment semicolon count follows conventions"
'info 'style 'line 'info 'style 'line
(lambda (file line-num line-text config) (lambda (file line-num line-text config)
(let ((pos (string-index line-text #\;))) (define (check-comment pos comment-text)
(if (not pos) (let* ((semis (count-semicolons comment-text))
'() (own-line? (comment-only-line? line-text pos)))
;; Check if the semicolon is inside a string (rough heuristic: (cond
;; count quotes before the semicolon position) ;; Inline comment (after code) should use single ;
(let ((quotes-before (let lp ((i 0) (count 0) (in-escape #f)) ;; But we don't enforce this strictly — just flag ;;; or more inline
(cond ((and (not own-line?) (>= semis 3))
((>= i pos) count) (let ((fixed (string-append
((and (not in-escape) (char=? (string-ref line-text i) #\\)) (substring line-text 0 pos)
(lp (1+ i) count #t)) ";;"
((and (not in-escape) (char=? (string-ref line-text i) #\")) (substring line-text (+ pos semis)))))
(lp (1+ i) (1+ count) #f)) (list (make-diagnostic
(else (lp (1+ i) count #f)))))) file line-num pos
(if (odd? quotes-before) 'info 'comment-semicolons
;; Inside a string — not a real comment "inline comments should use ; or ;; not ;;;"
(make-fix 'replace-line line-num 0
line-num (string-length line-text)
fixed)))))
;; Own-line comment with single ; (should be ;;)
((and own-line? (= semis 1) (> (string-length comment-text) 1)
(not (char=? (string-ref comment-text 1) #\!)))
(let ((fixed (string-append
(substring line-text 0 pos)
";;"
(substring line-text (1+ pos)))))
(list (make-diagnostic
file line-num pos
'info 'comment-semicolons
"line comments should use ;; not ;"
(make-fix 'replace-line line-num 0
line-num (string-length line-text)
fixed)))))
(else '()))))
(let* ((segments (or (assq-ref config '%line-token-segments) '()))
(segment (line-comment-segment config))
(pos (and segment (cadr segment)))
(comment-text (and segment (list-ref segment 3))))
(cond
(segment
(check-comment pos comment-text))
((pair? segments)
'())
(else
;; Fallback when token context is unavailable.
(let ((pos (string-index line-text #\;)))
(if (not pos)
'() '()
(let* ((semis (count-semicolons (substring line-text pos))) ;; Check if the semicolon is inside a string (rough heuristic:
(own-line? (comment-only-line? line-text pos))) ;; count quotes before the semicolon position)
(cond (let ((quotes-before (let lp ((i 0) (count 0) (in-escape #f))
;; Inline comment (after code) should use single ; (cond
;; But we don't enforce this strictly — just flag ;;; or more inline ((>= i pos) count)
((and (not own-line?) (>= semis 3)) ((and (not in-escape) (char=? (string-ref line-text i) #\\))
(let ((fixed (string-append (lp (1+ i) count #t))
(substring line-text 0 pos) ((and (not in-escape) (char=? (string-ref line-text i) #\"))
";;" (lp (1+ i) (1+ count) #f))
(substring line-text (+ pos semis))))) (else (lp (1+ i) count #f))))))
(list (make-diagnostic (if (odd? quotes-before)
file line-num pos '()
'info 'comment-semicolons (check-comment pos (substring line-text pos))))))))))
"inline comments should use ; or ;; not ;;;"
(make-fix 'replace-line line-num 0
line-num (string-length line-text)
fixed)))))
;; Own-line comment with single ; (should be ;;)
((and own-line? (= semis 1) (> (string-length line-text) (1+ pos))
(not (char=? (string-ref line-text (1+ pos)) #\!)))
(let ((fixed (string-append
(substring line-text 0 pos)
";;"
(substring line-text (1+ pos)))))
(list (make-diagnostic
file line-num pos
'info 'comment-semicolons
"line comments should use ;; not ;"
(make-fix 'replace-line line-num 0
line-num (string-length line-text)
fixed)))))
(else '()))))))))
#f)) #f))

View File

@@ -4,6 +4,7 @@
;;; just the file path, line number, and line content. ;;; just the file path, line number, and line content.
(define-module (gulie rules surface) (define-module (gulie rules surface)
#:use-module (srfi srfi-1)
#:use-module (gulie rule) #:use-module (gulie rule)
#:use-module (gulie diagnostic)) #:use-module (gulie diagnostic))
@@ -33,7 +34,27 @@
(+ col (- 8 (modulo col 8))) (+ col (- 8 (modulo col 8)))
(1+ col))) (1+ col)))
(define (find-tab-in-code line-text) (define (line-token-segments config)
(or (assq-ref config '%line-token-segments) '()))
(define (segment-type seg) (list-ref seg 0))
(define (segment-start seg) (list-ref seg 1))
(define (segment-end seg) (list-ref seg 2))
(define (segment-covers-column? seg col)
(and (<= (segment-start seg) col)
(< col (segment-end seg))))
(define (excluded-tab-segment? seg)
(memq (segment-type seg) '(string line-comment block-comment)))
(define (tab-in-excluded-segment? col segments)
(any (lambda (seg)
(and (excluded-tab-segment? seg)
(segment-covers-column? seg col)))
segments))
(define (find-tab-in-code-fallback line-text)
"Return the column of the first tab outside strings and comments, or #f." "Return the column of the first tab outside strings and comments, or #f."
(let ((len (string-length line-text))) (let ((len (string-length line-text)))
(let lp ((i 0) (in-string? #f) (escaped? #f)) (let lp ((i 0) (in-string? #f) (escaped? #f))
@@ -60,44 +81,34 @@
(else (else
(lp (1+ i) #f #f)))))))) (lp (1+ i) #f #f))))))))
(define (expand-tabs-in-code line-text) (define (find-tab-in-code line-text config)
(let ((segments (line-token-segments config)))
(if (null? segments)
(find-tab-in-code-fallback line-text)
(let lp ((i 0))
(cond
((= i (string-length line-text)) #f)
((and (char=? (string-ref line-text i) #\tab)
(not (tab-in-excluded-segment? i segments)))
i)
(else (lp (1+ i))))))))
(define (expand-tabs-in-code line-text config)
"Expand tabs outside strings and comments, preserving other tabs as-is." "Expand tabs outside strings and comments, preserving other tabs as-is."
(let ((len (string-length line-text))) (let ((segments (line-token-segments config))
(let lp ((i 0) (col 0) (in-string? #f) (escaped? #f) (len (string-length line-text)))
(in-comment? #f) (acc '())) (let lp ((i 0) (col 0) (acc '()))
(if (= i len) (if (= i len)
(list->string (reverse acc)) (list->string (reverse acc))
(let ((ch (string-ref line-text i))) (let ((ch (string-ref line-text i)))
(cond (cond
(in-comment? ((and (char=? ch #\tab)
(lp (1+ i) (advance-column col ch) #f #f #t (not (tab-in-excluded-segment? i segments)))
(cons ch acc)))
(in-string?
(cond
(escaped?
(lp (1+ i) (advance-column col ch) #t #f #f
(cons ch acc)))
((char=? ch #\\)
(lp (1+ i) (advance-column col ch) #t #t #f
(cons ch acc)))
((char=? ch #\")
(lp (1+ i) (advance-column col ch) #f #f #f
(cons ch acc)))
(else
(lp (1+ i) (advance-column col ch) #t #f #f
(cons ch acc)))))
((char=? ch #\;)
(lp (1+ i) (advance-column col ch) #f #f #t
(cons ch acc)))
((char=? ch #\")
(lp (1+ i) (advance-column col ch) #t #f #f
(cons ch acc)))
((char=? ch #\tab)
(let ((spaces (- 8 (modulo col 8)))) (let ((spaces (- 8 (modulo col 8))))
(lp (1+ i) (+ col spaces) #f #f #f (lp (1+ i) (+ col spaces)
(append (make-list spaces #\space) acc)))) (append (make-list spaces #\space) acc))))
(else (else
(lp (1+ i) (advance-column col ch) #f #f #f (lp (1+ i) (advance-column col ch)
(cons ch acc))))))))) (cons ch acc)))))))))
;;; trailing-whitespace — trailing spaces or tabs at end of line ;;; trailing-whitespace — trailing spaces or tabs at end of line
@@ -154,9 +165,9 @@
"Tab character found in source" "Tab character found in source"
'warning 'format 'line 'warning 'format 'line
(lambda (file line-num line-text config) (lambda (file line-num line-text config)
(let ((pos (find-tab-in-code line-text))) (let ((pos (find-tab-in-code line-text config)))
(if pos (if pos
(let ((fixed (string-trim-right (expand-tabs-in-code line-text)))) (let ((fixed (string-trim-right (expand-tabs-in-code line-text config))))
(list (make-diagnostic (list (make-diagnostic
file line-num pos file line-num pos
'warning 'no-tabs 'warning 'no-tabs

View File

@@ -140,6 +140,28 @@
diags)))) diags))))
(delete-file tmp))) (delete-file tmp)))
(test-group "lint-file-token-aware-no-tabs"
(let ((tmp (tmpnam)))
(call-with-output-file tmp
(lambda (p)
(display "#| block\tcomment |#\n" p)))
(let ((diags (lint-file tmp '((%pass . surface)))))
(test-assert "tab in block comment ignored"
(not (any (lambda (d) (eq? (diagnostic-rule d) 'no-tabs))
diags))))
(delete-file tmp)))
(test-group "lint-file-token-aware-comment-semantics"
(let ((tmp (tmpnam)))
(call-with-output-file tmp
(lambda (p)
(display "(display \"hello\n; not a comment\nworld\")\n" p)))
(let ((diags (lint-file tmp '((%pass . surface)))))
(test-assert "semicolon inside multiline string ignored"
(not (any (lambda (d) (eq? (diagnostic-rule d) 'comment-semicolons))
diags))))
(delete-file tmp)))
;;; Fix mode ;;; Fix mode
(test-group "fix-trailing-whitespace" (test-group "fix-trailing-whitespace"