Make surface rules token-aware
This commit is contained in:
@@ -9,6 +9,7 @@
|
||||
#:use-module (ice-9 textual-ports)
|
||||
#:use-module (ice-9 threads)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (gulie tokenizer)
|
||||
#:use-module (gulie diagnostic)
|
||||
#:use-module (gulie rule)
|
||||
#:use-module (gulie suppression)
|
||||
@@ -97,7 +98,47 @@ This applies to semantic diagnostics as well as surface diagnostics."
|
||||
(string-every char-whitespace? line-text))
|
||||
(not (page-break-line? line-text))))
|
||||
|
||||
(define (run-line-rules file lines config)
|
||||
(define (build-line-token-segments tokens line-count)
|
||||
"Build a vector mapping line numbers to token segments.
|
||||
Each segment is a list: (TYPE START-COL END-COL TEXT)."
|
||||
(let ((segments (make-vector (1+ line-count) '())))
|
||||
(define (add-segment! line type start end text)
|
||||
(when (and (<= 1 line line-count)
|
||||
(< start end))
|
||||
(vector-set! segments line
|
||||
(cons (list type start end text)
|
||||
(vector-ref segments line)))))
|
||||
(for-each
|
||||
(lambda (tok)
|
||||
(let ((type (token-type tok))
|
||||
(text (token-text tok)))
|
||||
(let ((len (string-length text)))
|
||||
(let lp ((i 0)
|
||||
(line (token-line tok))
|
||||
(col (token-column tok))
|
||||
(segment-start-i 0)
|
||||
(segment-start-col (token-column tok)))
|
||||
(if (= i len)
|
||||
(when (< segment-start-i i)
|
||||
(add-segment! line type segment-start-col col
|
||||
(substring text segment-start-i i)))
|
||||
(let ((ch (string-ref text i)))
|
||||
(if (char=? ch #\newline)
|
||||
(begin
|
||||
(when (< segment-start-i i)
|
||||
(add-segment! line type segment-start-col col
|
||||
(substring text segment-start-i i)))
|
||||
(lp (1+ i) (1+ line) 0 (1+ i) 0))
|
||||
(lp (1+ i) line (1+ col)
|
||||
segment-start-i segment-start-col))))))))
|
||||
tokens)
|
||||
(let lp ((i 1))
|
||||
(when (<= i line-count)
|
||||
(vector-set! segments i (reverse (vector-ref segments i)))
|
||||
(lp (1+ i))))
|
||||
segments))
|
||||
|
||||
(define (run-line-rules file lines config line-token-segments)
|
||||
"Run all line-type rules against LINES. Returns list of diagnostics."
|
||||
(let ((line-rules (filter-rules-by-config (rules-of-type 'line) config))
|
||||
(diagnostics '())
|
||||
@@ -106,12 +147,17 @@ This applies to semantic diagnostics as well as surface diagnostics."
|
||||
(let lp ((remaining lines) (line-num 1))
|
||||
(when (not (null? remaining))
|
||||
(let* ((line-text (car remaining))
|
||||
(is-blank (surface-blank-line? line-text)))
|
||||
(is-blank (surface-blank-line? line-text))
|
||||
(line-segments (if (and line-token-segments
|
||||
(< line-num (vector-length line-token-segments)))
|
||||
(vector-ref line-token-segments line-num)
|
||||
'())))
|
||||
(set! consecutive-blanks
|
||||
(if is-blank (1+ consecutive-blanks) 0))
|
||||
(let ((augmented-config
|
||||
(cons (cons '%line-token-segments line-segments)
|
||||
(cons (cons '%consecutive-blanks consecutive-blanks)
|
||||
config)))
|
||||
config))))
|
||||
(for-each
|
||||
(lambda (rule)
|
||||
(let ((results ((rule-check-proc rule)
|
||||
@@ -139,25 +185,27 @@ and '%min-severity with value 'error, 'warning, or 'info (default)."
|
||||
(lines (string-split text #\newline))
|
||||
(pass (or (assq-ref config '%pass) 'all))
|
||||
(min-severity (or (assq-ref config '%min-severity) 'info))
|
||||
(tok-mod (and (memq pass '(all surface))
|
||||
(resolve-module '(gulie tokenizer) #:ensure #f)))
|
||||
(tokenize (and tok-mod (module-ref tok-mod 'tokenize)))
|
||||
(tokens (and tokenize (tokenize text file)))
|
||||
(line-token-segments (and tokens
|
||||
(build-line-token-segments tokens (length lines))))
|
||||
(diagnostics '()))
|
||||
;; Pass 1: line-based surface rules
|
||||
(when (memq pass '(all surface))
|
||||
(set! diagnostics (append (run-line-rules file lines config)
|
||||
(set! diagnostics (append (run-line-rules file lines config line-token-segments)
|
||||
diagnostics))
|
||||
;; Pass 1b: CST rules — only tokenize if there are active CST rules
|
||||
;; Pass 1b: CST rules — only parse if there are active CST rules
|
||||
(let ((cst-rules (filter-rules-by-config (rules-of-type 'cst) config)))
|
||||
(when (not (null? cst-rules))
|
||||
(let ((tok-mod (resolve-module '(gulie tokenizer) #:ensure #f)))
|
||||
(when tok-mod
|
||||
(let ((tokenize (module-ref tok-mod 'tokenize))
|
||||
(cst-mod (resolve-module '(gulie cst) #:ensure #f)))
|
||||
(when (and (not (null? cst-rules)) tokens)
|
||||
(let ((cst-mod (resolve-module '(gulie cst) #:ensure #f)))
|
||||
(when cst-mod
|
||||
(let* ((parse-cst (module-ref cst-mod 'parse-cst))
|
||||
(tokens (tokenize text file))
|
||||
(cst (parse-cst tokens)))
|
||||
(set! diagnostics
|
||||
(append (run-cst-rules file cst config)
|
||||
diagnostics))))))))))
|
||||
diagnostics))))))))
|
||||
;; Pass 2: semantic rules (if compiler module is loaded)
|
||||
(when (memq pass '(all semantic))
|
||||
(let ((comp-mod (resolve-module '(gulie compiler) #:ensure #f)))
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
;;; ;;;; — file headers
|
||||
|
||||
(define-module (gulie rules comments)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (gulie rule)
|
||||
#:use-module (gulie diagnostic))
|
||||
|
||||
@@ -26,29 +27,19 @@
|
||||
((char-whitespace? (string-ref line-text i)) (lp (1+ i)))
|
||||
(else #f))))
|
||||
|
||||
(define (line-comment-segment config)
|
||||
(find (lambda (seg)
|
||||
(eq? (car seg) 'line-comment))
|
||||
(or (assq-ref config '%line-token-segments) '())))
|
||||
|
||||
(register-rule!
|
||||
(make-rule
|
||||
'comment-semicolons
|
||||
"Check comment semicolon count follows conventions"
|
||||
'info 'style 'line
|
||||
(lambda (file line-num line-text config)
|
||||
(let ((pos (string-index line-text #\;)))
|
||||
(if (not pos)
|
||||
'()
|
||||
;; Check if the semicolon is inside a string (rough heuristic:
|
||||
;; count quotes before the semicolon position)
|
||||
(let ((quotes-before (let lp ((i 0) (count 0) (in-escape #f))
|
||||
(cond
|
||||
((>= i pos) count)
|
||||
((and (not in-escape) (char=? (string-ref line-text i) #\\))
|
||||
(lp (1+ i) count #t))
|
||||
((and (not in-escape) (char=? (string-ref line-text i) #\"))
|
||||
(lp (1+ i) (1+ count) #f))
|
||||
(else (lp (1+ i) count #f))))))
|
||||
(if (odd? quotes-before)
|
||||
;; Inside a string — not a real comment
|
||||
'()
|
||||
(let* ((semis (count-semicolons (substring line-text pos)))
|
||||
(define (check-comment pos comment-text)
|
||||
(let* ((semis (count-semicolons comment-text))
|
||||
(own-line? (comment-only-line? line-text pos)))
|
||||
(cond
|
||||
;; Inline comment (after code) should use single ;
|
||||
@@ -66,8 +57,8 @@
|
||||
line-num (string-length line-text)
|
||||
fixed)))))
|
||||
;; Own-line comment with single ; (should be ;;)
|
||||
((and own-line? (= semis 1) (> (string-length line-text) (1+ pos))
|
||||
(not (char=? (string-ref line-text (1+ pos)) #\!)))
|
||||
((and own-line? (= semis 1) (> (string-length comment-text) 1)
|
||||
(not (char=? (string-ref comment-text 1) #\!)))
|
||||
(let ((fixed (string-append
|
||||
(substring line-text 0 pos)
|
||||
";;"
|
||||
@@ -79,5 +70,32 @@
|
||||
(make-fix 'replace-line line-num 0
|
||||
line-num (string-length line-text)
|
||||
fixed)))))
|
||||
(else '()))))))))
|
||||
(else '()))))
|
||||
(let* ((segments (or (assq-ref config '%line-token-segments) '()))
|
||||
(segment (line-comment-segment config))
|
||||
(pos (and segment (cadr segment)))
|
||||
(comment-text (and segment (list-ref segment 3))))
|
||||
(cond
|
||||
(segment
|
||||
(check-comment pos comment-text))
|
||||
((pair? segments)
|
||||
'())
|
||||
(else
|
||||
;; Fallback when token context is unavailable.
|
||||
(let ((pos (string-index line-text #\;)))
|
||||
(if (not pos)
|
||||
'()
|
||||
;; Check if the semicolon is inside a string (rough heuristic:
|
||||
;; count quotes before the semicolon position)
|
||||
(let ((quotes-before (let lp ((i 0) (count 0) (in-escape #f))
|
||||
(cond
|
||||
((>= i pos) count)
|
||||
((and (not in-escape) (char=? (string-ref line-text i) #\\))
|
||||
(lp (1+ i) count #t))
|
||||
((and (not in-escape) (char=? (string-ref line-text i) #\"))
|
||||
(lp (1+ i) (1+ count) #f))
|
||||
(else (lp (1+ i) count #f))))))
|
||||
(if (odd? quotes-before)
|
||||
'()
|
||||
(check-comment pos (substring line-text pos))))))))))
|
||||
#f))
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
;;; just the file path, line number, and line content.
|
||||
|
||||
(define-module (gulie rules surface)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (gulie rule)
|
||||
#:use-module (gulie diagnostic))
|
||||
|
||||
@@ -33,7 +34,27 @@
|
||||
(+ col (- 8 (modulo col 8)))
|
||||
(1+ col)))
|
||||
|
||||
(define (find-tab-in-code line-text)
|
||||
(define (line-token-segments config)
|
||||
(or (assq-ref config '%line-token-segments) '()))
|
||||
|
||||
(define (segment-type seg) (list-ref seg 0))
|
||||
(define (segment-start seg) (list-ref seg 1))
|
||||
(define (segment-end seg) (list-ref seg 2))
|
||||
|
||||
(define (segment-covers-column? seg col)
|
||||
(and (<= (segment-start seg) col)
|
||||
(< col (segment-end seg))))
|
||||
|
||||
(define (excluded-tab-segment? seg)
|
||||
(memq (segment-type seg) '(string line-comment block-comment)))
|
||||
|
||||
(define (tab-in-excluded-segment? col segments)
|
||||
(any (lambda (seg)
|
||||
(and (excluded-tab-segment? seg)
|
||||
(segment-covers-column? seg col)))
|
||||
segments))
|
||||
|
||||
(define (find-tab-in-code-fallback line-text)
|
||||
"Return the column of the first tab outside strings and comments, or #f."
|
||||
(let ((len (string-length line-text)))
|
||||
(let lp ((i 0) (in-string? #f) (escaped? #f))
|
||||
@@ -60,44 +81,34 @@
|
||||
(else
|
||||
(lp (1+ i) #f #f))))))))
|
||||
|
||||
(define (expand-tabs-in-code line-text)
|
||||
(define (find-tab-in-code line-text config)
|
||||
(let ((segments (line-token-segments config)))
|
||||
(if (null? segments)
|
||||
(find-tab-in-code-fallback line-text)
|
||||
(let lp ((i 0))
|
||||
(cond
|
||||
((= i (string-length line-text)) #f)
|
||||
((and (char=? (string-ref line-text i) #\tab)
|
||||
(not (tab-in-excluded-segment? i segments)))
|
||||
i)
|
||||
(else (lp (1+ i))))))))
|
||||
|
||||
(define (expand-tabs-in-code line-text config)
|
||||
"Expand tabs outside strings and comments, preserving other tabs as-is."
|
||||
(let ((len (string-length line-text)))
|
||||
(let lp ((i 0) (col 0) (in-string? #f) (escaped? #f)
|
||||
(in-comment? #f) (acc '()))
|
||||
(let ((segments (line-token-segments config))
|
||||
(len (string-length line-text)))
|
||||
(let lp ((i 0) (col 0) (acc '()))
|
||||
(if (= i len)
|
||||
(list->string (reverse acc))
|
||||
(let ((ch (string-ref line-text i)))
|
||||
(cond
|
||||
(in-comment?
|
||||
(lp (1+ i) (advance-column col ch) #f #f #t
|
||||
(cons ch acc)))
|
||||
(in-string?
|
||||
(cond
|
||||
(escaped?
|
||||
(lp (1+ i) (advance-column col ch) #t #f #f
|
||||
(cons ch acc)))
|
||||
((char=? ch #\\)
|
||||
(lp (1+ i) (advance-column col ch) #t #t #f
|
||||
(cons ch acc)))
|
||||
((char=? ch #\")
|
||||
(lp (1+ i) (advance-column col ch) #f #f #f
|
||||
(cons ch acc)))
|
||||
(else
|
||||
(lp (1+ i) (advance-column col ch) #t #f #f
|
||||
(cons ch acc)))))
|
||||
((char=? ch #\;)
|
||||
(lp (1+ i) (advance-column col ch) #f #f #t
|
||||
(cons ch acc)))
|
||||
((char=? ch #\")
|
||||
(lp (1+ i) (advance-column col ch) #t #f #f
|
||||
(cons ch acc)))
|
||||
((char=? ch #\tab)
|
||||
((and (char=? ch #\tab)
|
||||
(not (tab-in-excluded-segment? i segments)))
|
||||
(let ((spaces (- 8 (modulo col 8))))
|
||||
(lp (1+ i) (+ col spaces) #f #f #f
|
||||
(lp (1+ i) (+ col spaces)
|
||||
(append (make-list spaces #\space) acc))))
|
||||
(else
|
||||
(lp (1+ i) (advance-column col ch) #f #f #f
|
||||
(lp (1+ i) (advance-column col ch)
|
||||
(cons ch acc)))))))))
|
||||
|
||||
;;; trailing-whitespace — trailing spaces or tabs at end of line
|
||||
@@ -154,9 +165,9 @@
|
||||
"Tab character found in source"
|
||||
'warning 'format 'line
|
||||
(lambda (file line-num line-text config)
|
||||
(let ((pos (find-tab-in-code line-text)))
|
||||
(let ((pos (find-tab-in-code line-text config)))
|
||||
(if pos
|
||||
(let ((fixed (string-trim-right (expand-tabs-in-code line-text))))
|
||||
(let ((fixed (string-trim-right (expand-tabs-in-code line-text config))))
|
||||
(list (make-diagnostic
|
||||
file line-num pos
|
||||
'warning 'no-tabs
|
||||
|
||||
@@ -140,6 +140,28 @@
|
||||
diags))))
|
||||
(delete-file tmp)))
|
||||
|
||||
(test-group "lint-file-token-aware-no-tabs"
|
||||
(let ((tmp (tmpnam)))
|
||||
(call-with-output-file tmp
|
||||
(lambda (p)
|
||||
(display "#| block\tcomment |#\n" p)))
|
||||
(let ((diags (lint-file tmp '((%pass . surface)))))
|
||||
(test-assert "tab in block comment ignored"
|
||||
(not (any (lambda (d) (eq? (diagnostic-rule d) 'no-tabs))
|
||||
diags))))
|
||||
(delete-file tmp)))
|
||||
|
||||
(test-group "lint-file-token-aware-comment-semantics"
|
||||
(let ((tmp (tmpnam)))
|
||||
(call-with-output-file tmp
|
||||
(lambda (p)
|
||||
(display "(display \"hello\n; not a comment\nworld\")\n" p)))
|
||||
(let ((diags (lint-file tmp '((%pass . surface)))))
|
||||
(test-assert "semicolon inside multiline string ignored"
|
||||
(not (any (lambda (d) (eq? (diagnostic-rule d) 'comment-semicolons))
|
||||
diags))))
|
||||
(delete-file tmp)))
|
||||
|
||||
;;; Fix mode
|
||||
|
||||
(test-group "fix-trailing-whitespace"
|
||||
|
||||
Reference in New Issue
Block a user