diff --git a/gulie/engine.scm b/gulie/engine.scm index 668b5e7..4f471fc 100644 --- a/gulie/engine.scm +++ b/gulie/engine.scm @@ -9,6 +9,7 @@ #:use-module (ice-9 textual-ports) #:use-module (ice-9 threads) #:use-module (srfi srfi-1) + #:use-module (gulie tokenizer) #:use-module (gulie diagnostic) #:use-module (gulie rule) #:use-module (gulie suppression) @@ -97,7 +98,47 @@ This applies to semantic diagnostics as well as surface diagnostics." (string-every char-whitespace? line-text)) (not (page-break-line? line-text)))) -(define (run-line-rules file lines config) +(define (build-line-token-segments tokens line-count) + "Build a vector mapping line numbers to token segments. +Each segment is a list: (TYPE START-COL END-COL TEXT)." + (let ((segments (make-vector (1+ line-count) '()))) + (define (add-segment! line type start end text) + (when (and (<= 1 line line-count) + (< start end)) + (vector-set! segments line + (cons (list type start end text) + (vector-ref segments line))))) + (for-each + (lambda (tok) + (let ((type (token-type tok)) + (text (token-text tok))) + (let ((len (string-length text))) + (let lp ((i 0) + (line (token-line tok)) + (col (token-column tok)) + (segment-start-i 0) + (segment-start-col (token-column tok))) + (if (= i len) + (when (< segment-start-i i) + (add-segment! line type segment-start-col col + (substring text segment-start-i i))) + (let ((ch (string-ref text i))) + (if (char=? ch #\newline) + (begin + (when (< segment-start-i i) + (add-segment! line type segment-start-col col + (substring text segment-start-i i))) + (lp (1+ i) (1+ line) 0 (1+ i) 0)) + (lp (1+ i) line (1+ col) + segment-start-i segment-start-col)))))))) + tokens) + (let lp ((i 1)) + (when (<= i line-count) + (vector-set! segments i (reverse (vector-ref segments i))) + (lp (1+ i)))) + segments)) + +(define (run-line-rules file lines config line-token-segments) "Run all line-type rules against LINES. Returns list of diagnostics." (let ((line-rules (filter-rules-by-config (rules-of-type 'line) config)) (diagnostics '()) @@ -106,12 +147,17 @@ This applies to semantic diagnostics as well as surface diagnostics." (let lp ((remaining lines) (line-num 1)) (when (not (null? remaining)) (let* ((line-text (car remaining)) - (is-blank (surface-blank-line? line-text))) + (is-blank (surface-blank-line? line-text)) + (line-segments (if (and line-token-segments + (< line-num (vector-length line-token-segments))) + (vector-ref line-token-segments line-num) + '()))) (set! consecutive-blanks (if is-blank (1+ consecutive-blanks) 0)) (let ((augmented-config - (cons (cons '%consecutive-blanks consecutive-blanks) - config))) + (cons (cons '%line-token-segments line-segments) + (cons (cons '%consecutive-blanks consecutive-blanks) + config)))) (for-each (lambda (rule) (let ((results ((rule-check-proc rule) @@ -139,25 +185,27 @@ and '%min-severity with value 'error, 'warning, or 'info (default)." (lines (string-split text #\newline)) (pass (or (assq-ref config '%pass) 'all)) (min-severity (or (assq-ref config '%min-severity) 'info)) + (tok-mod (and (memq pass '(all surface)) + (resolve-module '(gulie tokenizer) #:ensure #f))) + (tokenize (and tok-mod (module-ref tok-mod 'tokenize))) + (tokens (and tokenize (tokenize text file))) + (line-token-segments (and tokens + (build-line-token-segments tokens (length lines)))) (diagnostics '())) ;; Pass 1: line-based surface rules (when (memq pass '(all surface)) - (set! diagnostics (append (run-line-rules file lines config) + (set! diagnostics (append (run-line-rules file lines config line-token-segments) diagnostics)) - ;; Pass 1b: CST rules — only tokenize if there are active CST rules + ;; Pass 1b: CST rules — only parse if there are active CST rules (let ((cst-rules (filter-rules-by-config (rules-of-type 'cst) config))) - (when (not (null? cst-rules)) - (let ((tok-mod (resolve-module '(gulie tokenizer) #:ensure #f))) - (when tok-mod - (let ((tokenize (module-ref tok-mod 'tokenize)) - (cst-mod (resolve-module '(gulie cst) #:ensure #f))) - (when cst-mod - (let* ((parse-cst (module-ref cst-mod 'parse-cst)) - (tokens (tokenize text file)) - (cst (parse-cst tokens))) - (set! diagnostics - (append (run-cst-rules file cst config) - diagnostics)))))))))) + (when (and (not (null? cst-rules)) tokens) + (let ((cst-mod (resolve-module '(gulie cst) #:ensure #f))) + (when cst-mod + (let* ((parse-cst (module-ref cst-mod 'parse-cst)) + (cst (parse-cst tokens))) + (set! diagnostics + (append (run-cst-rules file cst config) + diagnostics)))))))) ;; Pass 2: semantic rules (if compiler module is loaded) (when (memq pass '(all semantic)) (let ((comp-mod (resolve-module '(gulie compiler) #:ensure #f))) diff --git a/gulie/rules/comments.scm b/gulie/rules/comments.scm index 233f18c..75ad15f 100644 --- a/gulie/rules/comments.scm +++ b/gulie/rules/comments.scm @@ -7,6 +7,7 @@ ;;; ;;;; — file headers (define-module (gulie rules comments) + #:use-module (srfi srfi-1) #:use-module (gulie rule) #:use-module (gulie diagnostic)) @@ -26,58 +27,75 @@ ((char-whitespace? (string-ref line-text i)) (lp (1+ i))) (else #f)))) +(define (line-comment-segment config) + (find (lambda (seg) + (eq? (car seg) 'line-comment)) + (or (assq-ref config '%line-token-segments) '()))) + (register-rule! (make-rule 'comment-semicolons "Check comment semicolon count follows conventions" 'info 'style 'line (lambda (file line-num line-text config) - (let ((pos (string-index line-text #\;))) - (if (not pos) - '() - ;; Check if the semicolon is inside a string (rough heuristic: - ;; count quotes before the semicolon position) - (let ((quotes-before (let lp ((i 0) (count 0) (in-escape #f)) - (cond - ((>= i pos) count) - ((and (not in-escape) (char=? (string-ref line-text i) #\\)) - (lp (1+ i) count #t)) - ((and (not in-escape) (char=? (string-ref line-text i) #\")) - (lp (1+ i) (1+ count) #f)) - (else (lp (1+ i) count #f)))))) - (if (odd? quotes-before) - ;; Inside a string — not a real comment + (define (check-comment pos comment-text) + (let* ((semis (count-semicolons comment-text)) + (own-line? (comment-only-line? line-text pos))) + (cond + ;; Inline comment (after code) should use single ; + ;; But we don't enforce this strictly — just flag ;;; or more inline + ((and (not own-line?) (>= semis 3)) + (let ((fixed (string-append + (substring line-text 0 pos) + ";;" + (substring line-text (+ pos semis))))) + (list (make-diagnostic + file line-num pos + 'info 'comment-semicolons + "inline comments should use ; or ;; not ;;;" + (make-fix 'replace-line line-num 0 + line-num (string-length line-text) + fixed))))) + ;; Own-line comment with single ; (should be ;;) + ((and own-line? (= semis 1) (> (string-length comment-text) 1) + (not (char=? (string-ref comment-text 1) #\!))) + (let ((fixed (string-append + (substring line-text 0 pos) + ";;" + (substring line-text (1+ pos))))) + (list (make-diagnostic + file line-num pos + 'info 'comment-semicolons + "line comments should use ;; not ;" + (make-fix 'replace-line line-num 0 + line-num (string-length line-text) + fixed))))) + (else '())))) + (let* ((segments (or (assq-ref config '%line-token-segments) '())) + (segment (line-comment-segment config)) + (pos (and segment (cadr segment))) + (comment-text (and segment (list-ref segment 3)))) + (cond + (segment + (check-comment pos comment-text)) + ((pair? segments) + '()) + (else + ;; Fallback when token context is unavailable. + (let ((pos (string-index line-text #\;))) + (if (not pos) '() - (let* ((semis (count-semicolons (substring line-text pos))) - (own-line? (comment-only-line? line-text pos))) - (cond - ;; Inline comment (after code) should use single ; - ;; But we don't enforce this strictly — just flag ;;; or more inline - ((and (not own-line?) (>= semis 3)) - (let ((fixed (string-append - (substring line-text 0 pos) - ";;" - (substring line-text (+ pos semis))))) - (list (make-diagnostic - file line-num pos - 'info 'comment-semicolons - "inline comments should use ; or ;; not ;;;" - (make-fix 'replace-line line-num 0 - line-num (string-length line-text) - fixed))))) - ;; Own-line comment with single ; (should be ;;) - ((and own-line? (= semis 1) (> (string-length line-text) (1+ pos)) - (not (char=? (string-ref line-text (1+ pos)) #\!))) - (let ((fixed (string-append - (substring line-text 0 pos) - ";;" - (substring line-text (1+ pos))))) - (list (make-diagnostic - file line-num pos - 'info 'comment-semicolons - "line comments should use ;; not ;" - (make-fix 'replace-line line-num 0 - line-num (string-length line-text) - fixed))))) - (else '())))))))) + ;; Check if the semicolon is inside a string (rough heuristic: + ;; count quotes before the semicolon position) + (let ((quotes-before (let lp ((i 0) (count 0) (in-escape #f)) + (cond + ((>= i pos) count) + ((and (not in-escape) (char=? (string-ref line-text i) #\\)) + (lp (1+ i) count #t)) + ((and (not in-escape) (char=? (string-ref line-text i) #\")) + (lp (1+ i) (1+ count) #f)) + (else (lp (1+ i) count #f)))))) + (if (odd? quotes-before) + '() + (check-comment pos (substring line-text pos)))))))))) #f)) diff --git a/gulie/rules/surface.scm b/gulie/rules/surface.scm index 60cb187..33b5f80 100644 --- a/gulie/rules/surface.scm +++ b/gulie/rules/surface.scm @@ -4,6 +4,7 @@ ;;; just the file path, line number, and line content. (define-module (gulie rules surface) + #:use-module (srfi srfi-1) #:use-module (gulie rule) #:use-module (gulie diagnostic)) @@ -33,7 +34,27 @@ (+ col (- 8 (modulo col 8))) (1+ col))) -(define (find-tab-in-code line-text) +(define (line-token-segments config) + (or (assq-ref config '%line-token-segments) '())) + +(define (segment-type seg) (list-ref seg 0)) +(define (segment-start seg) (list-ref seg 1)) +(define (segment-end seg) (list-ref seg 2)) + +(define (segment-covers-column? seg col) + (and (<= (segment-start seg) col) + (< col (segment-end seg)))) + +(define (excluded-tab-segment? seg) + (memq (segment-type seg) '(string line-comment block-comment))) + +(define (tab-in-excluded-segment? col segments) + (any (lambda (seg) + (and (excluded-tab-segment? seg) + (segment-covers-column? seg col))) + segments)) + +(define (find-tab-in-code-fallback line-text) "Return the column of the first tab outside strings and comments, or #f." (let ((len (string-length line-text))) (let lp ((i 0) (in-string? #f) (escaped? #f)) @@ -60,44 +81,34 @@ (else (lp (1+ i) #f #f)))))))) -(define (expand-tabs-in-code line-text) +(define (find-tab-in-code line-text config) + (let ((segments (line-token-segments config))) + (if (null? segments) + (find-tab-in-code-fallback line-text) + (let lp ((i 0)) + (cond + ((= i (string-length line-text)) #f) + ((and (char=? (string-ref line-text i) #\tab) + (not (tab-in-excluded-segment? i segments))) + i) + (else (lp (1+ i)))))))) + +(define (expand-tabs-in-code line-text config) "Expand tabs outside strings and comments, preserving other tabs as-is." - (let ((len (string-length line-text))) - (let lp ((i 0) (col 0) (in-string? #f) (escaped? #f) - (in-comment? #f) (acc '())) + (let ((segments (line-token-segments config)) + (len (string-length line-text))) + (let lp ((i 0) (col 0) (acc '())) (if (= i len) (list->string (reverse acc)) (let ((ch (string-ref line-text i))) (cond - (in-comment? - (lp (1+ i) (advance-column col ch) #f #f #t - (cons ch acc))) - (in-string? - (cond - (escaped? - (lp (1+ i) (advance-column col ch) #t #f #f - (cons ch acc))) - ((char=? ch #\\) - (lp (1+ i) (advance-column col ch) #t #t #f - (cons ch acc))) - ((char=? ch #\") - (lp (1+ i) (advance-column col ch) #f #f #f - (cons ch acc))) - (else - (lp (1+ i) (advance-column col ch) #t #f #f - (cons ch acc))))) - ((char=? ch #\;) - (lp (1+ i) (advance-column col ch) #f #f #t - (cons ch acc))) - ((char=? ch #\") - (lp (1+ i) (advance-column col ch) #t #f #f - (cons ch acc))) - ((char=? ch #\tab) + ((and (char=? ch #\tab) + (not (tab-in-excluded-segment? i segments))) (let ((spaces (- 8 (modulo col 8)))) - (lp (1+ i) (+ col spaces) #f #f #f + (lp (1+ i) (+ col spaces) (append (make-list spaces #\space) acc)))) (else - (lp (1+ i) (advance-column col ch) #f #f #f + (lp (1+ i) (advance-column col ch) (cons ch acc))))))))) ;;; trailing-whitespace — trailing spaces or tabs at end of line @@ -154,9 +165,9 @@ "Tab character found in source" 'warning 'format 'line (lambda (file line-num line-text config) - (let ((pos (find-tab-in-code line-text))) + (let ((pos (find-tab-in-code line-text config))) (if pos - (let ((fixed (string-trim-right (expand-tabs-in-code line-text)))) + (let ((fixed (string-trim-right (expand-tabs-in-code line-text config)))) (list (make-diagnostic file line-num pos 'warning 'no-tabs diff --git a/test/test-engine.scm b/test/test-engine.scm index a555f8c..455a475 100644 --- a/test/test-engine.scm +++ b/test/test-engine.scm @@ -140,6 +140,28 @@ diags)))) (delete-file tmp))) +(test-group "lint-file-token-aware-no-tabs" + (let ((tmp (tmpnam))) + (call-with-output-file tmp + (lambda (p) + (display "#| block\tcomment |#\n" p))) + (let ((diags (lint-file tmp '((%pass . surface))))) + (test-assert "tab in block comment ignored" + (not (any (lambda (d) (eq? (diagnostic-rule d) 'no-tabs)) + diags)))) + (delete-file tmp))) + +(test-group "lint-file-token-aware-comment-semantics" + (let ((tmp (tmpnam))) + (call-with-output-file tmp + (lambda (p) + (display "(display \"hello\n; not a comment\nworld\")\n" p))) + (let ((diags (lint-file tmp '((%pass . surface))))) + (test-assert "semicolon inside multiline string ignored" + (not (any (lambda (d) (eq? (diagnostic-rule d) 'comment-semicolons)) + diags)))) + (delete-file tmp))) + ;;; Fix mode (test-group "fix-trailing-whitespace"