Rollup merge of #155469 - Jules-Bertholet:titlecase-idents, r=petrochenkov

Account for titlecase in casing lints

Puts https://github.com/rust-lang/rust/issues/153892 to work. Also contains fixes for Greek final sigma casing. There are probably still some edge cases left to fix. Ideally we would use https://www.unicode.org/reports/tr55/#Identifier-Chunks as a base.

@rustbot label A-Unicode A-diagnostics A-lints A-suggestion-diagnostics
2026-04-26 13:01:27 +03:00 · 2026-04-23 14:42:45 +02:00
parent 827651f220 0dca30756d
commit 4f4c1d553e
11 changed files with 252 additions and 62 deletions
@@ -24,6 +24,7 @@
 #![feature(box_patterns)]
 #![feature(iter_order_by)]
 #![feature(rustc_attrs)]
+#![feature(titlecase)]
 #![feature(try_blocks)]
 // tidy-alphabetical-end

@@ -47,34 +47,46 @@

 declare_lint_pass!(NonCamelCaseTypes => [NON_CAMEL_CASE_TYPES]);

-/// Some unicode characters *have* case, are considered upper case or lower case, but they *can't*
-/// be upper cased or lower cased. For the purposes of the lint suggestion, we care about being able
+/// Some unicode characters *have* case, are considered upper, title, or lower case, but they *can't*
+/// be title cased or lower cased. For the purposes of the lint suggestion, we care about being able
 /// to change the char's case.
 fn char_has_case(c: char) -> bool {
-    !c.to_lowercase().eq(c.to_uppercase())
+    !c.to_lowercase().eq(c.to_titlecase())
 }

-// contains a capitalisable character followed by, or preceded by, an underscore
-fn has_underscore_case(s: &str) -> bool {
+/// FIXME: we should add a more efficient version
+/// in the stdlib for this
+fn changes_when_titlecased(c: char) -> bool {
+    !c.to_titlecase().eq([c])
+}
+
+// contains a capitalisable character followed by, or preceded by, an underscore,
+// or contains an uppercase character that changes when titlecased,
+// or contains `__`
+fn not_camel_case(s: &str) -> bool {
    let mut last = '\0';
-    s.chars().any(|c| match (std::mem::replace(&mut last, c), c) {
-        ('_', cs) | (cs, '_') => char_has_case(cs),
-        _ => false,
+    s.chars().any(|snd| {
+        let fst = std::mem::replace(&mut last, snd);
+        match (fst, snd) {
+            ('_', '_') => return true,
+            ('_', _) if char_has_case(snd) => return true,
+            (_, '_') if char_has_case(fst) => return true,
+            _ => snd.is_uppercase() && changes_when_titlecased(snd),
+        }
    })
 }

-fn is_camel_case(name: &str) -> bool {
+fn is_upper_camel_case(name: &str) -> bool {
    let name = name.trim_matches('_');
    let Some(first) = name.chars().next() else {
        return true;
    };

-    // start with a non-lowercase letter rather than uppercase
-    // ones (some scripts don't have a concept of upper/lowercase)
-    !(first.is_lowercase() || name.contains("__") || has_underscore_case(name))
+    // some scripts don't have a concept of upper/lowercase
+    !(changes_when_titlecased(first) || not_camel_case(name))
 }

-fn to_camel_case(s: &str) -> String {
+fn to_upper_camel_case(s: &str) -> String {
    s.trim_matches('_')
        .split('_')
        .filter(|component| !component.is_empty())
@@ -83,24 +95,31 @@ fn to_camel_case(s: &str) -> String {

            let mut new_word = true;
            let mut prev_is_lower_case = true;
+            let mut prev_is_lowercased_sigma = false;

            for c in component.chars() {
                // Preserve the case if an uppercase letter follows a lowercase letter, so that
                // `camelCase` is converted to `CamelCase`.
-                if prev_is_lower_case && c.is_uppercase() {
+                if prev_is_lower_case && (c.is_uppercase() | c.is_titlecase()) {
                    new_word = true;
                }

                if new_word {
-                    camel_cased_component.extend(c.to_uppercase());
+                    camel_cased_component.extend(c.to_titlecase());
                } else {
                    camel_cased_component.extend(c.to_lowercase());
                }

-                prev_is_lower_case = c.is_lowercase();
+                prev_is_lower_case = c.is_lowercase() || c.is_titlecase();
+                prev_is_lowercased_sigma = !new_word && c == 'Σ';
                new_word = false;
            }

+            if prev_is_lowercased_sigma {
+                camel_cased_component.pop();
+                camel_cased_component.push('ς');
+            }
+
            camel_cased_component
        })
        .fold((String::new(), None), |(acc, prev): (String, Option<String>), next| {
@@ -122,8 +141,8 @@ impl NonCamelCaseTypes {
    fn check_case(&self, cx: &EarlyContext<'_>, sort: &str, ident: &Ident) {
        let name = ident.name.as_str();

-        if !is_camel_case(name) {
-            let cc = to_camel_case(name);
+        if !is_upper_camel_case(name) {
+            let cc = to_upper_camel_case(name);
            let sub = if *name != cc {
                NonCamelCaseTypeSub::Suggestion { span: ident.span, replace: cc }
            } else {
@@ -235,14 +254,20 @@ fn to_snake_case(mut name: &str) -> String {
                continue;
            }
            for ch in s.chars() {
-                if !buf.is_empty() && buf != "'" && ch.is_uppercase() && !last_upper {
-                    words.push(buf);
+                if !buf.is_empty()
+                    && buf != "'"
+                    && (ch.is_uppercase() || ch.is_titlecase())
+                    && !last_upper
+                {
+                    // We lowercase only at the end, to handle final sigma correctly
+                    words.push(buf.to_lowercase());
                    buf = String::new();
                }
-                last_upper = ch.is_uppercase();
-                buf.extend(ch.to_lowercase());
+                last_upper = ch.is_uppercase() || ch.is_titlecase();
+                buf.push(ch);
            }
-            words.push(buf);
+            // We lowercase only at the end, to handle final sigma correctly
+            words.push(buf.to_lowercase());
        }
        words.join("_")
    }
@@ -262,7 +287,8 @@ fn is_snake_case(ident: &str) -> bool {

            // This correctly handles letters in languages with and without
            // cases, as well as numbers and underscores.
-            !ident.chars().any(char::is_uppercase)
+            // FIXME: we should add a standard library impl of `c.to_lowercase().eq([c])`
+            ident.chars().all(|c| c.to_lowercase().eq([c]))
        }

        let name = ident.name.as_str();
@@ -474,10 +500,12 @@ fn into_diag(self, dcx: DiagCtxtHandle<'a>, level: Level) -> Diag<'a, ()> {
 impl NonUpperCaseGlobals {
    fn check_upper_case(cx: &LateContext<'_>, sort: &str, did: Option<LocalDefId>, ident: &Ident) {
        let name = ident.name.as_str();
-        if name.chars().any(|c| c.is_lowercase()) {
+        // FIXME: we should add a more efficient version
+        // in the stdlib for `c.to_uppercase().eq([c])`
+        if !name.chars().all(|c| c.to_uppercase().eq([c])) {
            let uc = NonSnakeCase::to_snake_case(name).to_uppercase();

-            // If the item is exported, suggesting changing it's name would be breaking-change
+            // If the item is exported, suggesting changing its name would be a breaking change
            // and could break users without a "nice" applicable fix, so let's avoid it.
            let can_change_usages = if let Some(did) = did {
                !cx.tcx.effective_visibilities(()).is_exported(did)
@@ -1,21 +1,37 @@
-use super::{is_camel_case, to_camel_case};
+use super::{is_upper_camel_case, to_upper_camel_case};

 #[test]
 fn camel_case() {
-    assert!(!is_camel_case("userData"));
-    assert_eq!(to_camel_case("userData"), "UserData");
+    assert!(!is_upper_camel_case("userData"));
+    assert_eq!(to_upper_camel_case("userData"), "UserData");

-    assert!(is_camel_case("X86_64"));
+    assert!(is_upper_camel_case("X86_64"));

-    assert!(!is_camel_case("X86__64"));
-    assert_eq!(to_camel_case("X86__64"), "X86_64");
+    assert!(!is_upper_camel_case("X86__64"));
+    assert_eq!(to_upper_camel_case("X86__64"), "X86_64");

-    assert!(!is_camel_case("Abc_123"));
-    assert_eq!(to_camel_case("Abc_123"), "Abc123");
+    assert!(!is_upper_camel_case("Abc_123"));
+    assert_eq!(to_upper_camel_case("Abc_123"), "Abc123");

-    assert!(!is_camel_case("A1_b2_c3"));
-    assert_eq!(to_camel_case("A1_b2_c3"), "A1B2C3");
+    assert!(!is_upper_camel_case("A1_b2_c3"));
+    assert_eq!(to_upper_camel_case("A1_b2_c3"), "A1B2C3");

-    assert!(!is_camel_case("ONE_TWO_THREE"));
-    assert_eq!(to_camel_case("ONE_TWO_THREE"), "OneTwoThree");
+    assert!(!is_upper_camel_case("ONE_TWO_THREE"));
+    assert_eq!(to_upper_camel_case("ONE_TWO_THREE"), "OneTwoThree");
+
+    // FIXME(@Jules-Bertholet): This test doesn't work due to what I believe
+    // is a Unicode spec bug - uppercase Georgian letters have
+    // incorrect titlecase mappings.
+    // I've reported it to Unicode.
+    // Georgian mtavruli is only used in all-caps
+    //assert!(!is_upper_camel_case("ᲫალაᲔრთობაშია"));
+    //assert_eq!(to_upper_camel_case("ᲫალაᲔრთობაშია"), "ძალა_ერთობაშია");
+
+    assert!(!is_upper_camel_case("ǇǊaaaǄooo"));
+    assert_eq!(to_upper_camel_case("ǇǊaaǈǊaǄooo"), "ǈǌaaǈǋaǅooo");
+
+    // Final sigma
+    assert!(!is_upper_camel_case("ΦΙΛΟΣ_ΦΙΛΟΣ"));
+    assert_eq!(to_upper_camel_case("ΦΙΛΟΣ_ΦΙΛΟΣ"), "ΦιλοςΦιλος");
+    assert!(is_upper_camel_case("ΦιλοσΦιλοσ"));
 }
@@ -42,8 +42,18 @@
 struct 你_ӟ;
 //~^ ERROR type `你_ӟ` should have an upper camel case name

-// and this is ok:
+struct ΦΙΛΟΣ_Σ;
+//~^ ERROR type `ΦΙΛΟΣ_Σ` should have an upper camel case name
+
+struct Σ_ΦΙΛΟΣ;
+//~^ ERROR type `Σ_ΦΙΛΟΣ` should have an upper camel case name
+
+// these are ok:

 struct 你_好;

+struct ძალა_ერთობაშია;
+
+struct Σ;
+
 fn main() {}
@@ -46,5 +46,17 @@ error: type `你_ӟ` should have an upper camel case name
 LL | struct 你_ӟ;
   |        ^^^^ help: convert the identifier to upper camel case: `你Ӟ`

-error: aborting due to 7 previous errors
+error: type `ΦΙΛΟΣ_Σ` should have an upper camel case name
+  --> $DIR/lint-nonstandard-style-unicode-1.rs:45:8
+   |
+LL | struct ΦΙΛΟΣ_Σ;
+   |        ^^^^^^^ help: convert the identifier to upper camel case: `ΦιλοςΣ`
+
+error: type `Σ_ΦΙΛΟΣ` should have an upper camel case name
+  --> $DIR/lint-nonstandard-style-unicode-1.rs:48:8
+   |
+LL | struct Σ_ΦΙΛΟΣ;
+   |        ^^^^^^^ help: convert the identifier to upper camel case: `ΣΦιλος`
+
+error: aborting due to 9 previous errors

@@ -0,0 +1,31 @@
+#![allow(dead_code)]
+#![forbid(non_snake_case)]
+
+// 2. non_snake_case
+
+
+fn ǇǊaaǈǊaǄooo() {}
+//~^ ERROR function `ǇǊaaǈǊaǄooo` should have a snake case name
+//~| WARN identifier contains 5 non normalized (NFKC) characters
+
+fn ǈǌaaǈǋaǅooo() {}
+//~^ ERROR function `ǈǌaaǈǋaǅooo` should have a snake case name
+//~| WARN identifier contains 5 non normalized (NFKC) characters
+
+// test final sigma casing
+fn ΦΙΛΟΣ_ΦΙΛΟΣ() {}
+//~^ ERROR function `ΦΙΛΟΣ_ΦΙΛΟΣ` should have a snake case name
+
+fn Σ() {}
+//~^ ERROR function `Σ` should have a snake case name
+
+fn ΦΙΛΟΣ_Σ() {}
+//~^ ERROR function `ΦΙΛΟΣ_Σ` should have a snake case name
+
+fn Σ_ΦΙΛΟΣ() {}
+//~^ ERROR function `Σ_ΦΙΛΟΣ` should have a snake case name
+
+// this is ok
+fn φιλοσ_φιλοσ() {}
+
+fn main() {}
@@ -0,0 +1,61 @@
+warning: identifier contains 5 non normalized (NFKC) characters: 'Ǉ', 'Ǌ', 'ǈ', 'Ǌ', and 'Ǆ'
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:7:4
+   |
+LL | fn ǇǊaaǈǊaǄooo() {}
+   |    ^^^^^^^^^^^
+   |
+   = note: these characters are included in the Not_NFKC Unicode general security profile
+   = note: `#[warn(uncommon_codepoints)]` on by default
+
+warning: identifier contains 5 non normalized (NFKC) characters: 'ǈ', 'ǌ', 'ǈ', 'ǋ', and 'ǅ'
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:11:4
+   |
+LL | fn ǈǌaaǈǋaǅooo() {}
+   |    ^^^^^^^^^^^
+   |
+   = note: these characters are included in the Not_NFKC Unicode general security profile
+
+error: function `ǇǊaaǈǊaǄooo` should have a snake case name
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:7:4
+   |
+LL | fn ǇǊaaǈǊaǄooo() {}
+   |    ^^^^^^^^^^^ help: convert the identifier to snake case: `ǉǌaa_ǉǌa_ǆooo`
+   |
+note: the lint level is defined here
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:2:11
+   |
+LL | #![forbid(non_snake_case)]
+   |           ^^^^^^^^^^^^^^
+
+error: function `ǈǌaaǈǋaǅooo` should have a snake case name
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:11:4
+   |
+LL | fn ǈǌaaǈǋaǅooo() {}
+   |    ^^^^^^^^^^^ help: convert the identifier to snake case: `ǉǌaa_ǉǌa_ǆooo`
+
+error: function `ΦΙΛΟΣ_ΦΙΛΟΣ` should have a snake case name
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:16:4
+   |
+LL | fn ΦΙΛΟΣ_ΦΙΛΟΣ() {}
+   |    ^^^^^^^^^^^ help: convert the identifier to snake case: `φιλος_φιλος`
+
+error: function `Σ` should have a snake case name
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:19:4
+   |
+LL | fn Σ() {}
+   |    ^ help: convert the identifier to snake case: `σ`
+
+error: function `ΦΙΛΟΣ_Σ` should have a snake case name
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:22:4
+   |
+LL | fn ΦΙΛΟΣ_Σ() {}
+   |    ^^^^^^^ help: convert the identifier to snake case: `φιλος_σ`
+
+error: function `Σ_ΦΙΛΟΣ` should have a snake case name
+  --> $DIR/lint-nonstandard-style-unicode-2.rs:25:4
+   |
+LL | fn Σ_ΦΙΛΟΣ() {}
+   |    ^^^^^^^ help: convert the identifier to snake case: `σ_φιλος`
+
+error: aborting due to 6 previous errors; 2 warnings emitted
+
@@ -21,4 +21,11 @@

 static __密__封__线__内__禁__止__答__题__: bool = true;

+static ძალა_ერთობაშია: () = ();
+//~^ ERROR static variable `ძალა_ერთობაშია` should have an upper case name
+
+static ǋ: () = ();
+//~^ ERROR static variable `ǋ` should have an upper case name
+//~| WARN identifier contains a non normalized (NFKC) character
+
 fn main() {}
@@ -1,3 +1,12 @@
+warning: identifier contains a non normalized (NFKC) character: 'ǋ'
+  --> $DIR/lint-nonstandard-style-unicode-3.rs:27:8
+   |
+LL | static ǋ: () = ();
+   |        ^
+   |
+   = note: this character is included in the Not_NFKC Unicode general security profile
+   = note: `#[warn(uncommon_codepoints)]` on by default
+
 error: static variable `τεχ` should have an upper case name
  --> $DIR/lint-nonstandard-style-unicode-3.rs:17:8
   |
@@ -10,5 +19,17 @@ note: the lint level is defined here
 LL | #![forbid(non_upper_case_globals)]
   |           ^^^^^^^^^^^^^^^^^^^^^^

-error: aborting due to 1 previous error
+error: static variable `ძალა_ერთობაშია` should have an upper case name
+  --> $DIR/lint-nonstandard-style-unicode-3.rs:24:8
+   |
+LL | static ძალა_ერთობაშია: () = ();
+   |        ^^^^^^^^^^^^^^ help: convert the identifier to upper case: `ᲫᲐᲚᲐ_ᲔᲠᲗᲝᲑᲐᲨᲘᲐ`
+
+error: static variable `ǋ` should have an upper case name
+  --> $DIR/lint-nonstandard-style-unicode-3.rs:27:8
+   |
+LL | static ǋ: () = ();
+   |        ^ help: convert the identifier to upper case: `Ǌ`
+
+error: aborting due to 3 previous errors; 1 warning emitted

@@ -1,23 +1,23 @@
 // (#77273) These characters are in the general categories of
-// "Uppercase/Lowercase Letter".
-// The diagnostics don't provide meaningful suggestions for them
-// as we cannot convert them properly.
+// "Uppercase/Lowercase Letter",
+// but casing operations map them to themselves.
+// Therefore, we do not warn about casing
+// (but do warn about uncommon codepoints).

 //@ check-pass

-#![allow(uncommon_codepoints, unused)]
+#![allow(unused)]

 struct 𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝;
-//~^ WARN: type `𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name
+//~^ WARN identifier contains 9 non normalized (NFKC) characters

-// FIXME: How we should handle this?
 struct 𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝;
-//~^ WARN: type `𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name
+//~^ WARN identifier contains 9 non normalized (NFKC) characters

 static 𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲: i32 = 1;
-//~^ WARN: static variable `𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲` should have an upper case name
+//~^ WARN identifier contains 12 non normalized (NFKC) characters

 fn main() {
    let 𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢 = 1;
-    //~^ WARN: variable `𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢` should have a snake case name
+    //~^ WARN identifier contains 9 non normalized (NFKC) characters
 }
@@ -1,32 +1,35 @@
-warning: type `𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name
-  --> $DIR/special-upper-lower-cases.rs:10:8
+warning: identifier contains 9 non normalized (NFKC) characters: '𝕟', '𝕠', '𝕥', '𝕒', '𝕔', '𝕒', '𝕞', '𝕖', and '𝕝'
+  --> $DIR/special-upper-lower-cases.rs:11:8
   |
 LL | struct 𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝;
-   |        ^^^^^^^^^ should have an UpperCamelCase name
+   |        ^^^^^^^^^
   |
-   = note: `#[warn(non_camel_case_types)]` (part of `#[warn(nonstandard_style)]`) on by default
+   = note: these characters are included in the Not_NFKC Unicode general security profile
+   = note: `#[warn(uncommon_codepoints)]` on by default

-warning: type `𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name
+warning: identifier contains 9 non normalized (NFKC) characters: '𝕟', '𝕠', '𝕥', '𝕒', '𝕔', '𝕒', '𝕞', '𝕖', and '𝕝'
  --> $DIR/special-upper-lower-cases.rs:14:8
   |
 LL | struct 𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝;
-   |        ^^^^^^^^^^^ should have an UpperCamelCase name
+   |        ^^^^^^^^^^^
+   |
+   = note: these characters are included in the Not_NFKC Unicode general security profile

-warning: static variable `𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲` should have an upper case name
+warning: identifier contains 12 non normalized (NFKC) characters: '𝗻', '𝗼', '𝗻', '𝘂', '𝗽', '𝗽', '𝗲', '𝗿', '𝗰', '𝗮', '𝘀', and '𝗲'
  --> $DIR/special-upper-lower-cases.rs:17:8
   |
 LL | static 𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲: i32 = 1;
-   |        ^^^^^^^^^^^^ should have an UPPER_CASE name
+   |        ^^^^^^^^^^^^
   |
-   = note: `#[warn(non_upper_case_globals)]` (part of `#[warn(nonstandard_style)]`) on by default
+   = note: these characters are included in the Not_NFKC Unicode general security profile

-warning: variable `𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢` should have a snake case name
+warning: identifier contains 9 non normalized (NFKC) characters: '𝓢', '𝓝', '𝓐', '𝓐', '𝓐', '𝓐', '𝓚', '𝓔', and '𝓢'
  --> $DIR/special-upper-lower-cases.rs:21:9
   |
 LL |     let 𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢 = 1;
-   |         ^^^^^^^^^ should have a snake_case name
+   |         ^^^^^^^^^
   |
-   = note: `#[warn(non_snake_case)]` (part of `#[warn(nonstandard_style)]`) on by default
+   = note: these characters are included in the Not_NFKC Unicode general security profile

 warning: 4 warnings emitted