Rollup merge of #155469 - Jules-Bertholet:titlecase-idents, r=petrochenkov

Account for titlecase in casing lints

Puts https://github.com/rust-lang/rust/issues/153892 to work.

Also contains fixes for Greek final sigma casing.

There are probably still some edge cases left to fix. Ideally we would use https://www.unicode.org/reports/tr55/#Identifier-Chunks as a base.

@rustbot label A-Unicode A-diagnostics A-lints A-suggestion-diagnostics
This commit is contained in:
Guillaume Gomez
2026-04-23 14:42:45 +02:00
committed by GitHub
11 changed files with 252 additions and 62 deletions
+1
View File
@@ -24,6 +24,7 @@
#![feature(box_patterns)]
#![feature(iter_order_by)]
#![feature(rustc_attrs)]
#![feature(titlecase)]
#![feature(try_blocks)]
// tidy-alphabetical-end
+54 -26
View File
@@ -47,34 +47,46 @@
declare_lint_pass!(NonCamelCaseTypes => [NON_CAMEL_CASE_TYPES]);
/// Some unicode characters *have* case, are considered upper case or lower case, but they *can't*
/// be upper cased or lower cased. For the purposes of the lint suggestion, we care about being able
/// Some unicode characters *have* case, are considered upper, title, or lower case, but they *can't*
/// be title cased or lower cased. For the purposes of the lint suggestion, we care about being able
/// to change the char's case.
fn char_has_case(c: char) -> bool {
!c.to_lowercase().eq(c.to_uppercase())
!c.to_lowercase().eq(c.to_titlecase())
}
// contains a capitalisable character followed by, or preceded by, an underscore
fn has_underscore_case(s: &str) -> bool {
/// Returns `true` if titlecasing `c` yields anything other than the single
/// character `c` itself, i.e. `c` is not already its own titlecase form.
///
/// FIXME: we should add a more efficient version
/// in the stdlib for this
fn changes_when_titlecased(c: char) -> bool {
// Compare the titlecase mapping of `c` against the one-element sequence `[c]`.
!c.to_titlecase().eq([c])
}
// contains a capitalisable character followed by, or preceded by, an underscore,
// or contains an uppercase character that changes when titlecased,
// or contains `__`
fn not_camel_case(s: &str) -> bool {
let mut last = '\0';
s.chars().any(|c| match (std::mem::replace(&mut last, c), c) {
('_', cs) | (cs, '_') => char_has_case(cs),
_ => false,
s.chars().any(|snd| {
let fst = std::mem::replace(&mut last, snd);
match (fst, snd) {
('_', '_') => return true,
('_', _) if char_has_case(snd) => return true,
(_, '_') if char_has_case(fst) => return true,
_ => snd.is_uppercase() && changes_when_titlecased(snd),
}
})
}
fn is_camel_case(name: &str) -> bool {
fn is_upper_camel_case(name: &str) -> bool {
let name = name.trim_matches('_');
let Some(first) = name.chars().next() else {
return true;
};
// start with a non-lowercase letter rather than uppercase
// ones (some scripts don't have a concept of upper/lowercase)
!(first.is_lowercase() || name.contains("__") || has_underscore_case(name))
// some scripts don't have a concept of upper/lowercase
!(changes_when_titlecased(first) || not_camel_case(name))
}
fn to_camel_case(s: &str) -> String {
fn to_upper_camel_case(s: &str) -> String {
s.trim_matches('_')
.split('_')
.filter(|component| !component.is_empty())
@@ -83,24 +95,31 @@ fn to_camel_case(s: &str) -> String {
let mut new_word = true;
let mut prev_is_lower_case = true;
let mut prev_is_lowercased_sigma = false;
for c in component.chars() {
// Preserve the case if an uppercase letter follows a lowercase letter, so that
// `camelCase` is converted to `CamelCase`.
if prev_is_lower_case && c.is_uppercase() {
if prev_is_lower_case && (c.is_uppercase() | c.is_titlecase()) {
new_word = true;
}
if new_word {
camel_cased_component.extend(c.to_uppercase());
camel_cased_component.extend(c.to_titlecase());
} else {
camel_cased_component.extend(c.to_lowercase());
}
prev_is_lower_case = c.is_lowercase();
prev_is_lower_case = c.is_lowercase() || c.is_titlecase();
prev_is_lowercased_sigma = !new_word && c == 'Σ';
new_word = false;
}
if prev_is_lowercased_sigma {
camel_cased_component.pop();
camel_cased_component.push('ς');
}
camel_cased_component
})
.fold((String::new(), None), |(acc, prev): (String, Option<String>), next| {
@@ -122,8 +141,8 @@ impl NonCamelCaseTypes {
fn check_case(&self, cx: &EarlyContext<'_>, sort: &str, ident: &Ident) {
let name = ident.name.as_str();
if !is_camel_case(name) {
let cc = to_camel_case(name);
if !is_upper_camel_case(name) {
let cc = to_upper_camel_case(name);
let sub = if *name != cc {
NonCamelCaseTypeSub::Suggestion { span: ident.span, replace: cc }
} else {
@@ -235,14 +254,20 @@ fn to_snake_case(mut name: &str) -> String {
continue;
}
for ch in s.chars() {
if !buf.is_empty() && buf != "'" && ch.is_uppercase() && !last_upper {
words.push(buf);
if !buf.is_empty()
&& buf != "'"
&& (ch.is_uppercase() || ch.is_titlecase())
&& !last_upper
{
// We lowercase only at the end, to handle final sigma correctly
words.push(buf.to_lowercase());
buf = String::new();
}
last_upper = ch.is_uppercase();
buf.extend(ch.to_lowercase());
last_upper = ch.is_uppercase() || ch.is_titlecase();
buf.push(ch);
}
words.push(buf);
// We lowercase only at the end, to handle final sigma correctly
words.push(buf.to_lowercase());
}
words.join("_")
}
@@ -262,7 +287,8 @@ fn is_snake_case(ident: &str) -> bool {
// This correctly handles letters in languages with and without
// cases, as well as numbers and underscores.
!ident.chars().any(char::is_uppercase)
// FIXME: we should add a standard library impl of `c.to_lowercase().eq([c])`
ident.chars().all(|c| c.to_lowercase().eq([c]))
}
let name = ident.name.as_str();
@@ -474,10 +500,12 @@ fn into_diag(self, dcx: DiagCtxtHandle<'a>, level: Level) -> Diag<'a, ()> {
impl NonUpperCaseGlobals {
fn check_upper_case(cx: &LateContext<'_>, sort: &str, did: Option<LocalDefId>, ident: &Ident) {
let name = ident.name.as_str();
if name.chars().any(|c| c.is_lowercase()) {
// FIXME: we should add a more efficient version
// in the stdlib for `c.to_uppercase().eq([c])`
if !name.chars().all(|c| c.to_uppercase().eq([c])) {
let uc = NonSnakeCase::to_snake_case(name).to_uppercase();
// If the item is exported, suggesting changing it's name would be breaking-change
// If the item is exported, suggesting changing its name would be a breaking change
// and could break users without a "nice" applicable fix, so let's avoid it.
let can_change_usages = if let Some(did) = did {
!cx.tcx.effective_visibilities(()).is_exported(did)
@@ -1,21 +1,37 @@
use super::{is_camel_case, to_camel_case};
use super::{is_upper_camel_case, to_upper_camel_case};
#[test]
fn camel_case() {
assert!(!is_camel_case("userData"));
assert_eq!(to_camel_case("userData"), "UserData");
assert!(!is_upper_camel_case("userData"));
assert_eq!(to_upper_camel_case("userData"), "UserData");
assert!(is_camel_case("X86_64"));
assert!(is_upper_camel_case("X86_64"));
assert!(!is_camel_case("X86__64"));
assert_eq!(to_camel_case("X86__64"), "X86_64");
assert!(!is_upper_camel_case("X86__64"));
assert_eq!(to_upper_camel_case("X86__64"), "X86_64");
assert!(!is_camel_case("Abc_123"));
assert_eq!(to_camel_case("Abc_123"), "Abc123");
assert!(!is_upper_camel_case("Abc_123"));
assert_eq!(to_upper_camel_case("Abc_123"), "Abc123");
assert!(!is_camel_case("A1_b2_c3"));
assert_eq!(to_camel_case("A1_b2_c3"), "A1B2C3");
assert!(!is_upper_camel_case("A1_b2_c3"));
assert_eq!(to_upper_camel_case("A1_b2_c3"), "A1B2C3");
assert!(!is_camel_case("ONE_TWO_THREE"));
assert_eq!(to_camel_case("ONE_TWO_THREE"), "OneTwoThree");
assert!(!is_upper_camel_case("ONE_TWO_THREE"));
assert_eq!(to_upper_camel_case("ONE_TWO_THREE"), "OneTwoThree");
// FIXME(@Jules-Bertholet): This test doesn't work due to what I believe
// is a Unicode spec bug - uppercase Georgian letters have
// incorrect titlecase mappings.
// I've reported it to Unicode.
// Georgian mtavruli is only used in all-caps
//assert!(!is_upper_camel_case("ᲫალაᲔრთობაშია"));
//assert_eq!(to_upper_camel_case("ᲫალაᲔრთობაშია"), "ძალა_ერთობაშია");
assert!(!is_upper_camel_case("LJNJaaaDŽooo"));
assert_eq!(to_upper_camel_case("LJNJaaLjNJaDŽooo"), "LjnjaaLjNjaDžooo");
// Final sigma
assert!(!is_upper_camel_case("ΦΙΛΟΣ_ΦΙΛΟΣ"));
assert_eq!(to_upper_camel_case("ΦΙΛΟΣ_ΦΙΛΟΣ"), "ΦιλοςΦιλος");
assert!(is_upper_camel_case("ΦιλοσΦιλοσ"));
}
@@ -42,8 +42,18 @@
struct 你_ӟ;
//~^ ERROR type `你_ӟ` should have an upper camel case name
// and this is ok:
struct ΦΙΛΟΣ_Σ;
//~^ ERROR type `ΦΙΛΟΣ_Σ` should have an upper camel case name
struct Σ_ΦΙΛΟΣ;
//~^ ERROR type `Σ_ΦΙΛΟΣ` should have an upper camel case name
// these are ok:
struct _好;
struct _ერთობაშია;
struct Σ;
fn main() {}
@@ -46,5 +46,17 @@ error: type `你_ӟ` should have an upper camel case name
LL | struct 你_ӟ;
| ^^^^ help: convert the identifier to upper camel case: `你Ӟ`
error: aborting due to 7 previous errors
error: type `ΦΙΛΟΣ_Σ` should have an upper camel case name
--> $DIR/lint-nonstandard-style-unicode-1.rs:45:8
|
LL | struct ΦΙΛΟΣ_Σ;
| ^^^^^^^ help: convert the identifier to upper camel case: `ΦιλοςΣ`
error: type `Σ_ΦΙΛΟΣ` should have an upper camel case name
--> $DIR/lint-nonstandard-style-unicode-1.rs:48:8
|
LL | struct Σ_ΦΙΛΟΣ;
| ^^^^^^^ help: convert the identifier to upper camel case: `ΣΦιλος`
error: aborting due to 9 previous errors
@@ -0,0 +1,31 @@
#![allow(dead_code)]
#![forbid(non_snake_case)]
// 2. non_snake_case
fn LJNJaaLjNJaDŽooo() {}
//~^ ERROR function `LJNJaaLjNJaDŽooo` should have a snake case name
//~| WARN identifier contains 5 non normalized (NFKC) characters
fn LjnjaaLjNjaDžooo() {}
//~^ ERROR function `LjnjaaLjNjaDžooo` should have a snake case name
//~| WARN identifier contains 5 non normalized (NFKC) characters
// test final sigma casing
fn ΦΙΛΟΣ_ΦΙΛΟΣ() {}
//~^ ERROR function `ΦΙΛΟΣ_ΦΙΛΟΣ` should have a snake case name
fn Σ() {}
//~^ ERROR function `Σ` should have a snake case name
fn ΦΙΛΟΣ_Σ() {}
//~^ ERROR function `ΦΙΛΟΣ_Σ` should have a snake case name
fn Σ_ΦΙΛΟΣ() {}
//~^ ERROR function `Σ_ΦΙΛΟΣ` should have a snake case name
// this is ok
fn φιλοσ_φιλοσ() {}
fn main() {}
@@ -0,0 +1,61 @@
warning: identifier contains 5 non normalized (NFKC) characters: 'LJ', 'NJ', 'Lj', 'NJ', and 'DŽ'
--> $DIR/lint-nonstandard-style-unicode-2.rs:7:4
|
LL | fn LJNJaaLjNJaDŽooo() {}
| ^^^^^^^^^^^
|
= note: these characters are included in the Not_NFKC Unicode general security profile
= note: `#[warn(uncommon_codepoints)]` on by default
warning: identifier contains 5 non normalized (NFKC) characters: 'Lj', 'nj', 'Lj', 'Nj', and 'Dž'
--> $DIR/lint-nonstandard-style-unicode-2.rs:11:4
|
LL | fn LjnjaaLjNjaDžooo() {}
| ^^^^^^^^^^^
|
= note: these characters are included in the Not_NFKC Unicode general security profile
error: function `LJNJaaLjNJaDŽooo` should have a snake case name
--> $DIR/lint-nonstandard-style-unicode-2.rs:7:4
|
LL | fn LJNJaaLjNJaDŽooo() {}
| ^^^^^^^^^^^ help: convert the identifier to snake case: `ljnjaa_ljnja_džooo`
|
note: the lint level is defined here
--> $DIR/lint-nonstandard-style-unicode-2.rs:2:11
|
LL | #![forbid(non_snake_case)]
| ^^^^^^^^^^^^^^
error: function `LjnjaaLjNjaDžooo` should have a snake case name
--> $DIR/lint-nonstandard-style-unicode-2.rs:11:4
|
LL | fn LjnjaaLjNjaDžooo() {}
| ^^^^^^^^^^^ help: convert the identifier to snake case: `ljnjaa_ljnja_džooo`
error: function `ΦΙΛΟΣ_ΦΙΛΟΣ` should have a snake case name
--> $DIR/lint-nonstandard-style-unicode-2.rs:16:4
|
LL | fn ΦΙΛΟΣ_ΦΙΛΟΣ() {}
| ^^^^^^^^^^^ help: convert the identifier to snake case: `φιλος_φιλος`
error: function `Σ` should have a snake case name
--> $DIR/lint-nonstandard-style-unicode-2.rs:19:4
|
LL | fn Σ() {}
| ^ help: convert the identifier to snake case: `σ`
error: function `ΦΙΛΟΣ_Σ` should have a snake case name
--> $DIR/lint-nonstandard-style-unicode-2.rs:22:4
|
LL | fn ΦΙΛΟΣ_Σ() {}
| ^^^^^^^ help: convert the identifier to snake case: `φιλος_σ`
error: function `Σ_ΦΙΛΟΣ` should have a snake case name
--> $DIR/lint-nonstandard-style-unicode-2.rs:25:4
|
LL | fn Σ_ΦΙΛΟΣ() {}
| ^^^^^^^ help: convert the identifier to snake case: `σ_φιλος`
error: aborting due to 6 previous errors; 2 warnings emitted
@@ -21,4 +21,11 @@
static __密__封__线__内__禁__止__答__题__: bool = true;
static ძალა_ერთობაშია: () = ();
//~^ ERROR static variable `ძალა_ერთობაშია` should have an upper case name
static Nj: () = ();
//~^ ERROR static variable `Nj` should have an upper case name
//~| WARN identifier contains a non normalized (NFKC) character
fn main() {}
@@ -1,3 +1,12 @@
warning: identifier contains a non normalized (NFKC) character: 'Nj'
--> $DIR/lint-nonstandard-style-unicode-3.rs:27:8
|
LL | static Nj: () = ();
| ^
|
= note: this character is included in the Not_NFKC Unicode general security profile
= note: `#[warn(uncommon_codepoints)]` on by default
error: static variable `τεχ` should have an upper case name
--> $DIR/lint-nonstandard-style-unicode-3.rs:17:8
|
@@ -10,5 +19,17 @@ note: the lint level is defined here
LL | #![forbid(non_upper_case_globals)]
| ^^^^^^^^^^^^^^^^^^^^^^
error: aborting due to 1 previous error
error: static variable `ძალა_ერთობაშია` should have an upper case name
--> $DIR/lint-nonstandard-style-unicode-3.rs:24:8
|
LL | static ძალა_ერთობაშია: () = ();
| ^^^^^^^^^^^^^^ help: convert the identifier to upper case: `ᲫᲐᲚᲐ_ᲔᲠᲗᲝᲑᲐᲨᲘᲐ`
error: static variable `Nj` should have an upper case name
--> $DIR/lint-nonstandard-style-unicode-3.rs:27:8
|
LL | static Nj: () = ();
| ^ help: convert the identifier to upper case: `NJ`
error: aborting due to 3 previous errors; 1 warning emitted
+9 -9
View File
@@ -1,23 +1,23 @@
// (#77273) These characters are in the general categories of
// "Uppercase/Lowercase Letter".
// The diagnostics don't provide meaningful suggestions for them
// as we cannot convert them properly.
// "Uppercase/Lowercase Letter",
// but casing operations map them to themselves.
// Therefore, we do not warn about casing
// (but do warn about uncommon codepoints).
//@ check-pass
#![allow(uncommon_codepoints, unused)]
#![allow(unused)]
struct 𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝;
//~^ WARN: type `𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name
//~^ WARN identifier contains 9 non normalized (NFKC) characters
// FIXME: How should we handle this?
struct 𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝;
//~^ WARN: type `𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name
//~^ WARN identifier contains 9 non normalized (NFKC) characters
static 𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲: i32 = 1;
//~^ WARN: static variable `𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲` should have an upper case name
//~^ WARN identifier contains 12 non normalized (NFKC) characters
fn main() {
let 𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢 = 1;
//~^ WARN: variable `𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢` should have a snake case name
//~^ WARN identifier contains 9 non normalized (NFKC) characters
}
+15 -12
View File
@@ -1,32 +1,35 @@
warning: type `𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name
--> $DIR/special-upper-lower-cases.rs:10:8
warning: identifier contains 9 non normalized (NFKC) characters: '𝕟', '𝕠', '𝕥', '𝕒', '𝕔', '𝕒', '𝕞', '𝕖', and '𝕝'
--> $DIR/special-upper-lower-cases.rs:11:8
|
LL | struct 𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝;
| ^^^^^^^^^ should have an UpperCamelCase name
| ^^^^^^^^^
|
= note: `#[warn(non_camel_case_types)]` (part of `#[warn(nonstandard_style)]`) on by default
= note: these characters are included in the Not_NFKC Unicode general security profile
= note: `#[warn(uncommon_codepoints)]` on by default
warning: type `𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name
warning: identifier contains 9 non normalized (NFKC) characters: '𝕟', '𝕠', '𝕥', '𝕒', '𝕔', '𝕒', '𝕞', '𝕖', and '𝕝'
--> $DIR/special-upper-lower-cases.rs:14:8
|
LL | struct 𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝;
| ^^^^^^^^^^^ should have an UpperCamelCase name
| ^^^^^^^^^^^
|
= note: these characters are included in the Not_NFKC Unicode general security profile
warning: static variable `𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲` should have an upper case name
warning: identifier contains 12 non normalized (NFKC) characters: '𝗻', '𝗼', '𝗻', '𝘂', '𝗽', '𝗽', '𝗲', '𝗿', '𝗰', '𝗮', '𝘀', and '𝗲'
--> $DIR/special-upper-lower-cases.rs:17:8
|
LL | static 𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲: i32 = 1;
| ^^^^^^^^^^^^ should have an UPPER_CASE name
| ^^^^^^^^^^^^
|
= note: `#[warn(non_upper_case_globals)]` (part of `#[warn(nonstandard_style)]`) on by default
= note: these characters are included in the Not_NFKC Unicode general security profile
warning: variable `𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢` should have a snake case name
warning: identifier contains 9 non normalized (NFKC) characters: '𝓢', '𝓝', '𝓐', '𝓐', '𝓐', '𝓐', '𝓚', '𝓔', and '𝓢'
--> $DIR/special-upper-lower-cases.rs:21:9
|
LL | let 𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢 = 1;
| ^^^^^^^^^ should have a snake_case name
| ^^^^^^^^^
|
= note: `#[warn(non_snake_case)]` (part of `#[warn(nonstandard_style)]`) on by default
= note: these characters are included in the Not_NFKC Unicode general security profile
warning: 4 warnings emitted