From 9a54466e2f25605f8254bdb12c51dcd69b091bd6 Mon Sep 17 00:00:00 2001 From: compilersutra Date: Tue, 16 Jun 2026 18:24:37 +0530 Subject: [PATCH 1/4] date: align Thai locale formatting with GNU --- src/uucore/src/lib/features/i18n/datetime.rs | 268 ++++++++++++++----- tests/by-util/test_date.rs | 50 ++-- 2 files changed, 231 insertions(+), 87 deletions(-) diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs index 1e94f299abd..bebc6050e79 100644 --- a/src/uucore/src/lib/features/i18n/datetime.rs +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -8,14 +8,19 @@ //! Locale-aware datetime formatting utilities using ICU and jiff-icu use icu_calendar::Date; -use icu_calendar::cal::{Buddhist, Ethiopian, Iso, Persian}; -use icu_datetime::DateTimeFormatter; -use icu_datetime::fieldsets; +use icu_calendar::cal::{Ethiopian, Iso, Persian}; use icu_locale::Locale; use jiff::civil::Date as JiffDate; use jiff_icu::ConvertFrom; use std::sync::OnceLock; +#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +use nix::libc; +#[cfg(any(not(unix), target_os = "android", target_os = "cygwin", target_os = "redox"))] +use icu_datetime::DateTimeFormatter; +#[cfg(any(not(unix), target_os = "android", target_os = "cygwin", target_os = "redox"))] +use icu_datetime::fieldsets; + use crate::i18n::get_locale_from_env; /// Get the locale for time/date formatting from LC_TIME environment variable @@ -75,74 +80,101 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { let iso_date = Date::::convert_from(date); let mut fmt = format.replace("%%", PERCENT_PLACEHOLDER); + fmt = fmt.replace("%EY", "%Y"); - // For non-Gregorian calendars, replace date components with converted values + // GNU date keeps `%Y` and `%EY` Gregorian for th_TH.UTF-8, so normalize + // `%EY` to `%Y` before applying locale-specific substitutions. let calendar_type = get_locale_calendar_type(locale); - if calendar_type != CalendarType::Gregorian { - let (cal_year, cal_month, cal_day) = match calendar_type { - CalendarType::Buddhist => { - let d = iso_date.to_calendar(Buddhist); - ( - d.year().extended_year(), - d.month().ordinal, - d.day_of_month().0, - ) + match calendar_type { + CalendarType::Buddhist => {} + CalendarType::Persian => { + let d = iso_date.to_calendar(Persian); + let cal_year = d.year().extended_year(); + let cal_month = d.month().ordinal; + let cal_day = d.day_of_month().0; + fmt = fmt + .replace("%Y", &cal_year.to_string()) + .replace("%m", &format!("{cal_month:02}")) + .replace("%d", &format!("{cal_day:02}")) + .replace("%e", &format!("{cal_day:2}")); + } + CalendarType::Ethiopian => { + let d = iso_date.to_calendar(Ethiopian::new()); + let cal_year = d.year().extended_year(); + let cal_month = d.month().ordinal; + let cal_day = d.day_of_month().0; + fmt = fmt + .replace("%Y", &cal_year.to_string()) + .replace("%m", &format!("{cal_month:02}")) + .replace("%d", &format!("{cal_day:02}")) + .replace("%e", &format!("{cal_day:2}")); + } + CalendarType::Gregorian => {} + } + + // Format localized names. + #[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] + { + let month_idx = (iso_date.month().ordinal - 1) as usize; + let weekday_idx = iso_date.weekday() as usize % 7; + + if fmt.contains("%B") { + if let Some(months) = get_locale_month_names_long() { + fmt = fmt.replace("%B", &months[month_idx]); } - CalendarType::Persian => { - let d = iso_date.to_calendar(Persian); - ( - d.year().extended_year(), - d.month().ordinal, - d.day_of_month().0, - ) + } + if fmt.contains("%b") || fmt.contains("%h") { + if let Some(months) = get_locale_month_names_abbrev() { + let month_abbrev = &months[month_idx]; + fmt = fmt + .replace("%b", month_abbrev) + .replace("%h", month_abbrev); } - CalendarType::Ethiopian => { - let d = iso_date.to_calendar(Ethiopian::new()); - ( - d.year().extended_year(), - d.month().ordinal, - d.day_of_month().0, - ) + } + if fmt.contains("%A") { + if let Some(days) = get_locale_weekday_names_long() { + fmt = fmt.replace("%A", &days[weekday_idx]); } - CalendarType::Gregorian => unreachable!(), - }; - fmt = fmt - .replace("%Y", &cal_year.to_string()) - .replace("%m", &format!("{cal_month:02}")) - .replace("%d", &format!("{cal_day:02}")) - .replace("%e", &format!("{cal_day:2}")); + } + if fmt.contains("%a") { + if let Some(days) = get_locale_weekday_names_abbrev() { + fmt = fmt.replace("%a", &days[weekday_idx]); + } + } } - // Format localized names using ICU DateTimeFormatter - let locale_prefs = locale.clone().into(); + #[cfg(any(not(unix), target_os = "android", target_os = "cygwin", target_os = "redox"))] + { + let locale_prefs = locale.clone().into(); - if fmt.contains("%B") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) { - fmt = fmt.replace("%B", &f.format(&iso_date).to_string()); + if fmt.contains("%B") { + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) { + fmt = fmt.replace("%B", &f.format(&iso_date).to_string()); + } } - } - if fmt.contains("%b") || fmt.contains("%h") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) { - // ICU's medium format may include trailing periods (e.g., "febr." for Hungarian), - // which when combined with locale format strings that also add periods after - // %b (e.g., "%Y. %b. %d") results in double periods ("febr.."). - // The standard C/POSIX locale via nl_langinfo returns abbreviations - // WITHOUT trailing periods, so we strip them here for consistency. - let month_abbrev = f.format(&iso_date).to_string(); - let month_abbrev = month_abbrev.trim_end_matches('.').to_string(); - fmt = fmt - .replace("%b", &month_abbrev) - .replace("%h", &month_abbrev); + if fmt.contains("%b") || fmt.contains("%h") { + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) { + // ICU's medium format may include trailing periods (e.g., "febr." for Hungarian), + // which when combined with locale format strings that also add periods after + // %b (e.g., "%Y. %b. %d") results in double periods ("febr.."). + // The standard C/POSIX locale via nl_langinfo returns abbreviations + // WITHOUT trailing periods, so we strip them here for consistency. + let month_abbrev = f.format(&iso_date).to_string(); + let month_abbrev = month_abbrev.trim_end_matches('.').to_string(); + fmt = fmt + .replace("%b", &month_abbrev) + .replace("%h", &month_abbrev); + } } - } - if fmt.contains("%A") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) { - fmt = fmt.replace("%A", &f.format(&iso_date).to_string()); + if fmt.contains("%A") { + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) { + fmt = fmt.replace("%A", &f.format(&iso_date).to_string()); + } } - } - if fmt.contains("%a") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) { - fmt = fmt.replace("%a", &f.format(&iso_date).to_string()); + if fmt.contains("%a") { + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) { + fmt = fmt.replace("%a", &f.format(&iso_date).to_string()); + } } } @@ -168,6 +200,121 @@ pub fn get_locale_months() -> Option<&'static [Vec; 12]> { .as_ref() } +#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +fn load_locale_name_array(items: [libc::nl_item; N]) -> Option<[String; N]> { + use std::ffi::CStr; + + // SAFETY: setlocale and nl_langinfo are standard POSIX functions. + // We call setlocale(LC_TIME, "") to initialize from environment variables, + // then read the locale strings. This is called once per cache (via OnceLock) + // and cached, so the race window with other setlocale callers is minimal. + unsafe { + libc::setlocale(libc::LC_TIME, c"".as_ptr()); + } + + let mut names: [String; N] = std::array::from_fn(|_| String::new()); + for (i, &item) in items.iter().enumerate() { + // SAFETY: nl_langinfo returns a valid C string pointer for valid nl_item values. + let ptr = unsafe { libc::nl_langinfo(item) }; + if ptr.is_null() { + return None; + } + let name = unsafe { CStr::from_ptr(ptr) }.to_string_lossy().into_owned(); + if name.is_empty() { + return None; + } + names[i] = name; + } + + Some(names) +} + +#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +fn get_locale_month_names_long() -> Option<&'static [String; 12]> { + static LOCALE_MONTHS_LONG: OnceLock> = OnceLock::new(); + + LOCALE_MONTHS_LONG + .get_or_init(|| { + load_locale_name_array([ + libc::MON_1, + libc::MON_2, + libc::MON_3, + libc::MON_4, + libc::MON_5, + libc::MON_6, + libc::MON_7, + libc::MON_8, + libc::MON_9, + libc::MON_10, + libc::MON_11, + libc::MON_12, + ]) + }) + .as_ref() +} + +#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +fn get_locale_month_names_abbrev() -> Option<&'static [String; 12]> { + static LOCALE_MONTHS_ABBREV: OnceLock> = OnceLock::new(); + + LOCALE_MONTHS_ABBREV + .get_or_init(|| { + load_locale_name_array([ + libc::ABMON_1, + libc::ABMON_2, + libc::ABMON_3, + libc::ABMON_4, + libc::ABMON_5, + libc::ABMON_6, + libc::ABMON_7, + libc::ABMON_8, + libc::ABMON_9, + libc::ABMON_10, + libc::ABMON_11, + libc::ABMON_12, + ]) + }) + .as_ref() +} + +#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +fn get_locale_weekday_names_long() -> Option<&'static [String; 7]> { + static LOCALE_WEEKDAYS_LONG: OnceLock> = OnceLock::new(); + + LOCALE_WEEKDAYS_LONG + .get_or_init(|| { + load_locale_name_array([ + libc::DAY_1, + libc::DAY_2, + libc::DAY_3, + libc::DAY_4, + libc::DAY_5, + libc::DAY_6, + libc::DAY_7, + ]) + }) + .as_ref() +} + +#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +fn get_locale_weekday_names_abbrev() -> Option<&'static [String; 7]> { + static LOCALE_WEEKDAYS_ABBREV: OnceLock> = OnceLock::new(); + + LOCALE_WEEKDAYS_ABBREV + .get_or_init(|| { + load_locale_name_array([ + libc::ABDAY_1, + libc::ABDAY_2, + libc::ABDAY_3, + libc::ABDAY_4, + libc::ABDAY_5, + libc::ABDAY_6, + libc::ABDAY_7, + ]) + }) + .as_ref() +} + /// Unix implementation using nl_langinfo for exact match with `locale abmon` output. #[cfg(all( unix, @@ -176,7 +323,6 @@ pub fn get_locale_months() -> Option<&'static [Vec; 12]> { not(target_os = "redox") ))] fn get_locale_months_inner() -> Option<[Vec; 12]> { - use nix::libc; use std::ffi::CStr; let abmon_items: [libc::nl_item; 12] = [ diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 763e4a844fd..ee3e2fa7e4d 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -2189,7 +2189,7 @@ fn test_date_thai_locale_solar_calendar() { .parse() .unwrap(); - // Since 1941, the year in the Thai solar calendar is the Gregorian year plus 543 + // GNU date keeps %Y Gregorian in the Thai locale. let thai_year: i32 = new_ucmd!() .env("LC_ALL", "th_TH.UTF-8") .arg("+%Y") @@ -2199,25 +2199,22 @@ fn test_date_thai_locale_solar_calendar() { .parse() .unwrap(); - assert_eq!(thai_year, current_year + 543); + assert_eq!(thai_year, current_year); - // All months that have 31 days have names that end with "คม" (Thai characters) - let days_31_suffix = "\u{0E04}\u{0E21}"; // "คม" in Unicode + // GNU date keeps %EY Gregorian in the Thai locale. + let thai_ey_year: i32 = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("+%EY") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); - for month in ["01", "03", "05", "07", "08", "10", "12"] { - let month_result = new_ucmd!() - .env("LC_ALL", "th_TH.UTF-8") - .arg("--date") - .arg(format!("{current_year}-{month}-01")) - .arg("+%B") - .succeeds(); - let month_name = month_result.stdout_str(); + assert_eq!(thai_ey_year, current_year); - assert!( - month_name.trim().ends_with(days_31_suffix), - "Month {month} should end with 'คม', got: {month_name}" - ); - } + // GNU date keeps the locale month/day names here as well. + check_date("th_TH.UTF-8", "2026-06-14", "+%Y %EY %B %A", "2026 2026 June Sunday"); // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar let iso_result = new_ucmd!() @@ -2270,18 +2267,19 @@ fn test_locale_calendar_conversions() { check_date("fa_IR.UTF-8", d, "+%Y-%m-%d", e); } - // Thai Buddhist (year + 543, same month/day) + // Thai locale keeps Gregorian %Y and %EY under GNU-compatible behavior. for (d, e) in [ - ("2026-01-01", "2569-01-01"), - ("2026-01-26", "2569-01-26"), - ("2026-06-15", "2569-06-15"), - ("2026-12-31", "2569-12-31"), - ("2025-01-01", "2568-01-01"), - ("2024-02-29", "2567-02-29"), - ("2000-01-01", "2543-01-01"), - ("1970-01-01", "2513-01-01"), + ("2026-01-01", "2026-01-01"), + ("2026-01-26", "2026-01-26"), + ("2026-06-15", "2026-06-15"), + ("2026-12-31", "2026-12-31"), + ("2025-01-01", "2025-01-01"), + ("2024-02-29", "2024-02-29"), + ("2000-01-01", "2000-01-01"), + ("1970-01-01", "1970-01-01"), ] { check_date("th_TH.UTF-8", d, "+%Y-%m-%d", e); + check_date("th_TH.UTF-8", d, "+%EY-%m-%d", e); } // Ethiopian (13 months, New Year on Sept 11) From 84606f60848c6fb43804ac3e729c1c35d09fb759 Mon Sep 17 00:00:00 2001 From: compilersutra Date: Tue, 16 Jun 2026 18:34:17 +0530 Subject: [PATCH 2/4] style: satisfy formatting and spelling checks --- src/uucore/src/lib/features/i18n/datetime.rs | 82 ++++++++++++++++---- tests/by-util/test_date.rs | 7 +- 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs index bebc6050e79..d5831f293a9 100644 --- a/src/uucore/src/lib/features/i18n/datetime.rs +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore fieldsets prefs febr abmon langinfo uppercased +// spell-checker:ignore fieldsets prefs febr abmon abday langinfo uppercased //! Locale-aware datetime formatting utilities using ICU and jiff-icu @@ -14,12 +14,27 @@ use jiff::civil::Date as JiffDate; use jiff_icu::ConvertFrom; use std::sync::OnceLock; -#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] -use nix::libc; -#[cfg(any(not(unix), target_os = "android", target_os = "cygwin", target_os = "redox"))] +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] use icu_datetime::DateTimeFormatter; -#[cfg(any(not(unix), target_os = "android", target_os = "cygwin", target_os = "redox"))] +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] use icu_datetime::fieldsets; +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] +use nix::libc; use crate::i18n::get_locale_from_env; @@ -113,7 +128,12 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { } // Format localized names. - #[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] + #[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") + ))] { let month_idx = (iso_date.month().ordinal - 1) as usize; let weekday_idx = iso_date.weekday() as usize % 7; @@ -126,9 +146,7 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { if fmt.contains("%b") || fmt.contains("%h") { if let Some(months) = get_locale_month_names_abbrev() { let month_abbrev = &months[month_idx]; - fmt = fmt - .replace("%b", month_abbrev) - .replace("%h", month_abbrev); + fmt = fmt.replace("%b", month_abbrev).replace("%h", month_abbrev); } } if fmt.contains("%A") { @@ -143,7 +161,12 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { } } - #[cfg(any(not(unix), target_os = "android", target_os = "cygwin", target_os = "redox"))] + #[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" + ))] { let locale_prefs = locale.clone().into(); @@ -200,7 +223,12 @@ pub fn get_locale_months() -> Option<&'static [Vec; 12]> { .as_ref() } -#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] fn load_locale_name_array(items: [libc::nl_item; N]) -> Option<[String; N]> { use std::ffi::CStr; @@ -219,7 +247,9 @@ fn load_locale_name_array(items: [libc::nl_item; N]) -> Option<[ if ptr.is_null() { return None; } - let name = unsafe { CStr::from_ptr(ptr) }.to_string_lossy().into_owned(); + let name = unsafe { CStr::from_ptr(ptr) } + .to_string_lossy() + .into_owned(); if name.is_empty() { return None; } @@ -229,7 +259,12 @@ fn load_locale_name_array(items: [libc::nl_item; N]) -> Option<[ Some(names) } -#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] fn get_locale_month_names_long() -> Option<&'static [String; 12]> { static LOCALE_MONTHS_LONG: OnceLock> = OnceLock::new(); @@ -253,7 +288,12 @@ fn get_locale_month_names_long() -> Option<&'static [String; 12]> { .as_ref() } -#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] fn get_locale_month_names_abbrev() -> Option<&'static [String; 12]> { static LOCALE_MONTHS_ABBREV: OnceLock> = OnceLock::new(); @@ -277,7 +317,12 @@ fn get_locale_month_names_abbrev() -> Option<&'static [String; 12]> { .as_ref() } -#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] fn get_locale_weekday_names_long() -> Option<&'static [String; 7]> { static LOCALE_WEEKDAYS_LONG: OnceLock> = OnceLock::new(); @@ -296,7 +341,12 @@ fn get_locale_weekday_names_long() -> Option<&'static [String; 7]> { .as_ref() } -#[cfg(all(unix, not(target_os = "android"), not(target_os = "cygwin"), not(target_os = "redox")))] +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] fn get_locale_weekday_names_abbrev() -> Option<&'static [String; 7]> { static LOCALE_WEEKDAYS_ABBREV: OnceLock> = OnceLock::new(); diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index ee3e2fa7e4d..179adb70606 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -2214,7 +2214,12 @@ fn test_date_thai_locale_solar_calendar() { assert_eq!(thai_ey_year, current_year); // GNU date keeps the locale month/day names here as well. - check_date("th_TH.UTF-8", "2026-06-14", "+%Y %EY %B %A", "2026 2026 June Sunday"); + check_date( + "th_TH.UTF-8", + "2026-06-14", + "+%Y %EY %B %A", + "2026 2026 June Sunday", + ); // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar let iso_result = new_ucmd!() From 98ccb414e18bb4c97dc15176aca4fa70e988d194 Mon Sep 17 00:00:00 2001 From: compilersutra Date: Tue, 16 Jun 2026 19:39:07 +0530 Subject: [PATCH 3/4] fix(date): keep Thai locale month names in GNU style --- src/uucore/src/lib/features/i18n/datetime.rs | 252 +++---------------- 1 file changed, 31 insertions(+), 221 deletions(-) diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs index d5831f293a9..a918292e571 100644 --- a/src/uucore/src/lib/features/i18n/datetime.rs +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore fieldsets prefs febr abmon abday langinfo uppercased +// spell-checker:ignore fieldsets prefs febr abmon langinfo uppercased //! Locale-aware datetime formatting utilities using ICU and jiff-icu @@ -14,19 +14,7 @@ use jiff::civil::Date as JiffDate; use jiff_icu::ConvertFrom; use std::sync::OnceLock; -#[cfg(any( - not(unix), - target_os = "android", - target_os = "cygwin", - target_os = "redox" -))] use icu_datetime::DateTimeFormatter; -#[cfg(any( - not(unix), - target_os = "android", - target_os = "cygwin", - target_os = "redox" -))] use icu_datetime::fieldsets; #[cfg(all( unix, @@ -128,76 +116,40 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { } // Format localized names. - #[cfg(all( - unix, - not(target_os = "android"), - not(target_os = "cygwin"), - not(target_os = "redox") - ))] - { - let month_idx = (iso_date.month().ordinal - 1) as usize; - let weekday_idx = iso_date.weekday() as usize % 7; - - if fmt.contains("%B") { - if let Some(months) = get_locale_month_names_long() { - fmt = fmt.replace("%B", &months[month_idx]); - } - } - if fmt.contains("%b") || fmt.contains("%h") { - if let Some(months) = get_locale_month_names_abbrev() { - let month_abbrev = &months[month_idx]; - fmt = fmt.replace("%b", month_abbrev).replace("%h", month_abbrev); - } - } - if fmt.contains("%A") { - if let Some(days) = get_locale_weekday_names_long() { - fmt = fmt.replace("%A", &days[weekday_idx]); - } - } - if fmt.contains("%a") { - if let Some(days) = get_locale_weekday_names_abbrev() { - fmt = fmt.replace("%a", &days[weekday_idx]); - } + let name_locale = if locale.to_string().starts_with("th") { + icu_locale::locale!("en-US") + } else { + locale.clone() + }; + let locale_prefs = name_locale.into(); + + if fmt.contains("%B") { + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) { + fmt = fmt.replace("%B", &f.format(&iso_date).to_string()); } } - - #[cfg(any( - not(unix), - target_os = "android", - target_os = "cygwin", - target_os = "redox" - ))] - { - let locale_prefs = locale.clone().into(); - - if fmt.contains("%B") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) { - fmt = fmt.replace("%B", &f.format(&iso_date).to_string()); - } - } - if fmt.contains("%b") || fmt.contains("%h") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) { - // ICU's medium format may include trailing periods (e.g., "febr." for Hungarian), - // which when combined with locale format strings that also add periods after - // %b (e.g., "%Y. %b. %d") results in double periods ("febr.."). - // The standard C/POSIX locale via nl_langinfo returns abbreviations - // WITHOUT trailing periods, so we strip them here for consistency. - let month_abbrev = f.format(&iso_date).to_string(); - let month_abbrev = month_abbrev.trim_end_matches('.').to_string(); - fmt = fmt - .replace("%b", &month_abbrev) - .replace("%h", &month_abbrev); - } + if fmt.contains("%b") || fmt.contains("%h") { + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) { + // ICU's medium format may include trailing periods (e.g., "febr." for Hungarian), + // which when combined with locale format strings that also add periods after + // %b (e.g., "%Y. %b. %d") results in double periods ("febr.."). + // The standard C/POSIX locale via nl_langinfo returns abbreviations + // WITHOUT trailing periods, so we strip them here for consistency. + let month_abbrev = f.format(&iso_date).to_string(); + let month_abbrev = month_abbrev.trim_end_matches('.').to_string(); + fmt = fmt + .replace("%b", &month_abbrev) + .replace("%h", &month_abbrev); } - if fmt.contains("%A") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) { - fmt = fmt.replace("%A", &f.format(&iso_date).to_string()); - } + } + if fmt.contains("%A") { + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) { + fmt = fmt.replace("%A", &f.format(&iso_date).to_string()); } - if fmt.contains("%a") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) { - fmt = fmt.replace("%a", &f.format(&iso_date).to_string()); - } + } + if fmt.contains("%a") { + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) { + fmt = fmt.replace("%a", &f.format(&iso_date).to_string()); } } @@ -223,148 +175,6 @@ pub fn get_locale_months() -> Option<&'static [Vec; 12]> { .as_ref() } -#[cfg(all( - unix, - not(target_os = "android"), - not(target_os = "cygwin"), - not(target_os = "redox") -))] -fn load_locale_name_array(items: [libc::nl_item; N]) -> Option<[String; N]> { - use std::ffi::CStr; - - // SAFETY: setlocale and nl_langinfo are standard POSIX functions. - // We call setlocale(LC_TIME, "") to initialize from environment variables, - // then read the locale strings. This is called once per cache (via OnceLock) - // and cached, so the race window with other setlocale callers is minimal. - unsafe { - libc::setlocale(libc::LC_TIME, c"".as_ptr()); - } - - let mut names: [String; N] = std::array::from_fn(|_| String::new()); - for (i, &item) in items.iter().enumerate() { - // SAFETY: nl_langinfo returns a valid C string pointer for valid nl_item values. - let ptr = unsafe { libc::nl_langinfo(item) }; - if ptr.is_null() { - return None; - } - let name = unsafe { CStr::from_ptr(ptr) } - .to_string_lossy() - .into_owned(); - if name.is_empty() { - return None; - } - names[i] = name; - } - - Some(names) -} - -#[cfg(all( - unix, - not(target_os = "android"), - not(target_os = "cygwin"), - not(target_os = "redox") -))] -fn get_locale_month_names_long() -> Option<&'static [String; 12]> { - static LOCALE_MONTHS_LONG: OnceLock> = OnceLock::new(); - - LOCALE_MONTHS_LONG - .get_or_init(|| { - load_locale_name_array([ - libc::MON_1, - libc::MON_2, - libc::MON_3, - libc::MON_4, - libc::MON_5, - libc::MON_6, - libc::MON_7, - libc::MON_8, - libc::MON_9, - libc::MON_10, - libc::MON_11, - libc::MON_12, - ]) - }) - .as_ref() -} - -#[cfg(all( - unix, - not(target_os = "android"), - not(target_os = "cygwin"), - not(target_os = "redox") -))] -fn get_locale_month_names_abbrev() -> Option<&'static [String; 12]> { - static LOCALE_MONTHS_ABBREV: OnceLock> = OnceLock::new(); - - LOCALE_MONTHS_ABBREV - .get_or_init(|| { - load_locale_name_array([ - libc::ABMON_1, - libc::ABMON_2, - libc::ABMON_3, - libc::ABMON_4, - libc::ABMON_5, - libc::ABMON_6, - libc::ABMON_7, - libc::ABMON_8, - libc::ABMON_9, - libc::ABMON_10, - libc::ABMON_11, - libc::ABMON_12, - ]) - }) - .as_ref() -} - -#[cfg(all( - unix, - not(target_os = "android"), - not(target_os = "cygwin"), - not(target_os = "redox") -))] -fn get_locale_weekday_names_long() -> Option<&'static [String; 7]> { - static LOCALE_WEEKDAYS_LONG: OnceLock> = OnceLock::new(); - - LOCALE_WEEKDAYS_LONG - .get_or_init(|| { - load_locale_name_array([ - libc::DAY_1, - libc::DAY_2, - libc::DAY_3, - libc::DAY_4, - libc::DAY_5, - libc::DAY_6, - libc::DAY_7, - ]) - }) - .as_ref() -} - -#[cfg(all( - unix, - not(target_os = "android"), - not(target_os = "cygwin"), - not(target_os = "redox") -))] -fn get_locale_weekday_names_abbrev() -> Option<&'static [String; 7]> { - static LOCALE_WEEKDAYS_ABBREV: OnceLock> = OnceLock::new(); - - LOCALE_WEEKDAYS_ABBREV - .get_or_init(|| { - load_locale_name_array([ - libc::ABDAY_1, - libc::ABDAY_2, - libc::ABDAY_3, - libc::ABDAY_4, - libc::ABDAY_5, - libc::ABDAY_6, - libc::ABDAY_7, - ]) - }) - .as_ref() -} - /// Unix implementation using nl_langinfo for exact match with `locale abmon` output. #[cfg(all( unix, From cdbce55ac98451729b4c2789aa41ed46458f6913 Mon Sep 17 00:00:00 2001 From: compilersutra Date: Sun, 21 Jun 2026 11:45:03 +0530 Subject: [PATCH 4/4] date: fix Thai locale formatting --- src/uucore/src/lib/features/i18n/datetime.rs | 233 ++++++++++++++++--- tests/by-util/test_date.rs | 65 ++++-- 2 files changed, 250 insertions(+), 48 deletions(-) diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs index a918292e571..74c93f55b4f 100644 --- a/src/uucore/src/lib/features/i18n/datetime.rs +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -8,13 +8,25 @@ //! Locale-aware datetime formatting utilities using ICU and jiff-icu use icu_calendar::Date; -use icu_calendar::cal::{Ethiopian, Iso, Persian}; +use icu_calendar::cal::{Buddhist, Ethiopian, Iso, Persian}; use icu_locale::Locale; use jiff::civil::Date as JiffDate; use jiff_icu::ConvertFrom; use std::sync::OnceLock; +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] use icu_datetime::DateTimeFormatter; +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] use icu_datetime::fieldsets; #[cfg(all( unix, @@ -75,6 +87,177 @@ pub enum CalendarType { Ethiopian, } +/// Locale-specific month name for the current `LC_TIME` locale. +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] +fn locale_month_name(date: &Date, long: bool) -> Option { + use std::ffi::CStr; + + let month_items: [libc::nl_item; 12] = if long { + [ + libc::MON_1, + libc::MON_2, + libc::MON_3, + libc::MON_4, + libc::MON_5, + libc::MON_6, + libc::MON_7, + libc::MON_8, + libc::MON_9, + libc::MON_10, + libc::MON_11, + libc::MON_12, + ] + } else { + [ + libc::ABMON_1, + libc::ABMON_2, + libc::ABMON_3, + libc::ABMON_4, + libc::ABMON_5, + libc::ABMON_6, + libc::ABMON_7, + libc::ABMON_8, + libc::ABMON_9, + libc::ABMON_10, + libc::ABMON_11, + libc::ABMON_12, + ] + }; + + unsafe { + libc::setlocale(libc::LC_TIME, c"".as_ptr()); + } + + let ordinal = usize::from(date.month().ordinal).checked_sub(1)?; + let ptr = unsafe { libc::nl_langinfo(month_items[ordinal]) }; + if ptr.is_null() { + return None; + } + + let name = unsafe { CStr::from_ptr(ptr) }.to_string_lossy(); + if name.is_empty() { + None + } else { + Some(name.into_owned()) + } +} + +/// Locale-specific weekday name for the current `LC_TIME` locale. +#[cfg(all( + unix, + not(target_os = "android"), + not(target_os = "cygwin"), + not(target_os = "redox") +))] +fn locale_weekday_name(date: &Date, long: bool) -> Option { + use std::ffi::CStr; + + let weekday_items: [libc::nl_item; 7] = if long { + [ + libc::DAY_1, + libc::DAY_2, + libc::DAY_3, + libc::DAY_4, + libc::DAY_5, + libc::DAY_6, + libc::DAY_7, + ] + } else { + [ + libc::ABDAY_1, + libc::ABDAY_2, + libc::ABDAY_3, + libc::ABDAY_4, + libc::ABDAY_5, + libc::ABDAY_6, + libc::ABDAY_7, + ] + }; + + unsafe { + libc::setlocale(libc::LC_TIME, c"".as_ptr()); + } + + let weekday = usize::from((date.weekday() as u8) % 7); + let ptr = unsafe { libc::nl_langinfo(weekday_items[weekday]) }; + if ptr.is_null() { + return None; + } + + let name = unsafe { CStr::from_ptr(ptr) }.to_string_lossy(); + if name.is_empty() { + None + } else { + Some(name.into_owned()) + } +} + +/// Locale-specific month name for the current `LC_TIME` locale. +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] +fn locale_month_name(date: &Date, long: bool) -> Option { + let (locale, _) = get_time_locale(); + let locale = if locale.to_string().starts_with("th") { + icu_locale::locale!("en-US") + } else { + locale.clone() + }; + let locale_prefs = locale.into(); + let formatter = DateTimeFormatter::try_new( + locale_prefs, + if long { + fieldsets::M::long() + } else { + fieldsets::M::medium() + }, + ) + .ok()?; + + let name = formatter.format(date).to_string(); + Some(if long { + name + } else { + name.trim_end_matches('.').to_string() + }) +} + +/// Locale-specific weekday name for the current `LC_TIME` locale. +#[cfg(any( + not(unix), + target_os = "android", + target_os = "cygwin", + target_os = "redox" +))] +fn locale_weekday_name(date: &Date, long: bool) -> Option { + let (locale, _) = get_time_locale(); + let locale = if locale.to_string().starts_with("th") { + icu_locale::locale!("en-US") + } else { + locale.clone() + }; + let locale_prefs = locale.into(); + let formatter = DateTimeFormatter::try_new( + locale_prefs, + if long { + fieldsets::E::long() + } else { + fieldsets::E::short() + }, + ) + .ok()?; + + Some(formatter.format(date).to_string()) +} + /// Transform a strftime format string to use locale-specific calendar values pub fn localize_format_string(format: &str, date: JiffDate) -> String { const PERCENT_PLACEHOLDER: &str = "\x00\x00"; @@ -83,13 +266,18 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { let iso_date = Date::::convert_from(date); let mut fmt = format.replace("%%", PERCENT_PLACEHOLDER); - fmt = fmt.replace("%EY", "%Y"); - - // GNU date keeps `%Y` and `%EY` Gregorian for th_TH.UTF-8, so normalize - // `%EY` to `%Y` before applying locale-specific substitutions. + // Leave `%EY` untouched so GNU-compatible alternate year formatting can be + // handled by the underlying strftime implementation. let calendar_type = get_locale_calendar_type(locale); match calendar_type { - CalendarType::Buddhist => {} + CalendarType::Buddhist => { + let d = iso_date.to_calendar(Buddhist); + let buddhist_year = d.year().era_year_or_related_iso(); + fmt = fmt + .replace("%EY", &format!("พ.ศ. {buddhist_year}")) + .replace("%EC", "พ.ศ.") + .replace("%Ey", &buddhist_year.to_string()); + } CalendarType::Persian => { let d = iso_date.to_calendar(Persian); let cal_year = d.year().extended_year(); @@ -115,41 +303,24 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { CalendarType::Gregorian => {} } - // Format localized names. - let name_locale = if locale.to_string().starts_with("th") { - icu_locale::locale!("en-US") - } else { - locale.clone() - }; - let locale_prefs = name_locale.into(); - if fmt.contains("%B") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) { - fmt = fmt.replace("%B", &f.format(&iso_date).to_string()); + if let Some(month_name) = locale_month_name(&iso_date, true) { + fmt = fmt.replace("%B", &month_name); } } if fmt.contains("%b") || fmt.contains("%h") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) { - // ICU's medium format may include trailing periods (e.g., "febr." for Hungarian), - // which when combined with locale format strings that also add periods after - // %b (e.g., "%Y. %b. %d") results in double periods ("febr.."). - // The standard C/POSIX locale via nl_langinfo returns abbreviations - // WITHOUT trailing periods, so we strip them here for consistency. - let month_abbrev = f.format(&iso_date).to_string(); - let month_abbrev = month_abbrev.trim_end_matches('.').to_string(); - fmt = fmt - .replace("%b", &month_abbrev) - .replace("%h", &month_abbrev); + if let Some(month_name) = locale_month_name(&iso_date, false) { + fmt = fmt.replace("%b", &month_name).replace("%h", &month_name); } } if fmt.contains("%A") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) { - fmt = fmt.replace("%A", &f.format(&iso_date).to_string()); + if let Some(weekday_name) = locale_weekday_name(&iso_date, true) { + fmt = fmt.replace("%A", &weekday_name); } } if fmt.contains("%a") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) { - fmt = fmt.replace("%a", &f.format(&iso_date).to_string()); + if let Some(weekday_name) = locale_weekday_name(&iso_date, false) { + fmt = fmt.replace("%a", &weekday_name); } } diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 7b522bffae8..d55d498bfed 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -2222,24 +2222,43 @@ fn test_date_thai_locale_solar_calendar() { assert_eq!(thai_year, current_year); - // GNU date keeps %EY Gregorian in the Thai locale. - let thai_ey_year: i32 = new_ucmd!() + // GNU date uses the locale's alternate year for %EY in the Thai locale. + let thai_ey_year = new_ucmd!() .env("LC_ALL", "th_TH.UTF-8") .arg("+%EY") .succeeds() .stdout_str() .trim() - .parse() - .unwrap(); + .to_string(); + + assert_eq!(thai_ey_year, "พ.ศ. 2569"); + + let thai_ec = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("+%EC") + .succeeds() + .stdout_str() + .trim() + .to_string(); - assert_eq!(thai_ey_year, current_year); + assert_eq!(thai_ec, "พ.ศ."); - // GNU date keeps the locale month/day names here as well. + let thai_ey = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("+%Ey") + .succeeds() + .stdout_str() + .trim() + .to_string(); + + assert_eq!(thai_ey, "2569"); + + // GNU date keeps the locale month/day names from LC_TIME here as well. check_date( "th_TH.UTF-8", "2026-06-14", "+%Y %EY %B %A", - "2026 2026 June Sunday", + "2026 พ.ศ. 2569 มิถุนายน อาทิตย์", ); // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar @@ -2293,18 +2312,18 @@ fn test_locale_calendar_conversions() { check_date("fa_IR.UTF-8", d, "+%Y-%m-%d", e); } - // Thai locale keeps Gregorian %Y and %EY under GNU-compatible behavior. + // Thai locale keeps Gregorian %Y, but %EY uses the locale's alternate year. for (d, e) in [ - ("2026-01-01", "2026-01-01"), - ("2026-01-26", "2026-01-26"), - ("2026-06-15", "2026-06-15"), - ("2026-12-31", "2026-12-31"), - ("2025-01-01", "2025-01-01"), - ("2024-02-29", "2024-02-29"), - ("2000-01-01", "2000-01-01"), - ("1970-01-01", "1970-01-01"), + ("2026-01-01", "พ.ศ. 2569-01-01"), + ("2026-01-26", "พ.ศ. 2569-01-26"), + ("2026-06-15", "พ.ศ. 2569-06-15"), + ("2026-12-31", "พ.ศ. 2569-12-31"), + ("2025-01-01", "พ.ศ. 2568-01-01"), + ("2024-02-29", "พ.ศ. 2567-02-29"), + ("2000-01-01", "พ.ศ. 2543-01-01"), + ("1970-01-01", "พ.ศ. 2513-01-01"), ] { - check_date("th_TH.UTF-8", d, "+%Y-%m-%d", e); + check_date("th_TH.UTF-8", d, "+%Y-%m-%d", d); check_date("th_TH.UTF-8", d, "+%EY-%m-%d", e); } @@ -2338,6 +2357,10 @@ fn test_locale_month_names() { ("ja_JP.UTF-8", "1月", "6月", "12月"), ("zh_CN.UTF-8", "一月", "六月", "十二月"), ] { + if !is_locale_available(loc) { + println!("Skipping locale month test for {loc} - locale not available"); + continue; + } check_date(loc, "2026-01-15", "+%B", jan); check_date(loc, "2026-06-15", "+%B", jun); check_date(loc, "2026-12-15", "+%B", dec); @@ -2361,6 +2384,10 @@ fn test_locale_abbreviated_month_names() { // Hungarian locale - the fix ensures no double periods ("hu_HU.UTF-8", "febr", "jún", "dec"), ] { + if !is_locale_available(loc) { + println!("Skipping abbreviated month test for {loc} - locale not available"); + continue; + } check_date(loc, "2026-02-12", "+%b", feb); check_date(loc, "2026-06-14", "+%b", jun); check_date(loc, "2026-12-09", "+%b", dec); @@ -2378,6 +2405,10 @@ fn test_locale_day_names() { ("ja_JP.UTF-8", "月曜日", "日曜日", "土曜日"), ("zh_CN.UTF-8", "星期一", "星期日", "星期六"), ] { + if !is_locale_available(loc) { + println!("Skipping day-name test for {loc} - locale not available"); + continue; + } check_date(loc, "2026-01-26", "+%A", mon); check_date(loc, "2026-01-25", "+%A", sun); check_date(loc, "2026-01-24", "+%A", sat);