mas_i18n/
translator.rs

1// Copyright 2024 New Vector Ltd.
2// Copyright 2023, 2024 The Matrix.org Foundation C.I.C.
3//
4// SPDX-License-Identifier: AGPL-3.0-only
5// Please see LICENSE in the repository root for full details.
6
7use std::{collections::HashMap, fs::File, io::BufReader, str::FromStr};
8
9use camino::{Utf8Path, Utf8PathBuf};
10use icu_experimental::relativetime::{
11    RelativeTimeFormatter, RelativeTimeFormatterOptions, options::Numeric,
12};
13use icu_locid::{Locale, ParserError};
14use icu_locid_transform::fallback::{
15    LocaleFallbackPriority, LocaleFallbackSupplement, LocaleFallbacker, LocaleFallbackerWithConfig,
16};
17use icu_plurals::{PluralRules, PluralsError};
18use icu_provider::{
19    DataError, DataErrorKind, DataKey, DataLocale, DataRequest, DataRequestMetadata, data_key,
20    fallback::LocaleFallbackConfig,
21};
22use icu_provider_adapters::fallback::LocaleFallbackProvider;
23use thiserror::Error;
24use writeable::Writeable;
25
26use crate::{sprintf::Message, translations::TranslationTree};
27
/// Fake data key attached to the [`DataError`]s built by translation lookups.
///
/// It only serves to label those errors; it does not name real ICU data.
const DATA_KEY: DataKey = data_key!("mas/translations@1");
30
/// Locale fallbacker shared by the translation lookups and by the locale
/// selection logic.
///
/// It is built from [`LocaleFallbacker::new`] with a configuration whose
/// priority and fallback supplement are both set to their `Collation` variant.
// NOTE(review): the reason for picking the collation priority/supplement over
// the defaults is not visible from this file — confirm before changing it.
const FALLBACKER: LocaleFallbackerWithConfig<'static> = LocaleFallbacker::new().for_config({
    let mut config = LocaleFallbackConfig::const_default();
    config.priority = LocaleFallbackPriority::Collation;
    config.fallback_supplement = Some(LocaleFallbackSupplement::Collation);
    config
});
37
38/// Construct a [`DataRequest`] for the given locale
39pub fn data_request_for_locale(locale: &DataLocale) -> DataRequest<'_> {
40    let mut metadata = DataRequestMetadata::default();
41    metadata.silent = true;
42    DataRequest { locale, metadata }
43}
44
/// Error type for loading translations
///
/// Every variant records the path that was being processed when the failure
/// happened.
#[derive(Debug, Error)]
pub enum LoadError {
    /// The translation directory could not be opened, or one of its entries
    /// could not be read while iterating over it.
    #[error("Failed to load translation directory {path:?}")]
    ReadDir {
        /// The directory that was being read.
        path: Utf8PathBuf,
        /// The underlying I/O error.
        #[source]
        source: std::io::Error,
    },

    /// A translation file could not be opened.
    #[error("Failed to read translation file {path:?}")]
    ReadFile {
        /// The file that could not be opened.
        path: Utf8PathBuf,
        /// The underlying I/O error.
        #[source]
        source: std::io::Error,
    },

    /// The content of a translation file could not be deserialized from JSON.
    #[error("Failed to deserialize translation file {path:?}")]
    Deserialize {
        /// The file that failed to deserialize.
        path: Utf8PathBuf,
        /// The underlying deserialization error.
        #[source]
        source: serde_json::Error,
    },

    /// The file stem of a translation file is not a valid locale identifier.
    #[error("Invalid locale for file {path:?}")]
    InvalidLocale {
        /// The file whose name could not be parsed as a locale.
        path: Utf8PathBuf,
        /// The locale parsing error.
        #[source]
        source: ParserError,
    },

    /// A directory entry has no file stem to derive a locale from.
    #[error("Invalid file name {path:?}")]
    InvalidFileName {
        /// The offending path.
        path: Utf8PathBuf,
    },
}
79
/// A translator for a set of translations.
///
/// Holds one translation tree per locale and resolves messages from them,
/// optionally walking a locale fallback chain.
#[derive(Debug)]
pub struct Translator {
    /// The translation tree of each available locale.
    translations: HashMap<DataLocale, TranslationTree>,
    /// Data provider used to build the plural rules of a locale.
    plural_provider: LocaleFallbackProvider<icu_plurals::provider::Baked>,
    /// Locale used as a last resort when nothing better matches.
    default_locale: DataLocale,
}
87
88impl Translator {
89    /// Create a new translator from a set of translations.
90    #[must_use]
91    pub fn new(translations: HashMap<DataLocale, TranslationTree>) -> Self {
92        let fallbacker = LocaleFallbacker::new().static_to_owned();
93        let plural_provider = LocaleFallbackProvider::new_with_fallbacker(
94            icu_plurals::provider::Baked,
95            fallbacker.clone(),
96        );
97
98        Self {
99            translations,
100            plural_provider,
101            // TODO: make this configurable
102            default_locale: icu_locid::locale!("en").into(),
103        }
104    }
105
106    /// Load a set of translations from a directory.
107    ///
108    /// The directory should contain one JSON file per locale, with the locale
109    /// being the filename without the extension, e.g. `en-US.json`.
110    ///
111    /// # Parameters
112    ///
113    /// * `path` - The path to load from.
114    ///
115    /// # Errors
116    ///
117    /// Returns an error if the directory cannot be read, or if any of the files
118    /// cannot be parsed.
119    pub fn load_from_path(path: &Utf8Path) -> Result<Self, LoadError> {
120        let mut translations = HashMap::new();
121
122        let dir = path.read_dir_utf8().map_err(|source| LoadError::ReadDir {
123            path: path.to_owned(),
124            source,
125        })?;
126
127        for entry in dir {
128            let entry = entry.map_err(|source| LoadError::ReadDir {
129                path: path.to_owned(),
130                source,
131            })?;
132            let path = entry.into_path();
133            let Some(name) = path.file_stem() else {
134                return Err(LoadError::InvalidFileName { path });
135            };
136
137            let locale: Locale = match Locale::from_str(name) {
138                Ok(locale) => locale,
139                Err(source) => return Err(LoadError::InvalidLocale { path, source }),
140            };
141
142            let file = match File::open(&path) {
143                Ok(file) => file,
144                Err(source) => return Err(LoadError::ReadFile { path, source }),
145            };
146
147            let mut reader = BufReader::new(file);
148
149            let content = match serde_json::from_reader(&mut reader) {
150                Ok(content) => content,
151                Err(source) => return Err(LoadError::Deserialize { path, source }),
152            };
153
154            translations.insert(locale.into(), content);
155        }
156
157        Ok(Self::new(translations))
158    }
159
160    /// Get a message from the tree by key, with locale fallback.
161    ///
162    /// Returns the message and the locale it was found in.
163    /// If the message is not found, returns `None`.
164    ///
165    /// # Parameters
166    ///
167    /// * `locale` - The locale to use.
168    /// * `key` - The key to look up, which is a dot-separated path.
169    #[must_use]
170    pub fn message_with_fallback(
171        &self,
172        locale: DataLocale,
173        key: &str,
174    ) -> Option<(&Message, DataLocale)> {
175        if let Ok(message) = self.message(&locale, key) {
176            return Some((message, locale));
177        }
178
179        let mut iter = FALLBACKER.fallback_for(locale);
180
181        loop {
182            let locale = iter.get();
183
184            if let Ok(message) = self.message(locale, key) {
185                return Some((message, iter.take()));
186            }
187
188            // Try the defaut locale if we hit the `und` locale
189            if locale.is_und() {
190                let message = self.message(&self.default_locale, key).ok()?;
191                return Some((message, self.default_locale.clone()));
192            }
193
194            iter.step();
195        }
196    }
197
198    /// Get a message from the tree by key.
199    ///
200    /// # Parameters
201    ///
202    /// * `locale` - The locale to use.
203    /// * `key` - The key to look up, which is a dot-separated path.
204    ///
205    /// # Errors
206    ///
207    /// Returns an error if the requested locale is not found, or if the
208    /// requested key is not found.
209    pub fn message(&self, locale: &DataLocale, key: &str) -> Result<&Message, DataError> {
210        let request = data_request_for_locale(locale);
211
212        let tree = self
213            .translations
214            .get(locale)
215            .ok_or_else(|| DataErrorKind::MissingLocale.with_req(DATA_KEY, request))?;
216
217        let message = tree
218            .message(key)
219            .ok_or_else(|| DataErrorKind::MissingDataKey.with_req(DATA_KEY, request))?;
220
221        Ok(message)
222    }
223
224    /// Get a plural message from the tree by key, with locale fallback.
225    ///
226    /// Returns the message and the locale it was found in.
227    /// If the message is not found, returns `None`.
228    ///
229    /// # Parameters
230    ///
231    /// * `locale` - The locale to use.
232    /// * `key` - The key to look up, which is a dot-separated path.
233    /// * `count` - The count to use for pluralization.
234    #[must_use]
235    pub fn plural_with_fallback(
236        &self,
237        locale: DataLocale,
238        key: &str,
239        count: usize,
240    ) -> Option<(&Message, DataLocale)> {
241        let mut iter = FALLBACKER.fallback_for(locale);
242
243        loop {
244            let locale = iter.get();
245
246            if let Ok(message) = self.plural(locale, key, count) {
247                return Some((message, iter.take()));
248            }
249
250            // Stop if we hit the `und` locale
251            if locale.is_und() {
252                return None;
253            }
254
255            iter.step();
256        }
257    }
258
259    /// Get a plural message from the tree by key.
260    ///
261    /// # Parameters
262    ///
263    /// * `locale` - The locale to use.
264    /// * `key` - The key to look up, which is a dot-separated path.
265    /// * `count` - The count to use for pluralization.
266    ///
267    /// # Errors
268    ///
269    /// Returns an error if the requested locale is not found, or if the
270    /// requested key is not found.
271    pub fn plural(
272        &self,
273        locale: &DataLocale,
274        key: &str,
275        count: usize,
276    ) -> Result<&Message, PluralsError> {
277        let plurals = PluralRules::try_new_cardinal_unstable(&self.plural_provider, locale)?;
278        let category = plurals.category_for(count);
279
280        let request = data_request_for_locale(locale);
281
282        let tree = self
283            .translations
284            .get(locale)
285            .ok_or_else(|| DataErrorKind::MissingLocale.with_req(DATA_KEY, request))?;
286
287        let message = tree
288            .pluralize(key, category)
289            .ok_or_else(|| DataErrorKind::MissingDataKey.with_req(DATA_KEY, request))?;
290
291        Ok(message)
292    }
293
294    /// Format a relative date
295    ///
296    /// # Parameters
297    ///
298    /// * `locale` - The locale to use.
299    /// * `days` - The number of days to format, where 0 = today, 1 = tomorrow,
300    ///   -1 = yesterday, etc.
301    ///
302    /// # Errors
303    ///
304    /// Returns an error if the requested locale is not found.
305    pub fn relative_date(
306        &self,
307        locale: &DataLocale,
308        days: i64,
309    ) -> Result<String, icu_experimental::relativetime::RelativeTimeError> {
310        // TODO: this is not using the fallbacker
311        let formatter = RelativeTimeFormatter::try_new_long_day(
312            locale,
313            RelativeTimeFormatterOptions {
314                numeric: Numeric::Auto,
315            },
316        )?;
317
318        let date = formatter.format(days.into());
319        Ok(date.write_to_string().into_owned())
320    }
321
322    /// Format time
323    ///
324    /// # Parameters
325    ///
326    /// * `locale` - The locale to use.
327    /// * `time` - The time to format.
328    ///
329    /// # Errors
330    ///
331    /// Returns an error if the requested locale is not found.
332    pub fn short_time<T: icu_datetime::input::IsoTimeInput>(
333        &self,
334        locale: &DataLocale,
335        time: &T,
336    ) -> Result<String, icu_datetime::DateTimeError> {
337        // TODO: this is not using the fallbacker
338        let formatter = icu_datetime::TimeFormatter::try_new_with_length(
339            locale,
340            icu_datetime::options::length::Time::Short,
341        )?;
342
343        Ok(formatter.format_to_string(time))
344    }
345
346    /// Get a list of available locales.
347    #[must_use]
348    pub fn available_locales(&self) -> Vec<&DataLocale> {
349        self.translations.keys().collect()
350    }
351
352    /// Check if a locale is available.
353    #[must_use]
354    pub fn has_locale(&self, locale: &DataLocale) -> bool {
355        self.translations.contains_key(locale)
356    }
357
358    /// Choose the best available locale from a list of candidates.
359    #[must_use]
360    pub fn choose_locale(&self, iter: impl Iterator<Item = DataLocale>) -> DataLocale {
361        for locale in iter {
362            if self.has_locale(&locale) {
363                return locale;
364            }
365
366            let mut fallbacker = FALLBACKER.fallback_for(locale);
367
368            loop {
369                if fallbacker.get().is_und() {
370                    break;
371                }
372
373                if self.has_locale(fallbacker.get()) {
374                    return fallbacker.take();
375                }
376                fallbacker.step();
377            }
378        }
379
380        self.default_locale.clone()
381    }
382}
383
#[cfg(test)]
mod tests {
    use camino::Utf8PathBuf;
    use icu_locid::locale;

    use crate::{sprintf::arg_list, translator::Translator};

    /// Load the translator from the `test_data` directory of the crate.
    fn translator() -> Translator {
        let test_data = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test_data");
        Translator::load_from_path(&test_data).unwrap()
    }

    #[test]
    fn test_message() {
        let translator = translator();

        // Each of those locales has its own `hello` message
        let cases = [
            (locale!("en"), "Hello!"),
            (locale!("fr"), "Bonjour !"),
            (locale!("en-US"), "Hey!"),
        ];
        for (lang, expected) in cases {
            let message = translator.message(&lang.into(), "hello").unwrap();
            assert_eq!(message.format(&arg_list!()).unwrap(), expected);
        }

        // `goodbye` is not in `en-US`, so the exact lookup fails...
        assert!(
            translator
                .message(&locale!("en-US").into(), "goodbye")
                .is_err()
        );

        // ...but the fallback chain finds it in `en`
        let (message, found_in) = translator
            .message_with_fallback(locale!("en-US").into(), "goodbye")
            .unwrap();
        assert_eq!(message.format(&arg_list!()).unwrap(), "Goodbye!");
        assert_eq!(found_in, locale!("en").into());
    }

    #[test]
    fn test_plurals() {
        let translator = translator();
        let english = locale!("en").into();
        let french = locale!("fr").into();

        let message = translator.plural(&english, "active_sessions", 1).unwrap();
        assert_eq!(
            message.format(&arg_list!(count = 1)).unwrap(),
            "1 active session."
        );

        let message = translator.plural(&english, "active_sessions", 2).unwrap();
        assert_eq!(
            message.format(&arg_list!(count = 2)).unwrap(),
            "2 active sessions."
        );

        // In english, zero is plural
        let message = translator.plural(&english, "active_sessions", 0).unwrap();
        assert_eq!(
            message.format(&arg_list!(count = 0)).unwrap(),
            "0 active sessions."
        );

        let message = translator.plural(&french, "active_sessions", 1).unwrap();
        assert_eq!(
            message.format(&arg_list!(count = 1)).unwrap(),
            "1 session active."
        );

        let message = translator.plural(&french, "active_sessions", 2).unwrap();
        assert_eq!(
            message.format(&arg_list!(count = 2)).unwrap(),
            "2 sessions actives."
        );

        // In french, zero is singular
        let message = translator.plural(&french, "active_sessions", 0).unwrap();
        assert_eq!(
            message.format(&arg_list!(count = 0)).unwrap(),
            "0 session active."
        );

        // `en-US` has no plural for this key, so the exact lookup fails...
        assert!(
            translator
                .plural(&locale!("en-US").into(), "active_sessions", 1)
                .is_err()
        );

        // ...but the fallback chain finds it in `en`
        let (message, found_in) = translator
            .plural_with_fallback(locale!("en-US").into(), "active_sessions", 1)
            .unwrap();
        assert_eq!(
            message.format(&arg_list!(count = 1)).unwrap(),
            "1 active session."
        );
        assert_eq!(found_in, locale!("en").into());
    }
}