@@ -205,15 +205,6 @@ efi_status_t efi_parse_options(char const *cmdline)
return EFI_SUCCESS;
}
-/*
- * Get the number of UTF-8 bytes corresponding to an UTF-16 character.
- * This overestimates for surrogates, but that is okay.
- */
-static int efi_utf8_bytes(u16 c)
-{
- return 1 + (c >= 0x80) + (c >= 0x800);
-}
-
/*
* Convert an UTF-16 string, not necessarily null terminated, to UTF-8.
*/
@@ -274,10 +265,39 @@ char *efi_convert_cmdline(efi_loaded_image_t *image,
if (options) {
s2 = options;
- while (*s2 && *s2 != '\n'
- && options_chars < load_options_chars) {
- options_bytes += efi_utf8_bytes(*s2++);
+ while (options_chars < load_options_chars) {
+ u16 c = *s2++;
+
+ if (c == L'\0' || c == L'\n')
+ break;
+ /*
+ * Get the number of UTF-8 bytes corresponding to a
+ * UTF-16 character.
+ * The first part handles everything in the BMP.
+ */
+ options_bytes += 1 + (c >= 0x80) + (c >= 0x800);
options_chars++;
+ /*
+ * Add one more byte for valid surrogate pairs. Invalid
+ * surrogates will be replaced with 0xfffd and take up
+ * only 3 bytes.
+ */
+ if ((c & 0xfc00) == 0xd800) {
+ /*
+ * If the very last word is a high surrogate,
+ * we must ignore it since we can't access the
+ * low surrogate.
+ */
+ if (options_chars == load_options_chars) {
+ options_bytes -= 3;
+ options_chars--;
+ break;
+ } else if ((*s2 & 0xfc00) == 0xdc00) {
+ options_bytes++;
+ options_chars++;
+ s2++;
+ }
+ }
}
}
efi_convert_cmdline currently overestimates the length of the equivalent
UTF-8 encoding.

snprintf can now be used to do the conversion to UTF-8; however, it does
not have a way to specify the size of the UTF-16 string, only the size of
the resulting UTF-8 string. So in order to use it, we need to precalculate
the exact UTF-8 size.

Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
---
 .../firmware/efi/libstub/efi-stub-helper.c | 44 ++++++++++++++-----
 1 file changed, 32 insertions(+), 12 deletions(-)