efi: x86: Handle arbitrary Unicode characters
authorH. Peter Anvin <hpa@linux.intel.com>
Fri, 20 Sep 2013 14:55:39 +0000 (09:55 -0500)
committerMatt Fleming <matt.fleming@intel.com>
Thu, 17 Apr 2014 11:29:25 +0000 (12:29 +0100)
Instead of truncating UTF-16 on the assumption that all characters are
ASCII, properly convert it to UTF-8.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
[ Bug and style fixes. ]
Signed-off-by: Roy Franz <roy.franz@linaro.org>
Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
arch/x86/boot/compressed/eboot.c
drivers/firmware/efi/efi-stub-helper.c

index 4703a6c4b8e315050a38533438a27228ddc4758c..0331d765c2bb7130dcf55b7e664a43759035efda 100644 (file)
@@ -1087,8 +1087,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
        hdr->type_of_loader = 0x21;
 
        /* Convert the UTF-16 cmdline to UTF-8 */
-       cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image,
-                                                  &options_size);
+       cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
        if (!cmdline_ptr)
                goto fail;
        hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
index a168dd20511f6c1dc9c45c0b4ae911c0e29dd8cf..eb6d4be9e722ac0ab87c875b6dbcda04b27ce3bd 100644 (file)
@@ -535,53 +535,100 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
        return status;
 }
 
+/*
+ * Get the number of UTF-8 bytes corresponding to a UTF-16 code unit:
+ * 1 byte below 0x80, 2 bytes below 0x800, 3 bytes otherwise.
+ * This overestimates for surrogates, but that is okay: a matched surrogate
+ * pair is counted here as 3 + 3 bytes while efi_utf16_to_utf8() emits only
+ * 4 bytes for it, so a buffer sized from this sum is never too small.
+ */
+static int efi_utf8_bytes(u16 c)
+{
+       return 1 + (c >= 0x80) + (c >= 0x800);
+}
+
+/*
+ * Convert a UTF-16 string, not necessarily null terminated, to UTF-8.
+ *
+ * Reads n 16-bit units from src and writes their UTF-8 encoding to dst.
+ * A high surrogate (0xd800-0xdbff) immediately followed by a low surrogate
+ * (0xdc00-0xdfff) is combined into one code point and consumes two units;
+ * any other surrogate is replaced with U+FFFD.
+ *
+ * No terminator is written and dst is not bounds-checked, so the caller
+ * must provide enough room. Returns a pointer just past the last byte
+ * written.
+ */
+static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
+{
+       unsigned int c;
+
+       while (n--) {
+               c = *src++;
+               if (n && c >= 0xd800 && c <= 0xdbff && /* n != 0: a following unit exists */
+                   *src >= 0xdc00 && *src <= 0xdfff) {
+                       c = 0x10000 + ((c & 0x3ff) << 10) + (*src & 0x3ff);
+                       src++;
+                       n--;
+               }
+               if (c >= 0xd800 && c <= 0xdfff)
+                       c = 0xfffd; /* Unmatched surrogate */
+               if (c < 0x80) { /* one byte, stored as-is */
+                       *dst++ = c;
+                       continue;
+               }
+               if (c < 0x800) { /* two bytes: lead byte, then shared tail at t1 */
+                       *dst++ = 0xc0 + (c >> 6);
+                       goto t1;
+               }
+               if (c < 0x10000) { /* three bytes: lead byte, then shared tail at t2 */
+                       *dst++ = 0xe0 + (c >> 12);
+                       goto t2;
+               }
+               *dst++ = 0xf0 + (c >> 18); /* four bytes: falls through t2 and t1 */
+               *dst++ = 0x80 + ((c >> 12) & 0x3f);
+       t2:
+               *dst++ = 0x80 + ((c >> 6) & 0x3f);
+       t1:
+               *dst++ = 0x80 + (c & 0x3f);
+       }
+
+       return dst;
+}
+
 /*
  * Convert the UTF-16 UEFI command line to UTF-8 to pass to kernel.
  * The string is taken from image->load_options and ends at the first
  * NUL or newline, or after image->load_options_size / 2 units.
  * Size of memory allocated (including the NUL terminator) is returned
  * in *cmd_line_len.
  * Returns NULL if the allocation fails.
  */
-static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg,
-                                     efi_loaded_image_t *image,
-                                     int *cmd_line_len)
+static char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
+                                efi_loaded_image_t *image,
+                                int *cmd_line_len)
 {
-       u16 *s2;
+       const u16 *s2;
        u8 *s1 = NULL;
        unsigned long cmdline_addr = 0;
-       int load_options_size = image->load_options_size / 2; /* ASCII */
-       void *options = image->load_options;
-       int options_size = 0;
+       int load_options_chars = image->load_options_size / 2; /* UTF-16 */
+       const u16 *options = image->load_options;
+       int options_bytes = 0;  /* UTF-8 bytes */
+       int options_chars = 0;  /* UTF-16 chars */
        efi_status_t status;
-       int i;
        u16 zero = 0;
 
        if (options) {
                s2 = options;
-               while (*s2 && *s2 != '\n' && options_size < load_options_size) {
-                       s2++;
-                       options_size++;
+               while (*s2 && *s2 != '\n'
+                      && options_chars < load_options_chars) { /* NOTE(review): *s2 is read before this bound is tested - confirm that is safe */
+                       options_bytes += efi_utf8_bytes(*s2++);
+                       options_chars++;
                }
        }
 
-       if (options_size == 0) {
+       if (!options_chars) {
                /* No command line options, so return empty string*/
-               options_size = 1;
                options = &zero;
        }
 
-       options_size++;  /* NUL termination */
+       options_bytes++;        /* NUL termination */
 
-       status = efi_low_alloc(sys_table_arg, options_size, 0, &cmdline_addr);
+       status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr);
        if (status != EFI_SUCCESS)
                return NULL;
 
        s1 = (u8 *)cmdline_addr;
-       s2 = (u16 *)options;
-
-       for (i = 0; i < options_size - 1; i++)
-               *s1++ = *s2++;
+       s2 = (const u16 *)options;
 
+       s1 = efi_utf16_to_utf8(s1, s2, options_chars); /* returns the position for the terminator */
        *s1 = '\0';
 
-       *cmd_line_len = options_size;
+       *cmd_line_len = options_bytes;
        return (char *)cmdline_addr;
 }