Unicode support

int console_read_unicode(s32 *code)

read Unicode code point from console

Parameters

s32 *code

pointer to store Unicode code point

Return

0 = success

s32 utf8_get(const char **src)

get next UTF-8 code point from buffer

Parameters

const char **src

pointer to current byte, updated to point to next byte

Return

code point, or 0 for end of string, or -1 if no legal code point is found. In case of an error src points to the incorrect byte.

int utf8_put(s32 code, char **dst)

write UTF-8 code point to buffer

Parameters

s32 code

code point

char **dst

pointer to destination buffer, updated to next position

Return

-1 if the input parameters are invalid

size_t utf8_utf16_strnlen(const char *src, size_t count)

length of a truncated utf-8 string after conversion to utf-16

Parameters

const char *src

utf-8 string

size_t count

maximum number of code points to convert

Return

length in u16 after conversion to utf-16 without the trailing 0. If an invalid UTF-8 sequence is hit one u16 will be reserved for a replacement character.

utf8_utf16_strlen

utf8_utf16_strlen (a)

length of a utf-8 string after conversion to utf-16

Parameters

a

utf-8 string

Return

length in u16 after conversion to utf-16 without the trailing 0. If an invalid UTF-8 sequence is hit one u16 will be reserved for a replacement character.

int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)

copy utf-8 string to utf-16 string

Parameters

u16 **dst

destination buffer

const char *src

source buffer

size_t count

maximum number of code points to copy

Return

-1 if the input parameters are invalid

utf8_utf16_strcpy

utf8_utf16_strcpy (d, s)

copy utf-8 string to utf-16 string

Parameters

d

destination buffer

s

source buffer

Return

-1 if the input parameters are invalid

s32 utf16_get(const u16 **src)

get next UTF-16 code point from buffer

Parameters

const u16 **src

pointer to current word, updated to point to next word

Return

code point, or 0 for end of string, or -1 if no legal code point is found. In case of an error src points to the incorrect word.

int utf16_put(s32 code, u16 **dst)

write UTF-16 code point to buffer

Parameters

s32 code

code point

u16 **dst

pointer to destination buffer, updated to next position

Return

-1 if the input parameters are invalid

size_t utf16_strnlen(const u16 *src, size_t count)

length of a truncated utf-16 string

Parameters

const u16 *src

utf-16 string

size_t count

maximum number of code points to convert

Return

length in code points. If an invalid UTF-16 sequence is hit one position will be reserved for a replacement character.

size_t utf16_utf8_strnlen(const u16 *src, size_t count)

length of a truncated utf-16 string after conversion to utf-8

Parameters

const u16 *src

utf-16 string

size_t count

maximum number of code points to convert

Return

length in bytes after conversion to utf-8 without the trailing 0. If an invalid UTF-16 sequence is hit one byte will be reserved for a replacement character.

utf16_utf8_strlen

utf16_utf8_strlen (a)

length of a utf-16 string after conversion to utf-8

Parameters

a

utf-16 string

Return

length in bytes after conversion to utf-8 without the trailing 0. If an invalid UTF-16 sequence is hit one byte will be reserved for a replacement character.

int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)

copy utf-16 string to utf-8 string

Parameters

char **dst

destination buffer

const u16 *src

source buffer

size_t count

maximum number of code points to copy

Return

-1 if the input parameters are invalid

utf16_utf8_strcpy

utf16_utf8_strcpy (d, s)

copy utf-16 string to utf-8 string

Parameters

d

destination buffer

s

source buffer

Return

-1 if the input parameters are invalid

s32 utf_to_lower(const s32 code)

convert a Unicode letter to lower case

Parameters

const s32 code

letter to convert

Return

lower case letter or unchanged letter

s32 utf_to_upper(const s32 code)

convert a Unicode letter to upper case

Parameters

const s32 code

letter to convert

Return

upper case letter or unchanged letter

int u16_strcasecmp(const u16 *s1, const u16 *s2)

compare two u16 strings case insensitively

Parameters

const u16 *s1

first string to compare

const u16 *s2

second string to compare

Return

0 if s1 and s2 are the same when compared case insensitively

< 0 if the first different u16 in s1 is less than the corresponding u16 in s2; > 0 if the first different u16 in s1 is greater than the corresponding u16 in s2

int u16_strncmp(const u16 *s1, const u16 *s2, size_t n)

compare two u16 strings

Parameters

const u16 *s1

first string to compare

const u16 *s2

second string to compare

size_t n

maximum number of u16 to compare

Return

0 if the first n u16 are the same in s1 and s2

< 0 if the first different u16 in s1 is less than the corresponding u16 in s2; > 0 if the first different u16 in s1 is greater than the corresponding u16 in s2

u16_strcmp

u16_strcmp (s1, s2)

compare two u16 strings

Parameters

s1

first string to compare

s2

second string to compare

Return

0 if s1 and s2 are the same

< 0 if the first different u16 in s1 is less than the corresponding u16 in s2; > 0 if the first different u16 in s1 is greater than the corresponding u16 in s2

size_t u16_strsize(const void *in)

count size of u16 string in bytes including the null character

Parameters

const void *in

null terminated u16 string

Description

Counts the number of bytes occupied by a u16 string

Return

bytes in a u16 string

size_t u16_strnlen(const u16 *in, size_t count)

count non-zero words

Parameters

const u16 *in

null terminated u16 string

size_t count

maximum number of words to count

Description

This function matches wcsnlen_s() if the -fshort-wchar compiler flag is set. In the EFI context we explicitly need a function handling u16 strings.

Return

number of non-zero words.

This is not the number of utf-16 letters!

size_t u16_strlen(const void *in)

count non-zero words

Parameters

const void *in

null terminated u16 string

Description

This function matches wcslen() if the -fshort-wchar compiler flag is set. In the EFI context we explicitly need a function handling u16 strings.

Return

number of non-zero words.

This is not the number of utf-16 letters!

u16 *u16_strcpy(u16 *dest, const u16 *src)

copy u16 string

Parameters

u16 *dest

destination buffer

const u16 *src

source buffer (null terminated)

Description

Copy u16 string pointed to by src, including terminating null word, to the buffer pointed to by dest.

Return

‘dest’ address

u16 *u16_strdup(const void *src)

duplicate u16 string

Parameters

const void *src

source buffer (null terminated)

Description

Copy u16 string pointed to by src, including terminating null word, to a newly allocated buffer.

Return

allocated new buffer on success, NULL on failure

size_t u16_strlcat(u16 *dest, const u16 *src, size_t count)

Append a length-limited, NUL-terminated string to another

Parameters

u16 *dest

zero terminated u16 destination string

const u16 *src

zero terminated u16 source string

size_t count

size of buffer in u16 words including trailing 0x0000

Description

Append the source string src to the destination string dest, overwriting the null word at the end of dest and adding a terminating null word.

Return

required size including trailing 0x0000 in u16 words

If return value >= count, truncation occurred.

uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)

Convert a utf16 string to utf8

Parameters

uint8_t *dest

the destination buffer to write the utf8 characters

const uint16_t *src

the source utf16 string

size_t size

the number of utf16 characters to convert

Description

Converts ‘size’ characters of the utf16 string ‘src’ to utf8 written to the ‘dest’ buffer.

NOTE that a single utf16 character can generate up to 3 utf8 characters. See MAX_UTF8_PER_UTF16.

Return

the pointer to the first unwritten byte in ‘dest’

int utf_to_cp(s32 *c, const u16 *codepage)

translate Unicode code point to 8bit codepage

Parameters

s32 *c

pointer to Unicode code point to be translated

const u16 *codepage

Unicode to codepage translation table

Description

Codepoints that do not exist in the codepage are rendered as question mark.

Return

0 on success, -ENOENT if codepoint cannot be translated

int utf8_to_cp437_stream(u8 c, char *buffer)

convert UTF-8 stream to codepage 437

Parameters

u8 c

next UTF-8 character to convert

char *buffer

buffer, at least 5 characters

Return

next codepage 437 character or 0

int utf8_to_utf32_stream(u8 c, char *buffer)

convert UTF-8 byte stream to Unicode code points

Parameters

u8 c

next UTF-8 character to convert

char *buffer

buffer, at least 5 characters

Description

The function is called for each byte c in a UTF-8 stream. The byte is appended to the temporary storage buffer until the UTF-8 stream in buffer describes a Unicode code point.

When a new code point has been decoded it is returned and buffer[0] is set to ‘\0’, otherwise the return value is 0.

The buffer must be at least 5 characters long. Before the first function invocation buffer[0] must be set to ‘\0’.

Return

Unicode code point or 0