module Utf8 : sig .. end
Unicode Manipulation
UTF-8 handling, and conversion to UCS-4
val validate : string -> bool
If you read a UTF-8 string from somewhere, you should validate it, or risk random segmentation faults.
val length : string -> int
val from_unichar : Glib.unichar -> string
from_unichar 0xiii converts a code point iii (usually in hexadecimal form) into a string containing the UTF-8 encoded character 0xiii.
See unicode.org for charmaps.
Does not check that the given code point is a valid Unicode code point.
val from_unistring : Glib.unistring -> string
val to_unichar_validated : string -> pos:int Stdlib.ref -> Glib.unichar
to_unichar_validated decodes a UTF-8 encoded code point and checks for incomplete characters, invalid characters and overlong encodings.
Raises Convert.Error if invalid.
val to_unichar : string -> pos:int Stdlib.ref -> Glib.unichar
to_unichar decodes a UTF-8 encoded code point. Result is undefined if pos does not point to a valid UTF-8 encoded character.
val to_unistring : string -> Glib.unistring
to_unistring decodes a UTF-8 encoded string into an array of unichar. The string must be valid.
val first_char : string -> Glib.unichar
val offset_to_pos : string -> pos:int -> off:int -> int
type normalize_mode =
[ `ALL | `ALL_COMPOSE | `DEFAULT | `DEFAULT_COMPOSE ]
val normalize : string -> normalize_mode -> string
val uppercase : string -> string
val lowercase : string -> string
val casefold : string -> string
val collate : string -> string -> int
val collate_key : string -> string