ICU4C

Summary

Enumerations

Anonymous Enum 117{
  U_PARSE_CONTEXT_LEN = 16
}
enum
The capacity of the context strings in UParseError.
UAcceptResult{
  ULOC_ACCEPT_FAILED = 0,
  ULOC_ACCEPT_VALID = 1,
  ULOC_ACCEPT_FALLBACK = 2
}
enum
Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.
UBidiPairedBracketType{
  U_BPT_NONE,
  U_BPT_OPEN,
  U_BPT_CLOSE,
  U_BPT_COUNT
}
enum
Bidi Paired Bracket Type constants.
UBlockCode{
  UBLOCK_NO_BLOCK = 0,
  UBLOCK_BASIC_LATIN = 1,
  UBLOCK_LATIN_1_SUPPLEMENT =2,
  UBLOCK_LATIN_EXTENDED_A =3,
  UBLOCK_LATIN_EXTENDED_B =4,
  UBLOCK_IPA_EXTENSIONS =5,
  UBLOCK_SPACING_MODIFIER_LETTERS =6,
  UBLOCK_COMBINING_DIACRITICAL_MARKS =7,
  UBLOCK_GREEK =8,
  UBLOCK_CYRILLIC =9,
  UBLOCK_ARMENIAN =10,
  UBLOCK_HEBREW =11,
  UBLOCK_ARABIC =12,
  UBLOCK_SYRIAC =13,
  UBLOCK_THAANA =14,
  UBLOCK_DEVANAGARI =15,
  UBLOCK_BENGALI =16,
  UBLOCK_GURMUKHI =17,
  UBLOCK_GUJARATI =18,
  UBLOCK_ORIYA =19,
  UBLOCK_TAMIL =20,
  UBLOCK_TELUGU =21,
  UBLOCK_KANNADA =22,
  UBLOCK_MALAYALAM =23,
  UBLOCK_SINHALA =24,
  UBLOCK_THAI =25,
  UBLOCK_LAO =26,
  UBLOCK_TIBETAN =27,
  UBLOCK_MYANMAR =28,
  UBLOCK_GEORGIAN =29,
  UBLOCK_HANGUL_JAMO =30,
  UBLOCK_ETHIOPIC =31,
  UBLOCK_CHEROKEE =32,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33,
  UBLOCK_OGHAM =34,
  UBLOCK_RUNIC =35,
  UBLOCK_KHMER =36,
  UBLOCK_MONGOLIAN =37,
  UBLOCK_LATIN_EXTENDED_ADDITIONAL =38,
  UBLOCK_GREEK_EXTENDED =39,
  UBLOCK_GENERAL_PUNCTUATION =40,
  UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41,
  UBLOCK_CURRENCY_SYMBOLS =42,
  UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43,
  UBLOCK_LETTERLIKE_SYMBOLS =44,
  UBLOCK_NUMBER_FORMS =45,
  UBLOCK_ARROWS =46,
  UBLOCK_MATHEMATICAL_OPERATORS =47,
  UBLOCK_MISCELLANEOUS_TECHNICAL =48,
  UBLOCK_CONTROL_PICTURES =49,
  UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50,
  UBLOCK_ENCLOSED_ALPHANUMERICS =51,
  UBLOCK_BOX_DRAWING =52,
  UBLOCK_BLOCK_ELEMENTS =53,
  UBLOCK_GEOMETRIC_SHAPES =54,
  UBLOCK_MISCELLANEOUS_SYMBOLS =55,
  UBLOCK_DINGBATS =56,
  UBLOCK_BRAILLE_PATTERNS =57,
  UBLOCK_CJK_RADICALS_SUPPLEMENT =58,
  UBLOCK_KANGXI_RADICALS =59,
  UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60,
  UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61,
  UBLOCK_HIRAGANA =62,
  UBLOCK_KATAKANA =63,
  UBLOCK_BOPOMOFO =64,
  UBLOCK_HANGUL_COMPATIBILITY_JAMO =65,
  UBLOCK_KANBUN =66,
  UBLOCK_BOPOMOFO_EXTENDED =67,
  UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68,
  UBLOCK_CJK_COMPATIBILITY =69,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71,
  UBLOCK_YI_SYLLABLES =72,
  UBLOCK_YI_RADICALS =73,
  UBLOCK_HANGUL_SYLLABLES =74,
  UBLOCK_HIGH_SURROGATES =75,
  UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76,
  UBLOCK_LOW_SURROGATES =77,
  UBLOCK_PRIVATE_USE_AREA =78,
  UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
  UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79,
  UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80,
  UBLOCK_ARABIC_PRESENTATION_FORMS_A =81,
  UBLOCK_COMBINING_HALF_MARKS =82,
  UBLOCK_CJK_COMPATIBILITY_FORMS =83,
  UBLOCK_SMALL_FORM_VARIANTS =84,
  UBLOCK_ARABIC_PRESENTATION_FORMS_B =85,
  UBLOCK_SPECIALS =86,
  UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87,
  UBLOCK_OLD_ITALIC = 88,
  UBLOCK_GOTHIC = 89,
  UBLOCK_DESERET = 90,
  UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91,
  UBLOCK_MUSICAL_SYMBOLS = 92,
  UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94,
  UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95,
  UBLOCK_TAGS = 96,
  UBLOCK_CYRILLIC_SUPPLEMENT = 97,
  UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
  UBLOCK_TAGALOG = 98,
  UBLOCK_HANUNOO = 99,
  UBLOCK_BUHID = 100,
  UBLOCK_TAGBANWA = 101,
  UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102,
  UBLOCK_SUPPLEMENTAL_ARROWS_A = 103,
  UBLOCK_SUPPLEMENTAL_ARROWS_B = 104,
  UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105,
  UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106,
  UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107,
  UBLOCK_VARIATION_SELECTORS = 108,
  UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109,
  UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110,
  UBLOCK_LIMBU = 111,
  UBLOCK_TAI_LE = 112,
  UBLOCK_KHMER_SYMBOLS = 113,
  UBLOCK_PHONETIC_EXTENSIONS = 114,
  UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115,
  UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116,
  UBLOCK_LINEAR_B_SYLLABARY = 117,
  UBLOCK_LINEAR_B_IDEOGRAMS = 118,
  UBLOCK_AEGEAN_NUMBERS = 119,
  UBLOCK_UGARITIC = 120,
  UBLOCK_SHAVIAN = 121,
  UBLOCK_OSMANYA = 122,
  UBLOCK_CYPRIOT_SYLLABARY = 123,
  UBLOCK_TAI_XUAN_JING_SYMBOLS = 124,
  UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125,
  UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126,
  UBLOCK_ANCIENT_GREEK_NUMBERS = 127,
  UBLOCK_ARABIC_SUPPLEMENT = 128,
  UBLOCK_BUGINESE = 129,
  UBLOCK_CJK_STROKES = 130,
  UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131,
  UBLOCK_COPTIC = 132,
  UBLOCK_ETHIOPIC_EXTENDED = 133,
  UBLOCK_ETHIOPIC_SUPPLEMENT = 134,
  UBLOCK_GEORGIAN_SUPPLEMENT = 135,
  UBLOCK_GLAGOLITIC = 136,
  UBLOCK_KHAROSHTHI = 137,
  UBLOCK_MODIFIER_TONE_LETTERS = 138,
  UBLOCK_NEW_TAI_LUE = 139,
  UBLOCK_OLD_PERSIAN = 140,
  UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141,
  UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142,
  UBLOCK_SYLOTI_NAGRI = 143,
  UBLOCK_TIFINAGH = 144,
  UBLOCK_VERTICAL_FORMS = 145,
  UBLOCK_NKO = 146,
  UBLOCK_BALINESE = 147,
  UBLOCK_LATIN_EXTENDED_C = 148,
  UBLOCK_LATIN_EXTENDED_D = 149,
  UBLOCK_PHAGS_PA = 150,
  UBLOCK_PHOENICIAN = 151,
  UBLOCK_CUNEIFORM = 152,
  UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153,
  UBLOCK_COUNTING_ROD_NUMERALS = 154,
  UBLOCK_SUNDANESE = 155,
  UBLOCK_LEPCHA = 156,
  UBLOCK_OL_CHIKI = 157,
  UBLOCK_CYRILLIC_EXTENDED_A = 158,
  UBLOCK_VAI = 159,
  UBLOCK_CYRILLIC_EXTENDED_B = 160,
  UBLOCK_SAURASHTRA = 161,
  UBLOCK_KAYAH_LI = 162,
  UBLOCK_REJANG = 163,
  UBLOCK_CHAM = 164,
  UBLOCK_ANCIENT_SYMBOLS = 165,
  UBLOCK_PHAISTOS_DISC = 166,
  UBLOCK_LYCIAN = 167,
  UBLOCK_CARIAN = 168,
  UBLOCK_LYDIAN = 169,
  UBLOCK_MAHJONG_TILES = 170,
  UBLOCK_DOMINO_TILES = 171,
  UBLOCK_SAMARITAN = 172,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173,
  UBLOCK_TAI_THAM = 174,
  UBLOCK_VEDIC_EXTENSIONS = 175,
  UBLOCK_LISU = 176,
  UBLOCK_BAMUM = 177,
  UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178,
  UBLOCK_DEVANAGARI_EXTENDED = 179,
  UBLOCK_HANGUL_JAMO_EXTENDED_A = 180,
  UBLOCK_JAVANESE = 181,
  UBLOCK_MYANMAR_EXTENDED_A = 182,
  UBLOCK_TAI_VIET = 183,
  UBLOCK_MEETEI_MAYEK = 184,
  UBLOCK_HANGUL_JAMO_EXTENDED_B = 185,
  UBLOCK_IMPERIAL_ARAMAIC = 186,
  UBLOCK_OLD_SOUTH_ARABIAN = 187,
  UBLOCK_AVESTAN = 188,
  UBLOCK_INSCRIPTIONAL_PARTHIAN = 189,
  UBLOCK_INSCRIPTIONAL_PAHLAVI = 190,
  UBLOCK_OLD_TURKIC = 191,
  UBLOCK_RUMI_NUMERAL_SYMBOLS = 192,
  UBLOCK_KAITHI = 193,
  UBLOCK_EGYPTIAN_HIEROGLYPHS = 194,
  UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195,
  UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197,
  UBLOCK_MANDAIC = 198,
  UBLOCK_BATAK = 199,
  UBLOCK_ETHIOPIC_EXTENDED_A = 200,
  UBLOCK_BRAHMI = 201,
  UBLOCK_BAMUM_SUPPLEMENT = 202,
  UBLOCK_KANA_SUPPLEMENT = 203,
  UBLOCK_PLAYING_CARDS = 204,
  UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205,
  UBLOCK_EMOTICONS = 206,
  UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207,
  UBLOCK_ALCHEMICAL_SYMBOLS = 208,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209,
  UBLOCK_ARABIC_EXTENDED_A = 210,
  UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211,
  UBLOCK_CHAKMA = 212,
  UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213,
  UBLOCK_MEROITIC_CURSIVE = 214,
  UBLOCK_MEROITIC_HIEROGLYPHS = 215,
  UBLOCK_MIAO = 216,
  UBLOCK_SHARADA = 217,
  UBLOCK_SORA_SOMPENG = 218,
  UBLOCK_SUNDANESE_SUPPLEMENT = 219,
  UBLOCK_TAKRI = 220,
  UBLOCK_BASSA_VAH = 221,
  UBLOCK_CAUCASIAN_ALBANIAN = 222,
  UBLOCK_COPTIC_EPACT_NUMBERS = 223,
  UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224,
  UBLOCK_DUPLOYAN = 225,
  UBLOCK_ELBASAN = 226,
  UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227,
  UBLOCK_GRANTHA = 228,
  UBLOCK_KHOJKI = 229,
  UBLOCK_KHUDAWADI = 230,
  UBLOCK_LATIN_EXTENDED_E = 231,
  UBLOCK_LINEAR_A = 232,
  UBLOCK_MAHAJANI = 233,
  UBLOCK_MANICHAEAN = 234,
  UBLOCK_MENDE_KIKAKUI = 235,
  UBLOCK_MODI = 236,
  UBLOCK_MRO = 237,
  UBLOCK_MYANMAR_EXTENDED_B = 238,
  UBLOCK_NABATAEAN = 239,
  UBLOCK_OLD_NORTH_ARABIAN = 240,
  UBLOCK_OLD_PERMIC = 241,
  UBLOCK_ORNAMENTAL_DINGBATS = 242,
  UBLOCK_PAHAWH_HMONG = 243,
  UBLOCK_PALMYRENE = 244,
  UBLOCK_PAU_CIN_HAU = 245,
  UBLOCK_PSALTER_PAHLAVI = 246,
  UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247,
  UBLOCK_SIDDHAM = 248,
  UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249,
  UBLOCK_SUPPLEMENTAL_ARROWS_C = 250,
  UBLOCK_TIRHUTA = 251,
  UBLOCK_WARANG_CITI = 252,
  UBLOCK_AHOM = 253,
  UBLOCK_ANATOLIAN_HIEROGLYPHS = 254,
  UBLOCK_CHEROKEE_SUPPLEMENT = 255,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256,
  UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257,
  UBLOCK_HATRAN = 258,
  UBLOCK_MULTANI = 259,
  UBLOCK_OLD_HUNGARIAN = 260,
  UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261,
  UBLOCK_SUTTON_SIGNWRITING = 262,
  UBLOCK_ADLAM = 263,
  UBLOCK_BHAIKSUKI = 264,
  UBLOCK_CYRILLIC_EXTENDED_C = 265,
  UBLOCK_GLAGOLITIC_SUPPLEMENT = 266,
  UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267,
  UBLOCK_MARCHEN = 268,
  UBLOCK_MONGOLIAN_SUPPLEMENT = 269,
  UBLOCK_NEWA = 270,
  UBLOCK_OSAGE = 271,
  UBLOCK_TANGUT = 272,
  UBLOCK_TANGUT_COMPONENTS = 273,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274,
  UBLOCK_KANA_EXTENDED_A = 275,
  UBLOCK_MASARAM_GONDI = 276,
  UBLOCK_NUSHU = 277,
  UBLOCK_SOYOMBO = 278,
  UBLOCK_SYRIAC_SUPPLEMENT = 279,
  UBLOCK_ZANABAZAR_SQUARE = 280,
  UBLOCK_CHESS_SYMBOLS = 281,
  UBLOCK_DOGRA = 282,
  UBLOCK_GEORGIAN_EXTENDED = 283,
  UBLOCK_GUNJALA_GONDI = 284,
  UBLOCK_HANIFI_ROHINGYA = 285,
  UBLOCK_INDIC_SIYAQ_NUMBERS = 286,
  UBLOCK_MAKASAR = 287,
  UBLOCK_MAYAN_NUMERALS = 288,
  UBLOCK_MEDEFAIDRIN = 289,
  UBLOCK_OLD_SOGDIAN = 290,
  UBLOCK_SOGDIAN = 291,
  UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292,
  UBLOCK_ELYMAIC = 293,
  UBLOCK_NANDINAGARI = 294,
  UBLOCK_NYIAKENG_PUACHUE_HMONG = 295,
  UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296,
  UBLOCK_SMALL_KANA_EXTENSION = 297,
  UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298,
  UBLOCK_TAMIL_SUPPLEMENT = 299,
  UBLOCK_WANCHO = 300,
  UBLOCK_CHORASMIAN = 301,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 302,
  UBLOCK_DIVES_AKURU = 303,
  UBLOCK_KHITAN_SMALL_SCRIPT = 304,
  UBLOCK_LISU_SUPPLEMENT = 305,
  UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING = 306,
  UBLOCK_TANGUT_SUPPLEMENT = 307,
  UBLOCK_YEZIDI = 308,
  UBLOCK_ARABIC_EXTENDED_B = 309,
  UBLOCK_CYPRO_MINOAN = 310,
  UBLOCK_ETHIOPIC_EXTENDED_B = 311,
  UBLOCK_KANA_EXTENDED_B = 312,
  UBLOCK_LATIN_EXTENDED_F = 313,
  UBLOCK_LATIN_EXTENDED_G = 314,
  UBLOCK_OLD_UYGHUR = 315,
  UBLOCK_TANGSA = 316,
  UBLOCK_TOTO = 317,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 318,
  UBLOCK_VITHKUQI = 319,
  UBLOCK_ZNAMENNY_MUSICAL_NOTATION = 320,
  UBLOCK_ARABIC_EXTENDED_C = 321,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 322,
  UBLOCK_CYRILLIC_EXTENDED_D = 323,
  UBLOCK_DEVANAGARI_EXTENDED_A = 324,
  UBLOCK_KAKTOVIK_NUMERALS = 325,
  UBLOCK_KAWI = 326,
  UBLOCK_NAG_MUNDARI = 327,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 328,
  UBLOCK_EGYPTIAN_HIEROGLYPHS_EXTENDED_A = 329,
  UBLOCK_GARAY = 330,
  UBLOCK_GURUNG_KHEMA = 331,
  UBLOCK_KIRAT_RAI = 332,
  UBLOCK_MYANMAR_EXTENDED_C = 333,
  UBLOCK_OL_ONAL = 334,
  UBLOCK_SUNUWAR = 335,
  UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT = 336,
  UBLOCK_TODHRI = 337,
  UBLOCK_TULU_TIGALARI = 338,
  UBLOCK_COUNT = 339,
  UBLOCK_INVALID_CODE =-1
}
enum
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
UBreakIteratorType{
  UBRK_CHARACTER = 0,
  UBRK_WORD = 1,
  UBRK_LINE = 2,
  UBRK_SENTENCE = 3,
  UBRK_TITLE = 4,
  UBRK_COUNT = 5
}
enum
The possible types of text boundaries.
UCPMapRangeOption{
  UCPMAP_RANGE_NORMAL,
  UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
  UCPMAP_RANGE_FIXED_ALL_SURROGATES
}
enum
Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
UCharCategory{
  U_UNASSIGNED = 0,
  U_GENERAL_OTHER_TYPES = 0,
  U_UPPERCASE_LETTER = 1,
  U_LOWERCASE_LETTER = 2,
  U_TITLECASE_LETTER = 3,
  U_MODIFIER_LETTER = 4,
  U_OTHER_LETTER = 5,
  U_NON_SPACING_MARK = 6,
  U_ENCLOSING_MARK = 7,
  U_COMBINING_SPACING_MARK = 8,
  U_DECIMAL_DIGIT_NUMBER = 9,
  U_LETTER_NUMBER = 10,
  U_OTHER_NUMBER = 11,
  U_SPACE_SEPARATOR = 12,
  U_LINE_SEPARATOR = 13,
  U_PARAGRAPH_SEPARATOR = 14,
  U_CONTROL_CHAR = 15,
  U_FORMAT_CHAR = 16,
  U_PRIVATE_USE_CHAR = 17,
  U_SURROGATE = 18,
  U_DASH_PUNCTUATION = 19,
  U_START_PUNCTUATION = 20,
  U_END_PUNCTUATION = 21,
  U_CONNECTOR_PUNCTUATION = 22,
  U_OTHER_PUNCTUATION = 23,
  U_MATH_SYMBOL = 24,
  U_CURRENCY_SYMBOL = 25,
  U_MODIFIER_SYMBOL = 26,
  U_OTHER_SYMBOL = 27,
  U_INITIAL_PUNCTUATION = 28,
  U_FINAL_PUNCTUATION = 29,
  U_CHAR_CATEGORY_COUNT
}
enum
Data for enumerated Unicode general category types.
UCharDirection{
  U_LEFT_TO_RIGHT = 0,
  U_RIGHT_TO_LEFT = 1,
  U_EUROPEAN_NUMBER = 2,
  U_EUROPEAN_NUMBER_SEPARATOR = 3,
  U_EUROPEAN_NUMBER_TERMINATOR = 4,
  U_ARABIC_NUMBER = 5,
  U_COMMON_NUMBER_SEPARATOR = 6,
  U_BLOCK_SEPARATOR = 7,
  U_SEGMENT_SEPARATOR = 8,
  U_WHITE_SPACE_NEUTRAL = 9,
  U_OTHER_NEUTRAL = 10,
  U_LEFT_TO_RIGHT_EMBEDDING = 11,
  U_LEFT_TO_RIGHT_OVERRIDE = 12,
  U_RIGHT_TO_LEFT_ARABIC = 13,
  U_RIGHT_TO_LEFT_EMBEDDING = 14,
  U_RIGHT_TO_LEFT_OVERRIDE = 15,
  U_POP_DIRECTIONAL_FORMAT = 16,
  U_DIR_NON_SPACING_MARK = 17,
  U_BOUNDARY_NEUTRAL = 18,
  U_FIRST_STRONG_ISOLATE = 19,
  U_LEFT_TO_RIGHT_ISOLATE = 20,
  U_RIGHT_TO_LEFT_ISOLATE = 21,
  U_POP_DIRECTIONAL_ISOLATE = 22,
  U_CHAR_DIRECTION_COUNT
}
enum
This specifies the language directional property of a character set.
UCharNameChoice{
  U_UNICODE_CHAR_NAME,
  U_UNICODE_10_CHAR_NAME,
  U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
  U_CHAR_NAME_ALIAS,
  U_CHAR_NAME_CHOICE_COUNT
}
enum
Selector constants for u_charName().
UColAttribute{
  UCOL_FRENCH_COLLATION,
  UCOL_ALTERNATE_HANDLING,
  UCOL_CASE_FIRST,
  UCOL_CASE_LEVEL,
  UCOL_NORMALIZATION_MODE,
  UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
  UCOL_STRENGTH,
  UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
  UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
  UCOL_ATTRIBUTE_COUNT
}
enum
Attributes that collation service understands.
UColAttributeValue{
  UCOL_DEFAULT = -1,
  UCOL_PRIMARY = 0,
  UCOL_SECONDARY = 1,
  UCOL_TERTIARY = 2,
  UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
  UCOL_CE_STRENGTH_LIMIT,
  UCOL_QUATERNARY =3,
  UCOL_IDENTICAL =15,
  UCOL_STRENGTH_LIMIT,
  UCOL_OFF = 16,
  UCOL_ON = 17,
  UCOL_SHIFTED = 20,
  UCOL_NON_IGNORABLE = 21,
  UCOL_LOWER_FIRST = 24,
  UCOL_UPPER_FIRST = 25,
  UCOL_ATTRIBUTE_VALUE_COUNT
}
enum
Enum containing attribute values for controlling collation behavior.
UColBoundMode{
  UCOL_BOUND_LOWER = 0,
  UCOL_BOUND_UPPER = 1,
  UCOL_BOUND_UPPER_LONG = 2,
  UCOL_BOUND_VALUE_COUNT
}
enum
Enum that is taken by the ucol_getBound API. See below for an explanation. Do not change the values assigned to the members of this enum.
UColReorderCode{
  UCOL_REORDER_CODE_DEFAULT = -1,
  UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN,
  UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN,
  UCOL_REORDER_CODE_SPACE = 0x1000,
  UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE,
  UCOL_REORDER_CODE_PUNCTUATION = 0x1001,
  UCOL_REORDER_CODE_SYMBOL = 0x1002,
  UCOL_REORDER_CODE_CURRENCY = 0x1003,
  UCOL_REORDER_CODE_DIGIT = 0x1004,
  UCOL_REORDER_CODE_LIMIT = 0x1005
}
enum
Enum containing the codes for reordering segments of the collation table that are not script codes.
UCollationResult{
  UCOL_EQUAL = 0,
  UCOL_GREATER = 1,
  UCOL_LESS = -1
}
enum
UCOL_LESS is returned if the source string compares as less than the target string in the ucol_strcoll() method.
UDecompositionType{
  U_DT_NONE,
  U_DT_CANONICAL,
  U_DT_COMPAT,
  U_DT_CIRCLE,
  U_DT_FINAL,
  U_DT_FONT,
  U_DT_FRACTION,
  U_DT_INITIAL,
  U_DT_ISOLATED,
  U_DT_MEDIAL,
  U_DT_NARROW,
  U_DT_NOBREAK,
  U_DT_SMALL,
  U_DT_SQUARE,
  U_DT_SUB,
  U_DT_SUPER,
  U_DT_VERTICAL,
  U_DT_WIDE,
  U_DT_COUNT
}
enum
Decomposition Type constants.
UDialectHandling{
  ULDN_STANDARD_NAMES = 0,
  ULDN_DIALECT_NAMES
}
enum
Enum used in LocaleDisplayNames::createInstance.
UDisplayContext{
  UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0,
  UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1,
  UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0,
  UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1,
  UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2,
  UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3,
  UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4,
  UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0,
  UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
  UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
  UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
}
enum
Display context settings.
UDisplayContextType{
  UDISPCTX_TYPE_DIALECT_HANDLING = 0,
  UDISPCTX_TYPE_CAPITALIZATION = 1,
  UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
  UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
}
enum
Display context types, for getting values of a particular setting.
UEastAsianWidth{
  U_EA_NEUTRAL,
  U_EA_AMBIGUOUS,
  U_EA_HALFWIDTH,
  U_EA_FULLWIDTH,
  U_EA_NARROW,
  U_EA_WIDE,
  U_EA_COUNT
}
enum
East Asian Width constants.
UErrorCode{
  U_USING_FALLBACK_WARNING = -128,
  U_ERROR_WARNING_START = -128,
  U_USING_DEFAULT_WARNING = -127,
  U_SAFECLONE_ALLOCATED_WARNING = -126,
  U_STATE_OLD_WARNING = -125,
  U_STRING_NOT_TERMINATED_WARNING = -124,
  U_SORT_KEY_TOO_SHORT_WARNING = -123,
  U_AMBIGUOUS_ALIAS_WARNING = -122,
  U_DIFFERENT_UCA_VERSION = -121,
  U_PLUGIN_CHANGED_LEVEL_WARNING = -120,
  U_ERROR_WARNING_LIMIT,
  U_ZERO_ERROR = 0,
  U_ILLEGAL_ARGUMENT_ERROR = 1,
  U_MISSING_RESOURCE_ERROR = 2,
  U_INVALID_FORMAT_ERROR = 3,
  U_FILE_ACCESS_ERROR = 4,
  U_INTERNAL_PROGRAM_ERROR = 5,
  U_MESSAGE_PARSE_ERROR = 6,
  U_MEMORY_ALLOCATION_ERROR = 7,
  U_INDEX_OUTOFBOUNDS_ERROR = 8,
  U_PARSE_ERROR = 9,
  U_INVALID_CHAR_FOUND = 10,
  U_TRUNCATED_CHAR_FOUND = 11,
  U_ILLEGAL_CHAR_FOUND = 12,
  U_INVALID_TABLE_FORMAT = 13,
  U_INVALID_TABLE_FILE = 14,
  U_BUFFER_OVERFLOW_ERROR = 15,
  U_UNSUPPORTED_ERROR = 16,
  U_RESOURCE_TYPE_MISMATCH = 17,
  U_ILLEGAL_ESCAPE_SEQUENCE = 18,
  U_UNSUPPORTED_ESCAPE_SEQUENCE = 19,
  U_NO_SPACE_AVAILABLE = 20,
  U_CE_NOT_FOUND_ERROR = 21,
  U_PRIMARY_TOO_LONG_ERROR = 22,
  U_STATE_TOO_OLD_ERROR = 23,
  U_TOO_MANY_ALIASES_ERROR = 24,
  U_ENUM_OUT_OF_SYNC_ERROR = 25,
  U_INVARIANT_CONVERSION_ERROR = 26,
  U_INVALID_STATE_ERROR = 27,
  U_COLLATOR_VERSION_MISMATCH = 28,
  U_USELESS_COLLATOR_ERROR = 29,
  U_NO_WRITE_PERMISSION = 30,
  U_INPUT_TOO_LONG_ERROR = 31,
  U_STANDARD_ERROR_LIMIT = 32,
  U_BAD_VARIABLE_DEFINITION =0x10000,
  U_PARSE_ERROR_START = 0x10000,
  U_MALFORMED_RULE,
  U_MALFORMED_SET,
  U_MALFORMED_SYMBOL_REFERENCE,
  U_MALFORMED_UNICODE_ESCAPE,
  U_MALFORMED_VARIABLE_DEFINITION,
  U_MALFORMED_VARIABLE_REFERENCE,
  U_MISMATCHED_SEGMENT_DELIMITERS,
  U_MISPLACED_ANCHOR_START,
  U_MISPLACED_CURSOR_OFFSET,
  U_MISPLACED_QUANTIFIER,
  U_MISSING_OPERATOR,
  U_MISSING_SEGMENT_CLOSE,
  U_MULTIPLE_ANTE_CONTEXTS,
  U_MULTIPLE_CURSORS,
  U_MULTIPLE_POST_CONTEXTS,
  U_TRAILING_BACKSLASH,
  U_UNDEFINED_SEGMENT_REFERENCE,
  U_UNDEFINED_VARIABLE,
  U_UNQUOTED_SPECIAL,
  U_UNTERMINATED_QUOTE,
  U_RULE_MASK_ERROR,
  U_MISPLACED_COMPOUND_FILTER,
  U_MULTIPLE_COMPOUND_FILTERS,
  U_INVALID_RBT_SYNTAX,
  U_INVALID_PROPERTY_PATTERN,
  U_MALFORMED_PRAGMA,
  U_UNCLOSED_SEGMENT,
  U_ILLEGAL_CHAR_IN_SEGMENT,
  U_VARIABLE_RANGE_EXHAUSTED,
  U_VARIABLE_RANGE_OVERLAP,
  U_ILLEGAL_CHARACTER,
  U_INTERNAL_TRANSLITERATOR_ERROR,
  U_INVALID_ID,
  U_INVALID_FUNCTION,
  U_PARSE_ERROR_LIMIT,
  U_UNEXPECTED_TOKEN =0x10100,
  U_FMT_PARSE_ERROR_START =0x10100,
  U_MULTIPLE_DECIMAL_SEPARATORS,
  U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS,
  U_MULTIPLE_EXPONENTIAL_SYMBOLS,
  U_MALFORMED_EXPONENTIAL_PATTERN,
  U_MULTIPLE_PERCENT_SYMBOLS,
  U_MULTIPLE_PERMILL_SYMBOLS,
  U_MULTIPLE_PAD_SPECIFIERS,
  U_PATTERN_SYNTAX_ERROR,
  U_ILLEGAL_PAD_POSITION,
  U_UNMATCHED_BRACES,
  U_UNSUPPORTED_PROPERTY,
  U_UNSUPPORTED_ATTRIBUTE,
  U_ARGUMENT_TYPE_MISMATCH,
  U_DUPLICATE_KEYWORD,
  U_UNDEFINED_KEYWORD,
  U_DEFAULT_KEYWORD_MISSING,
  U_DECIMAL_NUMBER_SYNTAX_ERROR,
  U_FORMAT_INEXACT_ERROR,
  U_NUMBER_ARG_OUTOFBOUNDS_ERROR,
  U_NUMBER_SKELETON_SYNTAX_ERROR,
  U_MF_UNRESOLVED_VARIABLE_ERROR,
  U_MF_SYNTAX_ERROR,
  U_MF_UNKNOWN_FUNCTION_ERROR,
  U_MF_VARIANT_KEY_MISMATCH_ERROR,
  U_MF_FORMATTING_ERROR,
  U_MF_NONEXHAUSTIVE_PATTERN_ERROR,
  U_MF_DUPLICATE_OPTION_NAME_ERROR,
  U_MF_SELECTOR_ERROR,
  U_MF_MISSING_SELECTOR_ANNOTATION_ERROR,
  U_MF_DUPLICATE_DECLARATION_ERROR,
  U_MF_OPERAND_MISMATCH_ERROR,
  U_MF_DUPLICATE_VARIANT_ERROR,
  U_FMT_PARSE_ERROR_LIMIT = 0x10120,
  U_BRK_INTERNAL_ERROR =0x10200,
  U_BRK_ERROR_START =0x10200,
  U_BRK_HEX_DIGITS_EXPECTED,
  U_BRK_SEMICOLON_EXPECTED,
  U_BRK_RULE_SYNTAX,
  U_BRK_UNCLOSED_SET,
  U_BRK_ASSIGN_ERROR,
  U_BRK_VARIABLE_REDFINITION,
  U_BRK_MISMATCHED_PAREN,
  U_BRK_NEW_LINE_IN_QUOTED_STRING,
  U_BRK_UNDEFINED_VARIABLE,
  U_BRK_INIT_ERROR,
  U_BRK_RULE_EMPTY_SET,
  U_BRK_UNRECOGNIZED_OPTION,
  U_BRK_MALFORMED_RULE_TAG,
  U_BRK_ERROR_LIMIT,
  U_REGEX_INTERNAL_ERROR =0x10300,
  U_REGEX_ERROR_START =0x10300,
  U_REGEX_RULE_SYNTAX,
  U_REGEX_INVALID_STATE,
  U_REGEX_BAD_ESCAPE_SEQUENCE,
  U_REGEX_PROPERTY_SYNTAX,
  U_REGEX_UNIMPLEMENTED,
  U_REGEX_MISMATCHED_PAREN,
  U_REGEX_NUMBER_TOO_BIG,
  U_REGEX_BAD_INTERVAL,
  U_REGEX_MAX_LT_MIN,
  U_REGEX_INVALID_BACK_REF,
  U_REGEX_INVALID_FLAG,
  U_REGEX_LOOK_BEHIND_LIMIT,
  U_REGEX_SET_CONTAINS_STRING,
  U_REGEX_OCTAL_TOO_BIG,
  U_REGEX_MISSING_CLOSE_BRACKET =U_REGEX_SET_CONTAINS_STRING+2,
  U_REGEX_INVALID_RANGE,
  U_REGEX_STACK_OVERFLOW,
  U_REGEX_TIME_OUT,
  U_REGEX_STOPPED_BY_CALLER,
  U_REGEX_PATTERN_TOO_BIG,
  U_REGEX_INVALID_CAPTURE_GROUP_NAME,
  U_REGEX_ERROR_LIMIT =U_REGEX_STOPPED_BY_CALLER+3,
  U_IDNA_PROHIBITED_ERROR =0x10400,
  U_IDNA_ERROR_START =0x10400,
  U_IDNA_UNASSIGNED_ERROR,
  U_IDNA_CHECK_BIDI_ERROR,
  U_IDNA_STD3_ASCII_RULES_ERROR,
  U_IDNA_ACE_PREFIX_ERROR,
  U_IDNA_VERIFICATION_ERROR,
  U_IDNA_LABEL_TOO_LONG_ERROR,
  U_IDNA_ZERO_LENGTH_LABEL_ERROR,
  U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
  U_IDNA_ERROR_LIMIT,
  U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
  U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
  U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
  U_PLUGIN_ERROR_START =0x10500,
  U_PLUGIN_TOO_HIGH =0x10500,
  U_PLUGIN_DIDNT_SET_LEVEL,
  U_PLUGIN_ERROR_LIMIT,
  U_ERROR_LIMIT =U_PLUGIN_ERROR_LIMIT
}
enum
Standard ICU4C error code type, a substitute for exceptions.
UGraphemeClusterBreak{
  U_GCB_OTHER = 0,
  U_GCB_CONTROL = 1,
  U_GCB_CR = 2,
  U_GCB_EXTEND = 3,
  U_GCB_L = 4,
  U_GCB_LF = 5,
  U_GCB_LV = 6,
  U_GCB_LVT = 7,
  U_GCB_T = 8,
  U_GCB_V = 9,
  U_GCB_SPACING_MARK = 10,
  U_GCB_PREPEND = 11,
  U_GCB_REGIONAL_INDICATOR = 12,
  U_GCB_E_BASE = 13,
  U_GCB_E_BASE_GAZ = 14,
  U_GCB_E_MODIFIER = 15,
  U_GCB_GLUE_AFTER_ZWJ = 16,
  U_GCB_ZWJ = 17,
  U_GCB_COUNT = 18
}
enum
Grapheme Cluster Break constants.
UHangulSyllableType{
  U_HST_NOT_APPLICABLE,
  U_HST_LEADING_JAMO,
  U_HST_VOWEL_JAMO,
  U_HST_TRAILING_JAMO,
  U_HST_LV_SYLLABLE,
  U_HST_LVT_SYLLABLE,
  U_HST_COUNT
}
enum
Hangul Syllable Type constants.
UIdentifierStatus{
  U_ID_STATUS_RESTRICTED,
  U_ID_STATUS_ALLOWED
}
enum
Identifier Status constants.
UIdentifierType{
  U_ID_TYPE_NOT_CHARACTER,
  U_ID_TYPE_DEPRECATED,
  U_ID_TYPE_DEFAULT_IGNORABLE,
  U_ID_TYPE_NOT_NFKC,
  U_ID_TYPE_NOT_XID,
  U_ID_TYPE_EXCLUSION,
  U_ID_TYPE_OBSOLETE,
  U_ID_TYPE_TECHNICAL,
  U_ID_TYPE_UNCOMMON_USE,
  U_ID_TYPE_LIMITED_USE,
  U_ID_TYPE_INCLUSION,
  U_ID_TYPE_RECOMMENDED
}
enum
Identifier Type constants.
UIndicConjunctBreak{
  U_INCB_NONE,
  U_INCB_CONSONANT,
  U_INCB_EXTEND,
  U_INCB_LINKER
}
enum
Indic Conjunct Break constants.
UIndicPositionalCategory{
  U_INPC_NA,
  U_INPC_BOTTOM,
  U_INPC_BOTTOM_AND_LEFT,
  U_INPC_BOTTOM_AND_RIGHT,
  U_INPC_LEFT,
  U_INPC_LEFT_AND_RIGHT,
  U_INPC_OVERSTRUCK,
  U_INPC_RIGHT,
  U_INPC_TOP,
  U_INPC_TOP_AND_BOTTOM,
  U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
  U_INPC_TOP_AND_LEFT,
  U_INPC_TOP_AND_LEFT_AND_RIGHT,
  U_INPC_TOP_AND_RIGHT,
  U_INPC_VISUAL_ORDER_LEFT,
  U_INPC_TOP_AND_BOTTOM_AND_LEFT
}
enum
Indic Positional Category constants.
UIndicSyllabicCategory{
  U_INSC_OTHER,
  U_INSC_AVAGRAHA,
  U_INSC_BINDU,
  U_INSC_BRAHMI_JOINING_NUMBER,
  U_INSC_CANTILLATION_MARK,
  U_INSC_CONSONANT,
  U_INSC_CONSONANT_DEAD,
  U_INSC_CONSONANT_FINAL,
  U_INSC_CONSONANT_HEAD_LETTER,
  U_INSC_CONSONANT_INITIAL_POSTFIXED,
  U_INSC_CONSONANT_KILLER,
  U_INSC_CONSONANT_MEDIAL,
  U_INSC_CONSONANT_PLACEHOLDER,
  U_INSC_CONSONANT_PRECEDING_REPHA,
  U_INSC_CONSONANT_PREFIXED,
  U_INSC_CONSONANT_SUBJOINED,
  U_INSC_CONSONANT_SUCCEEDING_REPHA,
  U_INSC_CONSONANT_WITH_STACKER,
  U_INSC_GEMINATION_MARK,
  U_INSC_INVISIBLE_STACKER,
  U_INSC_JOINER,
  U_INSC_MODIFYING_LETTER,
  U_INSC_NON_JOINER,
  U_INSC_NUKTA,
  U_INSC_NUMBER,
  U_INSC_NUMBER_JOINER,
  U_INSC_PURE_KILLER,
  U_INSC_REGISTER_SHIFTER,
  U_INSC_SYLLABLE_MODIFIER,
  U_INSC_TONE_LETTER,
  U_INSC_TONE_MARK,
  U_INSC_VIRAMA,
  U_INSC_VISARGA,
  U_INSC_VOWEL,
  U_INSC_VOWEL_DEPENDENT,
  U_INSC_VOWEL_INDEPENDENT,
  U_INSC_REORDERING_KILLER
}
enum
Indic Syllabic Category constants.
UJoiningGroup{
  U_JG_NO_JOINING_GROUP,
  U_JG_AIN,
  U_JG_ALAPH,
  U_JG_ALEF,
  U_JG_BEH,
  U_JG_BETH,
  U_JG_DAL,
  U_JG_DALATH_RISH,
  U_JG_E,
  U_JG_FEH,
  U_JG_FINAL_SEMKATH,
  U_JG_GAF,
  U_JG_GAMAL,
  U_JG_HAH,
  U_JG_TEH_MARBUTA_GOAL,
  U_JG_HAMZA_ON_HEH_GOAL =U_JG_TEH_MARBUTA_GOAL,
  U_JG_HE,
  U_JG_HEH,
  U_JG_HEH_GOAL,
  U_JG_HETH,
  U_JG_KAF,
  U_JG_KAPH,
  U_JG_KNOTTED_HEH,
  U_JG_LAM,
  U_JG_LAMADH,
  U_JG_MEEM,
  U_JG_MIM,
  U_JG_NOON,
  U_JG_NUN,
  U_JG_PE,
  U_JG_QAF,
  U_JG_QAPH,
  U_JG_REH,
  U_JG_REVERSED_PE,
  U_JG_SAD,
  U_JG_SADHE,
  U_JG_SEEN,
  U_JG_SEMKATH,
  U_JG_SHIN,
  U_JG_SWASH_KAF,
  U_JG_SYRIAC_WAW,
  U_JG_TAH,
  U_JG_TAW,
  U_JG_TEH_MARBUTA,
  U_JG_TETH,
  U_JG_WAW,
  U_JG_YEH,
  U_JG_YEH_BARREE,
  U_JG_YEH_WITH_TAIL,
  U_JG_YUDH,
  U_JG_YUDH_HE,
  U_JG_ZAIN,
  U_JG_FE,
  U_JG_KHAPH,
  U_JG_ZHAIN,
  U_JG_BURUSHASKI_YEH_BARREE,
  U_JG_FARSI_YEH,
  U_JG_NYA,
  U_JG_ROHINGYA_YEH,
  U_JG_MANICHAEAN_ALEPH,
  U_JG_MANICHAEAN_AYIN,
  U_JG_MANICHAEAN_BETH,
  U_JG_MANICHAEAN_DALETH,
  U_JG_MANICHAEAN_DHAMEDH,
  U_JG_MANICHAEAN_FIVE,
  U_JG_MANICHAEAN_GIMEL,
  U_JG_MANICHAEAN_HETH,
  U_JG_MANICHAEAN_HUNDRED,
  U_JG_MANICHAEAN_KAPH,
  U_JG_MANICHAEAN_LAMEDH,
  U_JG_MANICHAEAN_MEM,
  U_JG_MANICHAEAN_NUN,
  U_JG_MANICHAEAN_ONE,
  U_JG_MANICHAEAN_PE,
  U_JG_MANICHAEAN_QOPH,
  U_JG_MANICHAEAN_RESH,
  U_JG_MANICHAEAN_SADHE,
  U_JG_MANICHAEAN_SAMEKH,
  U_JG_MANICHAEAN_TAW,
  U_JG_MANICHAEAN_TEN,
  U_JG_MANICHAEAN_TETH,
  U_JG_MANICHAEAN_THAMEDH,
  U_JG_MANICHAEAN_TWENTY,
  U_JG_MANICHAEAN_WAW,
  U_JG_MANICHAEAN_YODH,
  U_JG_MANICHAEAN_ZAYIN,
  U_JG_STRAIGHT_WAW,
  U_JG_AFRICAN_FEH,
  U_JG_AFRICAN_NOON,
  U_JG_AFRICAN_QAF,
  U_JG_MALAYALAM_BHA,
  U_JG_MALAYALAM_JA,
  U_JG_MALAYALAM_LLA,
  U_JG_MALAYALAM_LLLA,
  U_JG_MALAYALAM_NGA,
  U_JG_MALAYALAM_NNA,
  U_JG_MALAYALAM_NNNA,
  U_JG_MALAYALAM_NYA,
  U_JG_MALAYALAM_RA,
  U_JG_MALAYALAM_SSA,
  U_JG_MALAYALAM_TTA,
  U_JG_HANIFI_ROHINGYA_KINNA_YA,
  U_JG_HANIFI_ROHINGYA_PA,
  U_JG_THIN_YEH,
  U_JG_VERTICAL_TAIL,
  U_JG_KASHMIRI_YEH,
  U_JG_COUNT
}
enum
Joining Group constants.
UJoiningType{
  U_JT_NON_JOINING,
  U_JT_JOIN_CAUSING,
  U_JT_DUAL_JOINING,
  U_JT_LEFT_JOINING,
  U_JT_RIGHT_JOINING,
  U_JT_TRANSPARENT,
  U_JT_COUNT
}
enum
Joining Type constants.
ULayoutType{
  ULOC_LAYOUT_LTR = 0,
  ULOC_LAYOUT_RTL = 1,
  ULOC_LAYOUT_TTB = 2,
  ULOC_LAYOUT_BTT = 3,
  ULOC_LAYOUT_UNKNOWN
}
enum
Enums for the return value of the character and line orientation functions.
ULineBreak{
  U_LB_UNKNOWN = 0,
  U_LB_AMBIGUOUS = 1,
  U_LB_ALPHABETIC = 2,
  U_LB_BREAK_BOTH = 3,
  U_LB_BREAK_AFTER = 4,
  U_LB_BREAK_BEFORE = 5,
  U_LB_MANDATORY_BREAK = 6,
  U_LB_CONTINGENT_BREAK = 7,
  U_LB_CLOSE_PUNCTUATION = 8,
  U_LB_COMBINING_MARK = 9,
  U_LB_CARRIAGE_RETURN = 10,
  U_LB_EXCLAMATION = 11,
  U_LB_GLUE = 12,
  U_LB_HYPHEN = 13,
  U_LB_IDEOGRAPHIC = 14,
  U_LB_INSEPARABLE = 15,
  U_LB_INSEPERABLE = U_LB_INSEPARABLE,
  U_LB_INFIX_NUMERIC = 16,
  U_LB_LINE_FEED = 17,
  U_LB_NONSTARTER = 18,
  U_LB_NUMERIC = 19,
  U_LB_OPEN_PUNCTUATION = 20,
  U_LB_POSTFIX_NUMERIC = 21,
  U_LB_PREFIX_NUMERIC = 22,
  U_LB_QUOTATION = 23,
  U_LB_COMPLEX_CONTEXT = 24,
  U_LB_SURROGATE = 25,
  U_LB_SPACE = 26,
  U_LB_BREAK_SYMBOLS = 27,
  U_LB_ZWSPACE = 28,
  U_LB_NEXT_LINE = 29,
  U_LB_WORD_JOINER = 30,
  U_LB_H2 = 31,
  U_LB_H3 = 32,
  U_LB_JL = 33,
  U_LB_JT = 34,
  U_LB_JV = 35,
  U_LB_CLOSE_PARENTHESIS = 36,
  U_LB_CONDITIONAL_JAPANESE_STARTER = 37,
  U_LB_HEBREW_LETTER = 38,
  U_LB_REGIONAL_INDICATOR = 39,
  U_LB_E_BASE = 40,
  U_LB_E_MODIFIER = 41,
  U_LB_ZWJ = 42,
  U_LB_AKSARA = 43,
  U_LB_AKSARA_PREBASE = 44,
  U_LB_AKSARA_START = 45,
  U_LB_VIRAMA_FINAL = 46,
  U_LB_VIRAMA = 47,
  U_LB_COUNT = 48
}
enum
Line Break constants.
ULineBreakTag{
  UBRK_LINE_SOFT = 0,
  UBRK_LINE_SOFT_LIMIT = 100,
  UBRK_LINE_HARD = 100,
  UBRK_LINE_HARD_LIMIT = 200
}
enum
Enum constants for the line break tags returned by getRuleStatus().
ULocAvailableType{
  ULOC_AVAILABLE_DEFAULT,
  ULOC_AVAILABLE_ONLY_LEGACY_ALIASES,
  ULOC_AVAILABLE_WITH_LEGACY_ALIASES,
  ULOC_AVAILABLE_COUNT
}
enum
Types for uloc_getAvailableByType and uloc_countAvailableByType.
ULocDataLocaleType{
  ULOC_ACTUAL_LOCALE = 0,
  ULOC_VALID_LOCALE = 1,
  ULOC_REQUESTED_LOCALE = 2,
  ULOC_DATA_LOCALE_TYPE_LIMIT = 3
}
enum
Constants for *_getLocale(). Allows the user to select whether they want information on the requested, valid, or actual locale.
UNormalization2Mode{
  UNORM2_COMPOSE,
  UNORM2_DECOMPOSE,
  UNORM2_FCD,
  UNORM2_COMPOSE_CONTIGUOUS
}
enum
Constants for normalization modes.
UNormalizationCheckResult{
  UNORM_NO,
  UNORM_YES,
  UNORM_MAYBE
}
enum
Result values for normalization quick check functions.
UNumericType{
  U_NT_NONE,
  U_NT_DECIMAL,
  U_NT_DIGIT,
  U_NT_NUMERIC,
  U_NT_COUNT
}
enum
Numeric Type constants.
UProperty{
  UCHAR_ALPHABETIC =0,
  UCHAR_BINARY_START =UCHAR_ALPHABETIC,
  UCHAR_ASCII_HEX_DIGIT =1,
  UCHAR_BIDI_CONTROL =2,
  UCHAR_BIDI_MIRRORED =3,
  UCHAR_DASH =4,
  UCHAR_DEFAULT_IGNORABLE_CODE_POINT =5,
  UCHAR_DEPRECATED =6,
  UCHAR_DIACRITIC =7,
  UCHAR_EXTENDER =8,
  UCHAR_FULL_COMPOSITION_EXCLUSION =9,
  UCHAR_GRAPHEME_BASE =10,
  UCHAR_GRAPHEME_EXTEND =11,
  UCHAR_GRAPHEME_LINK =12,
  UCHAR_HEX_DIGIT =13,
  UCHAR_HYPHEN =14,
  UCHAR_ID_CONTINUE =15,
  UCHAR_ID_START =16,
  UCHAR_IDEOGRAPHIC =17,
  UCHAR_IDS_BINARY_OPERATOR =18,
  UCHAR_IDS_TRINARY_OPERATOR =19,
  UCHAR_JOIN_CONTROL =20,
  UCHAR_LOGICAL_ORDER_EXCEPTION =21,
  UCHAR_LOWERCASE =22,
  UCHAR_MATH =23,
  UCHAR_NONCHARACTER_CODE_POINT =24,
  UCHAR_QUOTATION_MARK =25,
  UCHAR_RADICAL =26,
  UCHAR_SOFT_DOTTED =27,
  UCHAR_TERMINAL_PUNCTUATION =28,
  UCHAR_UNIFIED_IDEOGRAPH =29,
  UCHAR_UPPERCASE =30,
  UCHAR_WHITE_SPACE =31,
  UCHAR_XID_CONTINUE =32,
  UCHAR_XID_START =33,
  UCHAR_CASE_SENSITIVE =34,
  UCHAR_S_TERM =35,
  UCHAR_VARIATION_SELECTOR =36,
  UCHAR_NFD_INERT =37,
  UCHAR_NFKD_INERT =38,
  UCHAR_NFC_INERT =39,
  UCHAR_NFKC_INERT =40,
  UCHAR_SEGMENT_STARTER =41,
  UCHAR_PATTERN_SYNTAX =42,
  UCHAR_PATTERN_WHITE_SPACE =43,
  UCHAR_POSIX_ALNUM =44,
  UCHAR_POSIX_BLANK =45,
  UCHAR_POSIX_GRAPH =46,
  UCHAR_POSIX_PRINT =47,
  UCHAR_POSIX_XDIGIT =48,
  UCHAR_CASED =49,
  UCHAR_CASE_IGNORABLE =50,
  UCHAR_CHANGES_WHEN_LOWERCASED =51,
  UCHAR_CHANGES_WHEN_UPPERCASED =52,
  UCHAR_CHANGES_WHEN_TITLECASED =53,
  UCHAR_CHANGES_WHEN_CASEFOLDED =54,
  UCHAR_CHANGES_WHEN_CASEMAPPED =55,
  UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED =56,
  UCHAR_EMOJI =57,
  UCHAR_EMOJI_PRESENTATION =58,
  UCHAR_EMOJI_MODIFIER =59,
  UCHAR_EMOJI_MODIFIER_BASE =60,
  UCHAR_EMOJI_COMPONENT =61,
  UCHAR_REGIONAL_INDICATOR =62,
  UCHAR_PREPENDED_CONCATENATION_MARK =63,
  UCHAR_EXTENDED_PICTOGRAPHIC =64,
  UCHAR_BASIC_EMOJI =65,
  UCHAR_EMOJI_KEYCAP_SEQUENCE =66,
  UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE =67,
  UCHAR_RGI_EMOJI_FLAG_SEQUENCE =68,
  UCHAR_RGI_EMOJI_TAG_SEQUENCE =69,
  UCHAR_RGI_EMOJI_ZWJ_SEQUENCE =70,
  UCHAR_RGI_EMOJI =71,
  UCHAR_IDS_UNARY_OPERATOR =72,
  UCHAR_ID_COMPAT_MATH_START =73,
  UCHAR_ID_COMPAT_MATH_CONTINUE =74,
  UCHAR_MODIFIER_COMBINING_MARK =75,
  UCHAR_BINARY_LIMIT =76,
  UCHAR_BIDI_CLASS =0x1000,
  UCHAR_INT_START =UCHAR_BIDI_CLASS,
  UCHAR_BLOCK =0x1001,
  UCHAR_CANONICAL_COMBINING_CLASS =0x1002,
  UCHAR_DECOMPOSITION_TYPE =0x1003,
  UCHAR_EAST_ASIAN_WIDTH =0x1004,
  UCHAR_GENERAL_CATEGORY =0x1005,
  UCHAR_JOINING_GROUP =0x1006,
  UCHAR_JOINING_TYPE =0x1007,
  UCHAR_LINE_BREAK =0x1008,
  UCHAR_NUMERIC_TYPE =0x1009,
  UCHAR_SCRIPT =0x100A,
  UCHAR_HANGUL_SYLLABLE_TYPE =0x100B,
  UCHAR_NFD_QUICK_CHECK =0x100C,
  UCHAR_NFKD_QUICK_CHECK =0x100D,
  UCHAR_NFC_QUICK_CHECK =0x100E,
  UCHAR_NFKC_QUICK_CHECK =0x100F,
  UCHAR_LEAD_CANONICAL_COMBINING_CLASS =0x1010,
  UCHAR_TRAIL_CANONICAL_COMBINING_CLASS =0x1011,
  UCHAR_GRAPHEME_CLUSTER_BREAK =0x1012,
  UCHAR_SENTENCE_BREAK =0x1013,
  UCHAR_WORD_BREAK =0x1014,
  UCHAR_BIDI_PAIRED_BRACKET_TYPE =0x1015,
  UCHAR_INDIC_POSITIONAL_CATEGORY =0x1016,
  UCHAR_INDIC_SYLLABIC_CATEGORY =0x1017,
  UCHAR_VERTICAL_ORIENTATION =0x1018,
  UCHAR_IDENTIFIER_STATUS =0x1019,
  UCHAR_INDIC_CONJUNCT_BREAK =0x101A,
  UCHAR_INT_LIMIT =0x101B,
  UCHAR_GENERAL_CATEGORY_MASK =0x2000,
  UCHAR_MASK_START =UCHAR_GENERAL_CATEGORY_MASK,
  UCHAR_MASK_LIMIT =0x2001,
  UCHAR_NUMERIC_VALUE =0x3000,
  UCHAR_DOUBLE_START =UCHAR_NUMERIC_VALUE,
  UCHAR_DOUBLE_LIMIT =0x3001,
  UCHAR_AGE =0x4000,
  UCHAR_STRING_START =UCHAR_AGE,
  UCHAR_BIDI_MIRRORING_GLYPH =0x4001,
  UCHAR_CASE_FOLDING =0x4002,
  UCHAR_ISO_COMMENT =0x4003,
  UCHAR_LOWERCASE_MAPPING =0x4004,
  UCHAR_NAME =0x4005,
  UCHAR_SIMPLE_CASE_FOLDING =0x4006,
  UCHAR_SIMPLE_LOWERCASE_MAPPING =0x4007,
  UCHAR_SIMPLE_TITLECASE_MAPPING =0x4008,
  UCHAR_SIMPLE_UPPERCASE_MAPPING =0x4009,
  UCHAR_TITLECASE_MAPPING =0x400A,
  UCHAR_UNICODE_1_NAME =0x400B,
  UCHAR_UPPERCASE_MAPPING =0x400C,
  UCHAR_BIDI_PAIRED_BRACKET =0x400D,
  UCHAR_STRING_LIMIT =0x400E,
  UCHAR_SCRIPT_EXTENSIONS =0x7000,
  UCHAR_OTHER_PROPERTY_START =UCHAR_SCRIPT_EXTENSIONS,
  UCHAR_IDENTIFIER_TYPE =0x7001,
  UCHAR_OTHER_PROPERTY_LIMIT =0x7002,
  UCHAR_INVALID_CODE = -1
}
enum
Selection constants for Unicode properties.
UPropertyNameChoice{
  U_SHORT_PROPERTY_NAME,
  U_LONG_PROPERTY_NAME,
  U_PROPERTY_NAME_CHOICE_COUNT
}
enum
Selector constants for u_getPropertyName() and u_getPropertyValueName().
UScriptCode{
  USCRIPT_INVALID_CODE = -1,
  USCRIPT_COMMON = 0,
  USCRIPT_INHERITED = 1,
  USCRIPT_ARABIC = 2,
  USCRIPT_ARMENIAN = 3,
  USCRIPT_BENGALI = 4,
  USCRIPT_BOPOMOFO = 5,
  USCRIPT_CHEROKEE = 6,
  USCRIPT_COPTIC = 7,
  USCRIPT_CYRILLIC = 8,
  USCRIPT_DESERET = 9,
  USCRIPT_DEVANAGARI = 10,
  USCRIPT_ETHIOPIC = 11,
  USCRIPT_GEORGIAN = 12,
  USCRIPT_GOTHIC = 13,
  USCRIPT_GREEK = 14,
  USCRIPT_GUJARATI = 15,
  USCRIPT_GURMUKHI = 16,
  USCRIPT_HAN = 17,
  USCRIPT_HANGUL = 18,
  USCRIPT_HEBREW = 19,
  USCRIPT_HIRAGANA = 20,
  USCRIPT_KANNADA = 21,
  USCRIPT_KATAKANA = 22,
  USCRIPT_KHMER = 23,
  USCRIPT_LAO = 24,
  USCRIPT_LATIN = 25,
  USCRIPT_MALAYALAM = 26,
  USCRIPT_MONGOLIAN = 27,
  USCRIPT_MYANMAR = 28,
  USCRIPT_OGHAM = 29,
  USCRIPT_OLD_ITALIC = 30,
  USCRIPT_ORIYA = 31,
  USCRIPT_RUNIC = 32,
  USCRIPT_SINHALA = 33,
  USCRIPT_SYRIAC = 34,
  USCRIPT_TAMIL = 35,
  USCRIPT_TELUGU = 36,
  USCRIPT_THAANA = 37,
  USCRIPT_THAI = 38,
  USCRIPT_TIBETAN = 39,
  USCRIPT_CANADIAN_ABORIGINAL = 40,
  USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
  USCRIPT_YI = 41,
  USCRIPT_TAGALOG = 42,
  USCRIPT_HANUNOO = 43,
  USCRIPT_BUHID = 44,
  USCRIPT_TAGBANWA = 45,
  USCRIPT_BRAILLE = 46,
  USCRIPT_CYPRIOT = 47,
  USCRIPT_LIMBU = 48,
  USCRIPT_LINEAR_B = 49,
  USCRIPT_OSMANYA = 50,
  USCRIPT_SHAVIAN = 51,
  USCRIPT_TAI_LE = 52,
  USCRIPT_UGARITIC = 53,
  USCRIPT_KATAKANA_OR_HIRAGANA = 54,
  USCRIPT_BUGINESE = 55,
  USCRIPT_GLAGOLITIC = 56,
  USCRIPT_KHAROSHTHI = 57,
  USCRIPT_SYLOTI_NAGRI = 58,
  USCRIPT_NEW_TAI_LUE = 59,
  USCRIPT_TIFINAGH = 60,
  USCRIPT_OLD_PERSIAN = 61,
  USCRIPT_BALINESE = 62,
  USCRIPT_BATAK = 63,
  USCRIPT_BLISSYMBOLS = 64,
  USCRIPT_BRAHMI = 65,
  USCRIPT_CHAM = 66,
  USCRIPT_CIRTH = 67,
  USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68,
  USCRIPT_DEMOTIC_EGYPTIAN = 69,
  USCRIPT_HIERATIC_EGYPTIAN = 70,
  USCRIPT_EGYPTIAN_HIEROGLYPHS = 71,
  USCRIPT_KHUTSURI = 72,
  USCRIPT_SIMPLIFIED_HAN = 73,
  USCRIPT_TRADITIONAL_HAN = 74,
  USCRIPT_PAHAWH_HMONG = 75,
  USCRIPT_OLD_HUNGARIAN = 76,
  USCRIPT_HARAPPAN_INDUS = 77,
  USCRIPT_JAVANESE = 78,
  USCRIPT_KAYAH_LI = 79,
  USCRIPT_LATIN_FRAKTUR = 80,
  USCRIPT_LATIN_GAELIC = 81,
  USCRIPT_LEPCHA = 82,
  USCRIPT_LINEAR_A = 83,
  USCRIPT_MANDAIC = 84,
  USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
  USCRIPT_MAYAN_HIEROGLYPHS = 85,
  USCRIPT_MEROITIC_HIEROGLYPHS = 86,
  USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
  USCRIPT_NKO = 87,
  USCRIPT_ORKHON = 88,
  USCRIPT_OLD_PERMIC = 89,
  USCRIPT_PHAGS_PA = 90,
  USCRIPT_PHOENICIAN = 91,
  USCRIPT_MIAO = 92,
  USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
  USCRIPT_RONGORONGO = 93,
  USCRIPT_SARATI = 94,
  USCRIPT_ESTRANGELO_SYRIAC = 95,
  USCRIPT_WESTERN_SYRIAC = 96,
  USCRIPT_EASTERN_SYRIAC = 97,
  USCRIPT_TENGWAR = 98,
  USCRIPT_VAI = 99,
  USCRIPT_VISIBLE_SPEECH = 100,
  USCRIPT_CUNEIFORM = 101,
  USCRIPT_UNWRITTEN_LANGUAGES = 102,
  USCRIPT_UNKNOWN = 103,
  USCRIPT_CARIAN = 104,
  USCRIPT_JAPANESE = 105,
  USCRIPT_LANNA = 106,
  USCRIPT_LYCIAN = 107,
  USCRIPT_LYDIAN = 108,
  USCRIPT_OL_CHIKI = 109,
  USCRIPT_REJANG = 110,
  USCRIPT_SAURASHTRA = 111,
  USCRIPT_SIGN_WRITING = 112,
  USCRIPT_SUNDANESE = 113,
  USCRIPT_MOON = 114,
  USCRIPT_MEITEI_MAYEK = 115,
  USCRIPT_IMPERIAL_ARAMAIC = 116,
  USCRIPT_AVESTAN = 117,
  USCRIPT_CHAKMA = 118,
  USCRIPT_KOREAN = 119,
  USCRIPT_KAITHI = 120,
  USCRIPT_MANICHAEAN = 121,
  USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,
  USCRIPT_PSALTER_PAHLAVI = 123,
  USCRIPT_BOOK_PAHLAVI = 124,
  USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,
  USCRIPT_SAMARITAN = 126,
  USCRIPT_TAI_VIET = 127,
  USCRIPT_MATHEMATICAL_NOTATION = 128,
  USCRIPT_SYMBOLS = 129,
  USCRIPT_BAMUM = 130,
  USCRIPT_LISU = 131,
  USCRIPT_NAKHI_GEBA = 132,
  USCRIPT_OLD_SOUTH_ARABIAN = 133,
  USCRIPT_BASSA_VAH = 134,
  USCRIPT_DUPLOYAN = 135,
  USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN,
  USCRIPT_ELBASAN = 136,
  USCRIPT_GRANTHA = 137,
  USCRIPT_KPELLE = 138,
  USCRIPT_LOMA = 139,
  USCRIPT_MENDE = 140,
  USCRIPT_MEROITIC_CURSIVE = 141,
  USCRIPT_OLD_NORTH_ARABIAN = 142,
  USCRIPT_NABATAEAN = 143,
  USCRIPT_PALMYRENE = 144,
  USCRIPT_KHUDAWADI = 145,
  USCRIPT_SINDHI = USCRIPT_KHUDAWADI,
  USCRIPT_WARANG_CITI = 146,
  USCRIPT_AFAKA = 147,
  USCRIPT_JURCHEN = 148,
  USCRIPT_MRO = 149,
  USCRIPT_NUSHU = 150,
  USCRIPT_SHARADA = 151,
  USCRIPT_SORA_SOMPENG = 152,
  USCRIPT_TAKRI = 153,
  USCRIPT_TANGUT = 154,
  USCRIPT_WOLEAI = 155,
  USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,
  USCRIPT_KHOJKI = 157,
  USCRIPT_TIRHUTA = 158,
  USCRIPT_CAUCASIAN_ALBANIAN = 159,
  USCRIPT_MAHAJANI = 160,
  USCRIPT_AHOM = 161,
  USCRIPT_HATRAN = 162,
  USCRIPT_MODI = 163,
  USCRIPT_MULTANI = 164,
  USCRIPT_PAU_CIN_HAU = 165,
  USCRIPT_SIDDHAM = 166,
  USCRIPT_ADLAM = 167,
  USCRIPT_BHAIKSUKI = 168,
  USCRIPT_MARCHEN = 169,
  USCRIPT_NEWA = 170,
  USCRIPT_OSAGE = 171,
  USCRIPT_HAN_WITH_BOPOMOFO = 172,
  USCRIPT_JAMO = 173,
  USCRIPT_SYMBOLS_EMOJI = 174,
  USCRIPT_MASARAM_GONDI = 175,
  USCRIPT_SOYOMBO = 176,
  USCRIPT_ZANABAZAR_SQUARE = 177,
  USCRIPT_DOGRA = 178,
  USCRIPT_GUNJALA_GONDI = 179,
  USCRIPT_MAKASAR = 180,
  USCRIPT_MEDEFAIDRIN = 181,
  USCRIPT_HANIFI_ROHINGYA = 182,
  USCRIPT_SOGDIAN = 183,
  USCRIPT_OLD_SOGDIAN = 184,
  USCRIPT_ELYMAIC = 185,
  USCRIPT_NYIAKENG_PUACHUE_HMONG = 186,
  USCRIPT_NANDINAGARI = 187,
  USCRIPT_WANCHO = 188,
  USCRIPT_CHORASMIAN = 189,
  USCRIPT_DIVES_AKURU = 190,
  USCRIPT_KHITAN_SMALL_SCRIPT = 191,
  USCRIPT_YEZIDI = 192,
  USCRIPT_CYPRO_MINOAN = 193,
  USCRIPT_OLD_UYGHUR = 194,
  USCRIPT_TANGSA = 195,
  USCRIPT_TOTO = 196,
  USCRIPT_VITHKUQI = 197,
  USCRIPT_KAWI = 198,
  USCRIPT_NAG_MUNDARI = 199,
  USCRIPT_ARABIC_NASTALIQ = 200,
  USCRIPT_GARAY = 201,
  USCRIPT_GURUNG_KHEMA = 202,
  USCRIPT_KIRAT_RAI = 203,
  USCRIPT_OL_ONAL = 204,
  USCRIPT_SUNUWAR = 205,
  USCRIPT_TODHRI = 206,
  USCRIPT_TULU_TIGALARI = 207,
  USCRIPT_CODE_LIMIT = 208
}
enum
Constants for ISO 15924 script codes.
UScriptUsage{
  USCRIPT_USAGE_NOT_ENCODED,
  USCRIPT_USAGE_UNKNOWN,
  USCRIPT_USAGE_EXCLUDED,
  USCRIPT_USAGE_LIMITED_USE,
  USCRIPT_USAGE_ASPIRATIONAL,
  USCRIPT_USAGE_RECOMMENDED
}
enum
Script usage constants.
USentenceBreak{
  U_SB_OTHER = 0,
  U_SB_ATERM = 1,
  U_SB_CLOSE = 2,
  U_SB_FORMAT = 3,
  U_SB_LOWER = 4,
  U_SB_NUMERIC = 5,
  U_SB_OLETTER = 6,
  U_SB_SEP = 7,
  U_SB_SP = 8,
  U_SB_STERM = 9,
  U_SB_UPPER = 10,
  U_SB_CR = 11,
  U_SB_EXTEND = 12,
  U_SB_LF = 13,
  U_SB_SCONTINUE = 14,
  U_SB_COUNT = 15
}
enum
Sentence Break constants.
USentenceBreakTag{
  UBRK_SENTENCE_TERM = 0,
  UBRK_SENTENCE_TERM_LIMIT = 100,
  UBRK_SENTENCE_SEP = 100,
  UBRK_SENTENCE_SEP_LIMIT = 200
}
enum
Enum constants for the sentence break tags returned by getRuleStatus().
UTransDirection{
  UTRANS_FORWARD,
  UTRANS_REVERSE
}
enum
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
UVerticalOrientation{
  U_VO_ROTATED,
  U_VO_TRANSFORMED_ROTATED,
  U_VO_TRANSFORMED_UPRIGHT,
  U_VO_UPRIGHT
}
enum
Vertical Orientation constants.
UWordBreak{
  UBRK_WORD_NONE = 0,
  UBRK_WORD_NONE_LIMIT = 100,
  UBRK_WORD_NUMBER = 100,
  UBRK_WORD_NUMBER_LIMIT = 200,
  UBRK_WORD_LETTER = 200,
  UBRK_WORD_LETTER_LIMIT = 300,
  UBRK_WORD_KANA = 300,
  UBRK_WORD_KANA_LIMIT = 400,
  UBRK_WORD_IDEO = 400,
  UBRK_WORD_IDEO_LIMIT = 500
}
enum
Enum constants for the word break tags returned by getRuleStatus().
UWordBreakValues{
  U_WB_OTHER = 0,
  U_WB_ALETTER = 1,
  U_WB_FORMAT = 2,
  U_WB_KATAKANA = 3,
  U_WB_MIDLETTER = 4,
  U_WB_MIDNUM = 5,
  U_WB_NUMERIC = 6,
  U_WB_EXTENDNUMLET = 7,
  U_WB_CR = 8,
  U_WB_EXTEND = 9,
  U_WB_LF = 10,
  U_WB_MIDNUMLET =11,
  U_WB_NEWLINE =12,
  U_WB_REGIONAL_INDICATOR = 13,
  U_WB_HEBREW_LETTER = 14,
  U_WB_SINGLE_QUOTE = 15,
  U_WB_DOUBLE_QUOTE = 16,
  U_WB_E_BASE = 17,
  U_WB_E_BASE_GAZ = 18,
  U_WB_E_MODIFIER = 19,
  U_WB_GLUE_AFTER_ZWJ = 20,
  U_WB_ZWJ = 21,
  U_WB_WSEGSPACE = 22,
  U_WB_COUNT = 23
}
enum
Word Break constants.

Typedefs

OldUChar typedef
uint16_t
Default ICU 58 definition of UChar.
UBidiPairedBracketType typedef
Bidi Paired Bracket Type constants.
UBlockCode typedef
enum UBlockCode
UBool typedef
int8_t
The ICU boolean type, a signed-byte integer.
UBreakIteratorType typedef
The possible types of text boundaries.
UCPMap typedef
struct UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
UCPMapValueFilter(const void *context, uint32_t value) typedef
uint32_t U_CALLCONV
Callback function type: Modifies a map value.
UChar typedef
char16_t
The base type for UTF-16 code units and pointers.
UChar32 typedef
int32_t
Define UChar32 as a type for single Unicode code points.
UCharCategory typedef
Data for enumerated Unicode general category types.
UCharDirection typedef
This specifies the language directional property of a character set.
UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) typedef
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type").
UCharNameChoice typedef
Selector constants for u_charName().
UColAttribute typedef
Attributes that collation service understands.
UColAttributeValue typedef
Enum containing attribute values for controlling collation behavior.
UColBoundMode typedef
Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.
UColReorderCode typedef
Enum containing the codes for reordering segments of the collation table that are not script codes.
UCollationResult typedef
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.
UCollationStrength typedef
Base letter represents a primary difference.
UCollator typedef
struct UCollator
structure representing a collator object instance
UDate typedef
double
Date and Time data type.
UDecompositionType typedef
Decomposition Type constants.
UDisplayContext typedef
UDisplayContextType typedef
UEastAsianWidth typedef
East Asian Width constants.
UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length) typedef
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.
UEnumeration typedef
struct UEnumeration
structure representing an enumeration object instance
UErrorCode typedef
enum UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
UGraphemeClusterBreak typedef
Grapheme Cluster Break constants.
UHangulSyllableType typedef
Hangul Syllable Type constants.
UIdentifierStatus typedef
Identifier Status constants.
UIdentifierType typedef
Identifier Type constants.
UIndicConjunctBreak typedef
Indic Conjunct Break constants.
UIndicPositionalCategory typedef
Indic Positional Category constants.
UIndicSyllabicCategory typedef
Indic Syllabic Category constants.
UJoiningGroup typedef
Joining Group constants.
UJoiningType typedef
Joining Type constants.
ULineBreak typedef
enum ULineBreak
Line Break constants.
ULineBreakTag typedef
Enum constants for the line break tags returned by getRuleStatus().
ULocAvailableType typedef
Types for uloc_getAvailableByType and uloc_countAvailableByType.
ULocaleData typedef
struct ULocaleData
A locale data object.
ULocaleDisplayNames typedef
C typedef for struct ULocaleDisplayNames.
UNormalizationCheckResult typedef
Result values for normalization quick check functions.
UNormalizer2 typedef
struct UNormalizer2
C typedef for struct UNormalizer2.
UNumericType typedef
Numeric Type constants.
UParseError typedef
struct UParseError
A UParseError struct is used to return detailed information about parsing errors.
UProperty typedef
enum UProperty
Selection constants for Unicode properties.
UPropertyNameChoice typedef
Selector constants for u_getPropertyName() and u_getPropertyValueName().
UReplaceable typedef
void *
An opaque replaceable text object.
UReplaceableCallbacks typedef
A set of function pointers that transliterators use to manipulate a UReplaceable.
UScriptCode typedef
Constants for ISO 15924 script codes.
UScriptUsage typedef
Script usage constants.
USentenceBreak typedef
Sentence Break constants.
USentenceBreakTag typedef
Enum constants for the sentence break tags returned by getRuleStatus().
UText typedef
struct UText
C typedef for struct UText.
UTransDirection typedef
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
UTransPosition typedef
Position structure for utrans_transIncremental() incremental transliteration.
UTransliterator typedef
void *
An opaque transliterator for use in C.
UVersionInfo[U_MAX_VERSION_LENGTH] typedef
uint8_t
The binary form of a version on ICU APIs is an array of 4 uint8_t.
UVerticalOrientation typedef
Vertical Orientation constants.
UWordBreak typedef
enum UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
UWordBreakValues typedef
Word Break constants.

Variables

context
U_CDECL_BEGIN typedef void *

Functions

UChar(U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context)
U_CDECL_BEGIN typedef
Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.
u_charAge(UChar32 c, UVersionInfo versionArray)
U_CAPI void U_EXPORT2
Get the "age" of the code point.
u_charDigitValue(UChar32 c)
U_CAPI int32_t U_EXPORT2
Returns the decimal digit value of a decimal digit character.
u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirectional algorithm (UAX #9 http://www.unicode.org/reports/tr9/).
u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
U_CAPI UChar32 U_EXPORT2
Find a Unicode character by its name and return its code point value.
u_charMirror(UChar32 c)
U_CAPI UChar32 U_EXPORT2
Maps the specified character to a "mirror-image" character.
u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Retrieve the name of a Unicode character.
u_charType(UChar32 c)
U_CAPI int8_t U_EXPORT2
Returns the general category value for the code point.
u_countChar32(const UChar *s, int32_t length)
U_CAPI int32_t U_EXPORT2
Count Unicode code points in the length UChar code units of the string.
u_digit(UChar32 ch, int8_t radix)
U_CAPI int32_t U_EXPORT2
Returns the decimal digit value of the code point in the specified radix.
u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
U_CAPI void U_EXPORT2
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
U_CAPI void U_EXPORT2
Enumerate efficiently all code points with their Unicode general categories.
u_errorName(UErrorCode code)
U_CAPI const char *U_EXPORT2
Return a string for a UErrorCode value.
u_foldCase(UChar32 c, uint32_t options)
U_CAPI UChar32 U_EXPORT2
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
u_forDigit(int32_t digit, int8_t radix)
U_CAPI UChar32 U_EXPORT2
Determines the character representation for a specific digit in the specified radix.
u_getBidiPairedBracket(UChar32 c)
U_CAPI UChar32 U_EXPORT2
Maps the specified character to its paired bracket character.
u_getCombiningClass(UChar32 c)
U_CAPI uint8_t U_EXPORT2
Returns the combining class of the code point as specified in UnicodeData.txt.
u_getIntPropertyMaxValue(UProperty which)
U_CAPI int32_t U_EXPORT2
Get the maximum value for an enumerated/integer/binary Unicode property.
u_getIntPropertyMinValue(UProperty which)
U_CAPI int32_t U_EXPORT2
Get the minimum value for an enumerated/integer/binary Unicode property.
u_getIntPropertyValue(UChar32 c, UProperty which)
U_CAPI int32_t U_EXPORT2
Get the property value for an enumerated or integer Unicode property for a code point.
u_getNumericValue(UChar32 c)
U_CAPI double U_EXPORT2
Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
u_getPropertyEnum(const char *alias)
U_CAPI UProperty U_EXPORT2
Return the UProperty enum for a given property name, as specified in the Unicode database file PropertyAliases.txt.
u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
U_CAPI const char *U_EXPORT2
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases.txt.
u_getPropertyValueEnum(UProperty property, const char *alias)
U_CAPI int32_t U_EXPORT2
Return the property value integer for a given value name, as specified in the Unicode database file PropertyValueAliases.txt.
u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
U_CAPI const char *U_EXPORT2
Return the Unicode name for a given property value, as given in the Unicode database file PropertyValueAliases.txt.
u_getUnicodeVersion(UVersionInfo versionArray)
U_CAPI void U_EXPORT2
Gets the Unicode version information.
u_getVersion(UVersionInfo versionArray)
U_CAPI void U_EXPORT2
Gets the ICU release version.
u_hasBinaryProperty(UChar32 c, UProperty which)
U_CAPI UBool U_EXPORT2
Check a binary Unicode property for a code point.
u_isIDIgnorable(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.
u_isIDPart(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is permissible as a non-initial character of an identifier according to UAX #31 Unicode Identifier and Pattern Syntax.
u_isIDStart(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is permissible as the first character in an identifier according to UAX #31 Unicode Identifier and Pattern Syntax.
u_isISOControl(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is an ISO control code.
u_isJavaIDPart(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is permissible in a Java identifier.
u_isJavaIDStart(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is permissible as the first character in a Java identifier.
u_isJavaSpaceChar(UChar32 c)
U_CAPI UBool U_EXPORT2
Determine if the specified code point is a space character according to Java.
u_isMirrored(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the code point has the Bidi_Mirrored property.
u_isUAlphabetic(UChar32 c)
U_CAPI UBool U_EXPORT2
Check if a code point has the Alphabetic Unicode property.
u_isULowercase(UChar32 c)
U_CAPI UBool U_EXPORT2
Check if a code point has the Lowercase Unicode property.
u_isUUppercase(UChar32 c)
U_CAPI UBool U_EXPORT2
Check if a code point has the Uppercase Unicode property.
u_isUWhiteSpace(UChar32 c)
U_CAPI UBool U_EXPORT2
Check if a code point has the White_Space Unicode property.
u_isWhitespace(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified code point is a whitespace character according to Java/ICU.
u_isalnum(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.
u_isalpha(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a letter character.
u_isbase(UChar32 c)
U_CAPI UBool U_EXPORT2
Non-standard: Determines whether the specified code point is a base character.
u_isblank(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.
u_iscntrl(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a control character (as defined by this function).
u_isdefined(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is "defined", which usually means that it is assigned a character.
u_isdigit(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a digit character according to Java.
u_isgraph(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
u_islower(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point has the general category "Ll" (lowercase letter).
u_isprint(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a printable character.
u_ispunct(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a punctuation character.
u_isspace(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is a space character or not.
u_istitle(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a titlecase letter.
u_isupper(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point has the general category "Lu" (uppercase letter).
u_isxdigit(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a hexadecimal digit.
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
u_memchr(const UChar *s, UChar c, int32_t count)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a BMP code point in a string.
u_memchr32(const UChar *s, UChar32 c, int32_t count)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a code point in a string.
u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)
U_CAPI int32_t U_EXPORT2
Compare the first count UChars of each buffer.
u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
u_memcpy(UChar *dest, const UChar *src, int32_t count)
U_CAPI UChar *U_EXPORT2
Synonym for memcpy(), but with UChars only.
u_memmove(UChar *dest, const UChar *src, int32_t count)
U_CAPI UChar *U_EXPORT2
Synonym for memmove(), but with UChars only.
u_memrchr(const UChar *s, UChar c, int32_t count)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a BMP code point in a string.
u_memrchr32(const UChar *s, UChar32 c, int32_t count)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a code point in a string.
u_memset(UChar *dest, UChar c, int32_t count)
U_CAPI UChar *U_EXPORT2
Initialize count characters of dest to c.
u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings (binary order).
u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a substring in a string.
u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a substring in a string.
u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Case-folds the characters in a string.
u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-32 string to UTF-16.
u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-32 string to UTF-16.
u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-8 string to UTF-16.
u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-8 string to UTF-16.
u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-8 string to UTF-16.
u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)
U_CAPI UBool U_EXPORT2
Check if the string contains more Unicode code points than a certain number.
u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Lowercase the characters in a string.
u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Titlecase a string.
u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar32 *U_EXPORT2
Convert a UTF-16 string to UTF-32.
u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
U_CAPI UChar32 *U_EXPORT2
Convert a UTF-16 string to UTF-32.
u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI char *U_EXPORT2
Convert a UTF-16 string to UTF-8.
u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
U_CAPI char *U_EXPORT2
Convert a UTF-16 string to UTF-8.
u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
U_CDECL_END U_CAPI int32_t U_EXPORT2
Uppercase the characters in a string.
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
u_strcat(UChar *dst, const UChar *src)
U_CAPI UChar *U_EXPORT2
Concatenate two ustrings.
u_strchr(const UChar *s, UChar c)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a BMP code point in a string.
u_strchr32(const UChar *s, UChar32 c)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a code point in a string.
u_strcmp(const UChar *s1, const UChar *s2)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings for bitwise equality (code unit order).
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
u_strcpy(UChar *dst, const UChar *src)
U_CAPI UChar *U_EXPORT2
Copy a ustring.
u_strcspn(const UChar *string, const UChar *matchSet)
U_CAPI int32_t U_EXPORT2
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in matchSet.
u_strlen(const UChar *s)
U_CAPI int32_t U_EXPORT2
Determine the length of an array of UChar.
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
u_strncat(UChar *dst, const UChar *src, int32_t n)
U_CAPI UChar *U_EXPORT2
Concatenate two ustrings.
u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)
U_CAPI int32_t U_EXPORT2
Compare two ustrings for bitwise equality.
u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
u_strncpy(UChar *dst, const UChar *src, int32_t n)
U_CAPI UChar *U_EXPORT2
Copy a ustring.
u_strpbrk(const UChar *string, const UChar *matchSet)
U_CAPI UChar *U_EXPORT2
Locates the first occurrence in the string string of any of the characters in the string matchSet.
u_strrchr(const UChar *s, UChar c)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a BMP code point in a string.
u_strrchr32(const UChar *s, UChar32 c)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a code point in a string.
u_strrstr(const UChar *s, const UChar *substring)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a substring in a string.
u_strspn(const UChar *string, const UChar *matchSet)
U_CAPI int32_t U_EXPORT2
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in matchSet.
u_strstr(const UChar *s, const UChar *substring)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a substring in a string.
u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)
U_CAPI UChar *U_EXPORT2
The string tokenizer API allows an application to break a string into tokens.
u_tolower(UChar32 c)
U_CAPI UChar32 U_EXPORT2
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
u_totitle(UChar32 c)
U_CAPI UChar32 U_EXPORT2
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
u_toupper(UChar32 c)
U_CAPI UChar32 U_EXPORT2
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
u_versionToString(const UVersionInfo versionArray, char *versionString)
U_CAPI void U_EXPORT2
Write a string with dotted-decimal version information according to the input UVersionInfo.
ubrk_clone(const UBreakIterator *bi, UErrorCode *status)
Thread safe cloning operation.
ubrk_close(UBreakIterator *bi)
U_CAPI void U_EXPORT2
Close a UBreakIterator.
ubrk_countAvailable(void)
U_CAPI int32_t U_EXPORT2
Determine how many locales have text breaking information available.
ubrk_current(const UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Determine the most recently-returned text boundary.
ubrk_first(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Set the iterator position to zero, the start of the text being scanned.
ubrk_following(UBreakIterator *bi, int32_t offset)
U_CAPI int32_t U_EXPORT2
Advance the iterator to the first boundary following the specified offset.
ubrk_getAvailable(int32_t index)
U_CAPI const char *U_EXPORT2
Get a locale for which text breaking information is available.
ubrk_getRuleStatus(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Return the status from the break rule that determined the most recently returned break position.
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Get the statuses from the break rules that determined the most recently returned break position.
ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
U_CAPI UBool U_EXPORT2
Returns true if the specified position is a boundary position.
ubrk_last(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Set the iterator position to the index immediately beyond the last character in the text being scanned.
ubrk_next(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Advance the iterator to the boundary following the current boundary.
ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
ubrk_preceding(UBreakIterator *bi, int32_t offset)
U_CAPI int32_t U_EXPORT2
Set the iterator position to the first boundary preceding the specified offset.
ubrk_previous(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Set the iterator position to the boundary preceding the current boundary.
ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
U_CAPI void U_EXPORT2
Sets an existing iterator to point to a new piece of text.
ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
U_CAPI void U_EXPORT2
Sets an existing iterator to point to a new piece of text.
ucol_clone(const UCollator *coll, UErrorCode *status)
U_CAPI UCollator *U_EXPORT2
Thread safe cloning operation.
ucol_close(UCollator *coll)
U_CAPI void U_EXPORT2
Close a UCollator.
ucol_countAvailable(void)
U_CAPI int32_t U_EXPORT2
Determine how many locales have collation rules available.
ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
Universal attribute getter.
ucol_getAvailable(int32_t localeIndex)
U_CAPI const char *U_EXPORT2
Get a locale for which collation rules are available.
ucol_getDisplayName(const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Get the display name for a UCollator.
ucol_getEquivalentReorderCodes(int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Retrieves the reorder codes that are grouped with the given reorder code.
ucol_getFunctionalEquivalent(char *result, int32_t resultCapacity, const char *keyword, const char *locale, UBool *isAvailable, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Return the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.
ucol_getKeywordValues(const char *keyword, UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Given a keyword, create a string enumeration of all values for that keyword that are currently in use.
ucol_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Given a key and a locale, returns an array of string values in a preferred order that would make a difference.
ucol_getKeywords(UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Create a string enumerator of all possible keywords that are relevant to collation.
ucol_getMaxVariable(const UCollator *coll)
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
ucol_getReorderCodes(const UCollator *coll, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Retrieves the reordering codes for this collator.
ucol_getSortKey(const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength)
U_CAPI int32_t U_EXPORT2
Get a sort key for a string from a UCollator.
ucol_getStrength(const UCollator *coll)
Get the collation strength used in a UCollator.
ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity)
U_CAPI int32_t U_EXPORT2
Merges two sort keys.
ucol_open(const char *loc, UErrorCode *status)
U_CAPI UCollator *U_EXPORT2
Open a UCollator for comparing strings.
ucol_openAvailableLocales(UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Create a string enumerator of all locales for which a valid collator may be opened.
ucol_openRules(const UChar *rules, int32_t rulesLength, UColAttributeValue normalizationMode, UCollationStrength strength, UParseError *parseError, UErrorCode *status)
U_CAPI UCollator *U_EXPORT2
Produce a UCollator instance according to the rules supplied.
ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status)
U_CAPI void U_EXPORT2
Universal attribute setter.
ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode)
U_CAPI void U_EXPORT2
Sets the variable top to the top of the specified reordering group.
ucol_setReorderCodes(UCollator *coll, const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode *pErrorCode)
U_CAPI void U_EXPORT2
Sets the reordering codes for this collator.
ucol_setStrength(UCollator *coll, UCollationStrength strength)
U_CAPI void U_EXPORT2
Set the collation strength used in a UCollator.
ucol_strcoll(const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)
Compare two strings.
ucol_strcollUTF8(const UCollator *coll, const char *source, int32_t sourceLength, const char *target, int32_t targetLength, UErrorCode *status)
Compare two strings in UTF-8.
uenum_close(UEnumeration *en)
U_CAPI void U_EXPORT2
Disposes of resources in use by the iterator.
uenum_count(UEnumeration *en, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Returns the number of elements that the iterator traverses.
uenum_next(UEnumeration *en, int32_t *resultLength, UErrorCode *status)
U_CAPI const char *U_EXPORT2
Returns the next element in the iterator's list.
uenum_openCharStringsEnumeration(const char *const strings[], int32_t count, UErrorCode *ec)
U_CAPI UEnumeration *U_EXPORT2
Given an array of const char* strings (invariant chars only), return a UEnumeration.
uenum_openUCharStringsEnumeration(const UChar *const strings[], int32_t count, UErrorCode *ec)
U_CAPI UEnumeration *U_EXPORT2
Given an array of const UChar* strings, return a UEnumeration.
uenum_reset(UEnumeration *en, UErrorCode *status)
U_CAPI void U_EXPORT2
Resets the iterator to the current list of service IDs.
uenum_unext(UEnumeration *en, int32_t *resultLength, UErrorCode *status)
U_CAPI const UChar *U_EXPORT2
Returns the next element in the iterator's list.
uldn_close(ULocaleDisplayNames *ldn)
U_CAPI void U_EXPORT2
Closes a ULocaleDisplayNames instance obtained from uldn_open().
uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type, UErrorCode *pErrorCode)
Returns the UDisplayContext value for the specified UDisplayContextType.
uldn_getDialectHandling(const ULocaleDisplayNames *ldn)
Returns the dialect handling used in the display names.
uldn_getLocale(const ULocaleDisplayNames *ldn)
U_CAPI const char *U_EXPORT2
Returns the locale used to determine the display names.
uldn_keyDisplayName(const ULocaleDisplayNames *ldn, const char *key, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided locale key.
uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, const char *key, const char *value, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided value (used with the provided key).
uldn_languageDisplayName(const ULocaleDisplayNames *ldn, const char *lang, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided language code.
uldn_localeDisplayName(const ULocaleDisplayNames *ldn, const char *locale, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided locale.
uldn_open(const char *locale, UDialectHandling dialectHandling, UErrorCode *pErrorCode)
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided dialectHandling.
uldn_openForContext(const char *locale, UDisplayContext *contexts, int32_t length, UErrorCode *pErrorCode)
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided UDisplayContext settings.
uldn_regionDisplayName(const ULocaleDisplayNames *ldn, const char *region, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided region code.
uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, UScriptCode scriptCode, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided script code.
uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, const char *script, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided script.
uldn_variantDisplayName(const ULocaleDisplayNames *ldn, const char *variant, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided variant.
uloc_acceptLanguage(char *result, int32_t resultAvailable, UAcceptResult *outResult, const char **acceptList, int32_t acceptListCount, UEnumeration *availableLocales, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Based on a list of available locales, determine an acceptable locale for the user.
uloc_addLikelySubtags(const char *localeID, char *maximizedLocaleID, int32_t maximizedLocaleIDCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
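Add the likely subtags for a provided locale ID, per the algorithm described in the following CLDR technical report: Unicode Technical Standard #35 (LDML), "Likely Subtags" (http://www.unicode.org/reports/tr35/#Likely_Subtags).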
uloc_canonicalize(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale.
uloc_countAvailable(void)
U_CAPI int32_t U_EXPORT2
Gets the size of the list of all available locales.
uloc_forLanguageTag(const char *langtag, char *localeID, int32_t localeIDCapacity, int32_t *parsedLength, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Returns a locale ID for the specified BCP47 language tag string.
uloc_getAvailable(int32_t n)
U_CAPI const char *U_EXPORT2
Gets the specified locale from a list of available locales.
uloc_getBaseName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale, like uloc_getName(), but without keywords.
uloc_getCharacterOrientation(const char *localeId, UErrorCode *status)
U_CAPI ULayoutType U_EXPORT2
Get the layout character orientation for the specified locale.
uloc_getCountry(const char *localeID, char *country, int32_t countryCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the country code for the specified locale.
uloc_getDefault(void)
U_CAPI const char *U_EXPORT2
Gets ICU's default locale.
uloc_getDisplayCountry(const char *locale, const char *displayLocale, UChar *country, int32_t countryCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the country name suitable for display for the specified locale.
uloc_getDisplayKeyword(const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the keyword name suitable for display for the specified locale.
uloc_getDisplayKeywordValue(const char *locale, const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the value of the keyword suitable for display for the specified locale.
uloc_getDisplayLanguage(const char *locale, const char *displayLocale, UChar *language, int32_t languageCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the language name suitable for display for the specified locale.
uloc_getDisplayName(const char *localeID, const char *inLocaleID, UChar *result, int32_t maxResultSize, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the full name suitable for display for the specified locale.
uloc_getDisplayScript(const char *locale, const char *displayLocale, UChar *script, int32_t scriptCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the script name suitable for display for the specified locale.
uloc_getDisplayVariant(const char *locale, const char *displayLocale, UChar *variant, int32_t variantCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the variant name suitable for display for the specified locale.
uloc_getISO3Country(const char *localeID)
U_CAPI const char *U_EXPORT2
Gets the ISO country code for the specified locale.
uloc_getISO3Language(const char *localeID)
U_CAPI const char *U_EXPORT2
Gets the ISO language code for the specified locale.
uloc_getISOCountries(void)
U_CAPI const char *const *U_EXPORT2
Gets a list of all available 2-letter country codes defined in ISO 3166.
uloc_getISOLanguages(void)
U_CAPI const char *const *U_EXPORT2
Gets a list of all available 2-letter language codes defined in ISO 639, plus additional 3-letter codes determined to be useful for locale generation as defined by Unicode CLDR.
uloc_getKeywordValue(const char *localeID, const char *keywordName, char *buffer, int32_t bufferCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Get the value for a keyword.
uloc_getLanguage(const char *localeID, char *language, int32_t languageCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the language code for the specified locale.
uloc_getLineOrientation(const char *localeId, UErrorCode *status)
U_CAPI ULayoutType U_EXPORT2
Get the layout line orientation for the specified locale.
uloc_getName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale.
uloc_getScript(const char *localeID, char *script, int32_t scriptCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the script code for the specified locale.
uloc_getVariant(const char *localeID, char *variant, int32_t variantCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the variant code for the specified locale.
uloc_isRightToLeft(const char *locale)
U_CAPI UBool U_EXPORT2
Returns whether the locale's script is written right-to-left.
uloc_minimizeSubtags(const char *localeID, char *minimizedLocaleID, int32_t minimizedLocaleIDCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
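Minimize the subtags for a provided locale ID, per the algorithm described in the following CLDR technical report: Unicode Technical Standard #35 (LDML), "Likely Subtags" (http://www.unicode.org/reports/tr35/#Likely_Subtags).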
uloc_openKeywords(const char *localeID, UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Gets an enumeration of keywords for the specified locale.
uloc_setKeywordValue(const char *keywordName, const char *keywordValue, char *buffer, int32_t bufferCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Sets or removes the value of the specified keyword.
uloc_toLanguageTag(const char *localeID, char *langtag, int32_t langtagCapacity, UBool strict, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Returns a well-formed language tag for this locale ID.
uloc_toLegacyKey(const char *keyword)
U_CAPI const char *U_EXPORT2
Converts the specified keyword (BCP 47 Unicode locale extension key, or legacy key) to the legacy key.
uloc_toLegacyType(const char *keyword, const char *value)
U_CAPI const char *U_EXPORT2
Converts the specified keyword value (BCP 47 Unicode locale extension type, or legacy type or type alias) to the canonical legacy type.
uloc_toUnicodeLocaleKey(const char *keyword)
U_CAPI const char *U_EXPORT2
Converts the specified keyword (legacy key, or BCP 47 Unicode locale extension key) to the equivalent BCP 47 Unicode locale extension key.
uloc_toUnicodeLocaleType(const char *keyword, const char *value)
U_CAPI const char *U_EXPORT2
Converts the specified keyword value (legacy type, or BCP 47 Unicode locale extension type) to the well-formed BCP 47 Unicode locale extension type for the specified keyword (category).
ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status)
U_CAPI void U_EXPORT2
Return the current CLDR version used by the library.
unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.
unorm2_close(UNormalizer2 *norm2)
U_CAPI void U_EXPORT2
Closes a UNormalizer2 instance from unorm2_openFiltered().
unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
U_CAPI UChar32 U_EXPORT2
Performs pairwise composition of a & b and returns the composite if there is one.
unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
U_CAPI uint8_t U_EXPORT2
Gets the combining class of c.
unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Gets the decomposition mapping of c.
unorm2_getNFCInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFC normalization.
unorm2_getNFDInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFD normalization.
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization which is equivalent to applying the NFKC_Casefold mappings and then NFC.
unorm2_getNFKCInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKC normalization.
unorm2_getNFKDInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKD normalization.
unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Gets the raw decomposition mapping of c.
unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
U_CAPI UBool U_EXPORT2
Tests if the character always has a normalization boundary after it, regardless of context.
unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
U_CAPI UBool U_EXPORT2
Tests if the character always has a normalization boundary before it, regardless of context.
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
U_CAPI UBool U_EXPORT2
Tests if the character is normalization-inert.
unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
U_CAPI UBool U_EXPORT2
Tests if the string is normalized.
unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.
unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.
unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the end of the normalized substring of the input string.
uscript_breaksBetweenLetters(UScriptCode script)
U_CAPI UBool U_EXPORT2
Returns true if the script allows line breaks between letters (excluding hyphenation).
uscript_getCode(const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
uscript_getName(UScriptCode scriptCode)
U_CAPI const char *U_EXPORT2
Returns the long Unicode script name, if there is one.
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Writes the script sample character string.
uscript_getScript(UChar32 codepoint, UErrorCode *err)
U_CAPI UScriptCode U_EXPORT2
Gets the script code associated with the given codepoint.
uscript_getScriptExtensions(UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode)
U_CAPI int32_t U_EXPORT2
Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.
uscript_getShortName(UScriptCode scriptCode)
U_CAPI const char *U_EXPORT2
Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.
uscript_getUsage(UScriptCode script)
U_CAPI UScriptUsage U_EXPORT2
Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
uscript_hasScript(UChar32 c, UScriptCode sc)
U_CAPI UBool U_EXPORT2
Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.
uscript_isCased(UScriptCode script)
U_CAPI UBool U_EXPORT2
Returns true if, in modern (or most recent) usage of the script, case distinctions are customary.
uscript_isRightToLeft(UScriptCode script)
U_CAPI UBool U_EXPORT2
Returns true if the script is written right-to-left.
utext_char32At(UText *ut, int64_t nativeIndex)
U_CAPI UChar32 U_EXPORT2
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
U_CAPI UText *U_EXPORT2
Clone a UText.
utext_close(UText *ut)
U_CAPI UText *U_EXPORT2
Close function for UText instances.
utext_current32(UText *ut)
U_CAPI UChar32 U_EXPORT2
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached the end of the input text.
utext_equals(const UText *a, const UText *b)
U_CAPI UBool U_EXPORT2
Compare two UText objects for equality.
utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Extract text from a UText into a UChar buffer.
utext_getNativeIndex(const UText *ut)
U_CAPI int64_t U_EXPORT2
Get the current iterator position, which can range from 0 to the length of the text.
utext_getPreviousNativeIndex(UText *ut)
U_CAPI int64_t U_EXPORT2
Get the native index of the character preceding the current position.
utext_moveIndex32(UText *ut, int32_t delta)
U_CAPI UBool U_EXPORT2
Move the iterator position by delta code points.
utext_nativeLength(UText *ut)
U_CAPI int64_t U_EXPORT2
Get the length of the text.
utext_next32(UText *ut)
U_CAPI UChar32 U_EXPORT2
Get the code point at the current iteration position of the UText, and advance the position to the first index following the character.
utext_next32From(UText *ut, int64_t nativeIndex)
U_CAPI UChar32 U_EXPORT2
Set the iteration index and return the code point at that index.
utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
U_CAPI UText *U_EXPORT2
Open a read-only UText for a UChar * string.
utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
U_CAPI UText *U_EXPORT2
Open a read-only UText implementation for UTF-8 strings.
utext_previous32(UText *ut)
U_CAPI UChar32 U_EXPORT2
Move the iterator position to the character (code point) whose index precedes the current position, and return that character.
utext_previous32From(UText *ut, int64_t nativeIndex)
U_CAPI UChar32 U_EXPORT2
Set the iteration index, and return the code point preceding the one specified by the initial index.
utext_setNativeIndex(UText *ut, int64_t nativeIndex)
U_CAPI void U_EXPORT2
Set the current iteration position to the nearest code point boundary at or preceding the specified index.
utrans_clone(const UTransliterator *trans, UErrorCode *status)
Create a copy of a transliterator.
utrans_close(UTransliterator *trans)
U_CAPI void U_EXPORT2
Close a transliterator.
utrans_openIDs(UErrorCode *pErrorCode)
U_CAPI UEnumeration *U_EXPORT2
Return a UEnumeration for the available transliterators.
utrans_openInverse(const UTransliterator *trans, UErrorCode *status)
Open an inverse of an existing transliterator.
utrans_openU(const UChar *id, int32_t idLength, UTransDirection dir, const UChar *rules, int32_t rulesLength, UParseError *parseError, UErrorCode *pErrorCode)
Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.
utrans_setFilter(UTransliterator *trans, const UChar *filterPattern, int32_t filterPatternLen, UErrorCode *status)
U_CAPI void U_EXPORT2
Set the filter used by a transliterator.
utrans_toRules(const UTransliterator *trans, UBool escapeUnprintable, UChar *result, int32_t resultLength, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Create a rule string that can be passed to utrans_openU to recreate this transliterator.
utrans_trans(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, int32_t start, int32_t *limit, UErrorCode *status)
U_CAPI void U_EXPORT2
Transliterate a segment of a UReplaceable string.
utrans_transIncremental(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, UTransPosition *pos, UErrorCode *status)
U_CAPI void U_EXPORT2
Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.
utrans_transIncrementalUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, UTransPosition *pos, UErrorCode *status)
U_CAPI void U_EXPORT2
Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.
utrans_transUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, int32_t start, int32_t *limit, UErrorCode *status)
U_CAPI void U_EXPORT2
Transliterate a segment of a UChar* string.

Structs

UParseError

A UParseError struct is used to return detailed information about parsing errors.

UReplaceableCallbacks

A set of function pointers that transliterators use to manipulate a UReplaceable.

UTransPosition

Position structure for utrans_transIncremental() incremental transliteration.

Enumerations

Anonymous Enum 117

Declared in unicode/parseerr.h
 Anonymous Enum 117

The capacity of the context strings in UParseError.

Properties
U_PARSE_CONTEXT_LEN

UAcceptResult

Declared in unicode/uloc.h
 UAcceptResult

Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.

See also: uloc_acceptLanguageFromHTTP, uloc_acceptLanguage

Properties
ULOC_ACCEPT_FAILED

No exact match was found.

ULOC_ACCEPT_FALLBACK

A fallback was found.

For example, the Accept-Language list includes 'ja_JP' and is matched with available locale 'ja'.

ULOC_ACCEPT_VALID

An exact match was found.

UBidiPairedBracketType

Declared in unicode/uchar.h
 UBidiPairedBracketType

Bidi Paired Bracket Type constants.

See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE

Properties
U_BPT_CLOSE

Close paired bracket.

U_BPT_COUNT

One more than the highest normal UBidiPairedBracketType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE).

Deprecated in ICU 58. The numeric value may change over time; see ICU ticket #12420.

U_BPT_NONE

Not a paired bracket.

U_BPT_OPEN

Open paired bracket.

UBlockCode

Declared in unicode/uchar.h
 UBlockCode

Constants for Unicode blocks, see the Unicode Data file Blocks.txt.

Properties
UBLOCK_ADLAM

UBLOCK_AEGEAN_NUMBERS

UBLOCK_AHOM

UBLOCK_ALCHEMICAL_SYMBOLS

UBLOCK_ALPHABETIC_PRESENTATION_FORMS

UBLOCK_ANATOLIAN_HIEROGLYPHS

UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION

UBLOCK_ANCIENT_GREEK_NUMBERS

UBLOCK_ANCIENT_SYMBOLS

UBLOCK_ARABIC

UBLOCK_ARABIC_EXTENDED_A

UBLOCK_ARABIC_EXTENDED_B

UBLOCK_ARABIC_EXTENDED_C

UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS

UBLOCK_ARABIC_PRESENTATION_FORMS_A

UBLOCK_ARABIC_PRESENTATION_FORMS_B

UBLOCK_ARABIC_SUPPLEMENT

UBLOCK_ARMENIAN

UBLOCK_ARROWS

UBLOCK_AVESTAN

UBLOCK_BALINESE

UBLOCK_BAMUM

UBLOCK_BAMUM_SUPPLEMENT

UBLOCK_BASIC_LATIN

UBLOCK_BASSA_VAH

UBLOCK_BATAK

UBLOCK_BENGALI

UBLOCK_BHAIKSUKI

UBLOCK_BLOCK_ELEMENTS

UBLOCK_BOPOMOFO

UBLOCK_BOPOMOFO_EXTENDED

UBLOCK_BOX_DRAWING

UBLOCK_BRAHMI

UBLOCK_BRAILLE_PATTERNS

UBLOCK_BUGINESE

UBLOCK_BUHID

UBLOCK_BYZANTINE_MUSICAL_SYMBOLS

UBLOCK_CARIAN

UBLOCK_CAUCASIAN_ALBANIAN

UBLOCK_CHAKMA

UBLOCK_CHAM

UBLOCK_CHEROKEE

UBLOCK_CHEROKEE_SUPPLEMENT

UBLOCK_CHESS_SYMBOLS

UBLOCK_CHORASMIAN

UBLOCK_CJK_COMPATIBILITY

UBLOCK_CJK_COMPATIBILITY_FORMS

UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS

UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT

UBLOCK_CJK_RADICALS_SUPPLEMENT

UBLOCK_CJK_STROKES

UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION

UBLOCK_CJK_UNIFIED_IDEOGRAPHS

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I

UBLOCK_COMBINING_DIACRITICAL_MARKS

UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED

UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT

UBLOCK_COMBINING_HALF_MARKS

UBLOCK_COMBINING_MARKS_FOR_SYMBOLS

Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".

UBLOCK_COMMON_INDIC_NUMBER_FORMS

UBLOCK_CONTROL_PICTURES

UBLOCK_COPTIC

UBLOCK_COPTIC_EPACT_NUMBERS

UBLOCK_COUNT

One more than the highest normal UBlockCode value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK).

Deprecated in ICU 58. The numeric value may change over time; see ICU ticket #12420.

UBLOCK_COUNTING_ROD_NUMERALS

UBLOCK_CUNEIFORM

UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION

UBLOCK_CURRENCY_SYMBOLS

UBLOCK_CYPRIOT_SYLLABARY

UBLOCK_CYPRO_MINOAN

UBLOCK_CYRILLIC

UBLOCK_CYRILLIC_EXTENDED_A

UBLOCK_CYRILLIC_EXTENDED_B

UBLOCK_CYRILLIC_EXTENDED_C

UBLOCK_CYRILLIC_EXTENDED_D

UBLOCK_CYRILLIC_SUPPLEMENT

UBLOCK_CYRILLIC_SUPPLEMENTARY

Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".

UBLOCK_DESERET

UBLOCK_DEVANAGARI

UBLOCK_DEVANAGARI_EXTENDED

UBLOCK_DEVANAGARI_EXTENDED_A

UBLOCK_DINGBATS

UBLOCK_DIVES_AKURU

UBLOCK_DOGRA

UBLOCK_DOMINO_TILES

UBLOCK_DUPLOYAN

UBLOCK_EARLY_DYNASTIC_CUNEIFORM

UBLOCK_EGYPTIAN_HIEROGLYPHS

UBLOCK_EGYPTIAN_HIEROGLYPHS_EXTENDED_A

UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS

UBLOCK_ELBASAN

UBLOCK_ELYMAIC

UBLOCK_EMOTICONS

UBLOCK_ENCLOSED_ALPHANUMERICS

UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT

UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS

UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT

UBLOCK_ETHIOPIC

UBLOCK_ETHIOPIC_EXTENDED

UBLOCK_ETHIOPIC_EXTENDED_A

UBLOCK_ETHIOPIC_EXTENDED_B

UBLOCK_ETHIOPIC_SUPPLEMENT

UBLOCK_GARAY

UBLOCK_GENERAL_PUNCTUATION

UBLOCK_GEOMETRIC_SHAPES

UBLOCK_GEOMETRIC_SHAPES_EXTENDED

UBLOCK_GEORGIAN

UBLOCK_GEORGIAN_EXTENDED

UBLOCK_GEORGIAN_SUPPLEMENT

UBLOCK_GLAGOLITIC

UBLOCK_GLAGOLITIC_SUPPLEMENT

UBLOCK_GOTHIC

UBLOCK_GRANTHA

UBLOCK_GREEK

Unicode 3.2 renames this block to "Greek and Coptic".

UBLOCK_GREEK_EXTENDED

UBLOCK_GUJARATI

UBLOCK_GUNJALA_GONDI

UBLOCK_GURMUKHI

UBLOCK_GURUNG_KHEMA

UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS

UBLOCK_HANGUL_COMPATIBILITY_JAMO

UBLOCK_HANGUL_JAMO

UBLOCK_HANGUL_JAMO_EXTENDED_A

UBLOCK_HANGUL_JAMO_EXTENDED_B

UBLOCK_HANGUL_SYLLABLES

UBLOCK_HANIFI_ROHINGYA

UBLOCK_HANUNOO

UBLOCK_HATRAN

UBLOCK_HEBREW

UBLOCK_HIGH_PRIVATE_USE_SURROGATES

UBLOCK_HIGH_SURROGATES

UBLOCK_HIRAGANA

UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS

UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION

UBLOCK_IMPERIAL_ARAMAIC

UBLOCK_INDIC_SIYAQ_NUMBERS

UBLOCK_INSCRIPTIONAL_PAHLAVI

UBLOCK_INSCRIPTIONAL_PARTHIAN

UBLOCK_INVALID_CODE

UBLOCK_IPA_EXTENSIONS

UBLOCK_JAVANESE

UBLOCK_KAITHI

UBLOCK_KAKTOVIK_NUMERALS

UBLOCK_KANA_EXTENDED_A

UBLOCK_KANA_EXTENDED_B

UBLOCK_KANA_SUPPLEMENT

UBLOCK_KANBUN

UBLOCK_KANGXI_RADICALS

UBLOCK_KANNADA

UBLOCK_KATAKANA

UBLOCK_KATAKANA_PHONETIC_EXTENSIONS

UBLOCK_KAWI

UBLOCK_KAYAH_LI

UBLOCK_KHAROSHTHI

UBLOCK_KHITAN_SMALL_SCRIPT

UBLOCK_KHMER

UBLOCK_KHMER_SYMBOLS

UBLOCK_KHOJKI

UBLOCK_KHUDAWADI

UBLOCK_KIRAT_RAI

UBLOCK_LAO

UBLOCK_LATIN_1_SUPPLEMENT

UBLOCK_LATIN_EXTENDED_A

UBLOCK_LATIN_EXTENDED_ADDITIONAL

UBLOCK_LATIN_EXTENDED_B

UBLOCK_LATIN_EXTENDED_C

UBLOCK_LATIN_EXTENDED_D

UBLOCK_LATIN_EXTENDED_E

UBLOCK_LATIN_EXTENDED_F

UBLOCK_LATIN_EXTENDED_G

UBLOCK_LEPCHA

UBLOCK_LETTERLIKE_SYMBOLS

UBLOCK_LIMBU

UBLOCK_LINEAR_A

UBLOCK_LINEAR_B_IDEOGRAMS

UBLOCK_LINEAR_B_SYLLABARY

UBLOCK_LISU

UBLOCK_LISU_SUPPLEMENT

UBLOCK_LOW_SURROGATES

UBLOCK_LYCIAN

UBLOCK_LYDIAN

UBLOCK_MAHAJANI

UBLOCK_MAHJONG_TILES

UBLOCK_MAKASAR

UBLOCK_MALAYALAM

UBLOCK_MANDAIC

UBLOCK_MANICHAEAN

UBLOCK_MARCHEN

UBLOCK_MASARAM_GONDI

UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS

UBLOCK_MATHEMATICAL_OPERATORS

UBLOCK_MAYAN_NUMERALS

UBLOCK_MEDEFAIDRIN

UBLOCK_MEETEI_MAYEK

UBLOCK_MEETEI_MAYEK_EXTENSIONS

UBLOCK_MENDE_KIKAKUI

UBLOCK_MEROITIC_CURSIVE

UBLOCK_MEROITIC_HIEROGLYPHS

UBLOCK_MIAO

UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A

UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B

UBLOCK_MISCELLANEOUS_SYMBOLS

UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS

UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS

UBLOCK_MISCELLANEOUS_TECHNICAL

UBLOCK_MODI

UBLOCK_MODIFIER_TONE_LETTERS

UBLOCK_MONGOLIAN

UBLOCK_MONGOLIAN_SUPPLEMENT

UBLOCK_MRO

UBLOCK_MULTANI

UBLOCK_MUSICAL_SYMBOLS

UBLOCK_MYANMAR

UBLOCK_MYANMAR_EXTENDED_A

UBLOCK_MYANMAR_EXTENDED_B

UBLOCK_MYANMAR_EXTENDED_C

UBLOCK_NABATAEAN

UBLOCK_NAG_MUNDARI

UBLOCK_NANDINAGARI

UBLOCK_NEWA

UBLOCK_NEW_TAI_LUE

UBLOCK_NKO

UBLOCK_NO_BLOCK

New No_Block value in Unicode 4.

UBLOCK_NUMBER_FORMS

UBLOCK_NUSHU

UBLOCK_NYIAKENG_PUACHUE_HMONG

UBLOCK_OGHAM

UBLOCK_OLD_HUNGARIAN

UBLOCK_OLD_ITALIC

UBLOCK_OLD_NORTH_ARABIAN

UBLOCK_OLD_PERMIC

UBLOCK_OLD_PERSIAN

UBLOCK_OLD_SOGDIAN

UBLOCK_OLD_SOUTH_ARABIAN

UBLOCK_OLD_TURKIC

UBLOCK_OLD_UYGHUR

UBLOCK_OL_CHIKI

UBLOCK_OL_ONAL

UBLOCK_OPTICAL_CHARACTER_RECOGNITION

UBLOCK_ORIYA

UBLOCK_ORNAMENTAL_DINGBATS

UBLOCK_OSAGE

UBLOCK_OSMANYA

UBLOCK_OTTOMAN_SIYAQ_NUMBERS

UBLOCK_PAHAWH_HMONG

UBLOCK_PALMYRENE

UBLOCK_PAU_CIN_HAU

UBLOCK_PHAGS_PA

UBLOCK_PHAISTOS_DISC

UBLOCK_PHOENICIAN

UBLOCK_PHONETIC_EXTENSIONS

UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT

UBLOCK_PLAYING_CARDS

UBLOCK_PRIVATE_USE

Same as UBLOCK_PRIVATE_USE_AREA.

Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs.

UBLOCK_PRIVATE_USE_AREA

Same as UBLOCK_PRIVATE_USE.

Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs.

UBLOCK_PSALTER_PAHLAVI

UBLOCK_REJANG

UBLOCK_RUMI_NUMERAL_SYMBOLS

UBLOCK_RUNIC

UBLOCK_SAMARITAN

UBLOCK_SAURASHTRA

UBLOCK_SHARADA

UBLOCK_SHAVIAN

UBLOCK_SHORTHAND_FORMAT_CONTROLS

UBLOCK_SIDDHAM

UBLOCK_SINHALA

UBLOCK_SINHALA_ARCHAIC_NUMBERS

UBLOCK_SMALL_FORM_VARIANTS

UBLOCK_SMALL_KANA_EXTENSION

UBLOCK_SOGDIAN

UBLOCK_SORA_SOMPENG

UBLOCK_SOYOMBO

UBLOCK_SPACING_MODIFIER_LETTERS

UBLOCK_SPECIALS

UBLOCK_SUNDANESE

UBLOCK_SUNDANESE_SUPPLEMENT

UBLOCK_SUNUWAR

UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS

UBLOCK_SUPPLEMENTAL_ARROWS_A

UBLOCK_SUPPLEMENTAL_ARROWS_B

UBLOCK_SUPPLEMENTAL_ARROWS_C

UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS

UBLOCK_SUPPLEMENTAL_PUNCTUATION

UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS

UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A

UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B

UBLOCK_SUTTON_SIGNWRITING

UBLOCK_SYLOTI_NAGRI

UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A

UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING

UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT

UBLOCK_SYRIAC

UBLOCK_SYRIAC_SUPPLEMENT

UBLOCK_TAGALOG

UBLOCK_TAGBANWA

UBLOCK_TAGS

UBLOCK_TAI_LE

UBLOCK_TAI_THAM

UBLOCK_TAI_VIET

UBLOCK_TAI_XUAN_JING_SYMBOLS

UBLOCK_TAKRI

UBLOCK_TAMIL

UBLOCK_TAMIL_SUPPLEMENT

UBLOCK_TANGSA

UBLOCK_TANGUT

UBLOCK_TANGUT_COMPONENTS

UBLOCK_TANGUT_SUPPLEMENT

UBLOCK_TELUGU

UBLOCK_THAANA

UBLOCK_THAI

UBLOCK_TIBETAN

UBLOCK_TIFINAGH

UBLOCK_TIRHUTA

UBLOCK_TODHRI

UBLOCK_TOTO

UBLOCK_TRANSPORT_AND_MAP_SYMBOLS

UBLOCK_TULU_TIGALARI

UBLOCK_UGARITIC

UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS

UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED

UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A

UBLOCK_VAI

UBLOCK_VARIATION_SELECTORS

UBLOCK_VARIATION_SELECTORS_SUPPLEMENT

UBLOCK_VEDIC_EXTENSIONS

UBLOCK_VERTICAL_FORMS

UBLOCK_VITHKUQI

UBLOCK_WANCHO

UBLOCK_WARANG_CITI

UBLOCK_YEZIDI

UBLOCK_YIJING_HEXAGRAM_SYMBOLS

UBLOCK_YI_RADICALS

UBLOCK_YI_SYLLABLES

UBLOCK_ZANABAZAR_SQUARE

UBLOCK_ZNAMENNY_MUSICAL_NOTATION

UBreakIteratorType

Declared in unicode/ubrk.h
 UBreakIteratorType

The possible types of text boundaries.

Properties
UBRK_CHARACTER

Character breaks.

UBRK_COUNT

One more than the highest normal UBreakIteratorType value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UBRK_LINE

Line breaks.

UBRK_SENTENCE

Sentence breaks.

UBRK_TITLE

Title Case breaks. The iterator created using this type locates title boundaries as described for Unicode 3.2 only.

For Unicode 4.0 and above title boundary iteration, please use Word Boundary iterator.

Deprecated. ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.

UBRK_WORD

Word breaks.

UCPMapRangeOption

Declared in unicode/ucpmap.h
 UCPMapRangeOption

Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.

Most users should use UCPMAP_RANGE_NORMAL.

See also: ucpmap_getRange, ucptrie_getRange, umutablecptrie_getRange

Properties
UCPMAP_RANGE_FIXED_ALL_SURROGATES

ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that all surrogates (U+D800..U+DFFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter.

The surrogateValue is not transformed via filter(). See U_IS_SURROGATE(c).

Most users should use UCPMAP_RANGE_NORMAL instead.

This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points.

UCPMAP_RANGE_FIXED_LEAD_SURROGATES

ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that lead surrogates (U+D800..U+DBFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter.

The surrogateValue is not transformed via filter(). See U_IS_LEAD(c).

Most users should use UCPMAP_RANGE_NORMAL instead.

This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points.

UCPMAP_RANGE_NORMAL

ucpmap_getRange() enumerates all same-value ranges as stored in the map.

Most users should use this option.

UCharCategory

Declared in unicode/uchar.h
 UCharCategory

Data for enumerated Unicode general category types.

See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .

Properties
U_CHAR_CATEGORY_COUNT

One higher than the last enum UCharCategory constant.

This numeric value is stable (will not change), see http://www.unicode.org/policies/stability_policy.html#Property_Value

U_COMBINING_SPACING_MARK

Mc.

U_CONNECTOR_PUNCTUATION

Pc.

U_CONTROL_CHAR

Cc.

U_CURRENCY_SYMBOL

Sc.

U_DASH_PUNCTUATION

Pd.

U_DECIMAL_DIGIT_NUMBER

Nd.

U_ENCLOSING_MARK

Me.

U_END_PUNCTUATION

Pe.

U_FINAL_PUNCTUATION

Pf.

U_FORMAT_CHAR

Cf.

U_GENERAL_OTHER_TYPES

Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!)

U_INITIAL_PUNCTUATION

Pi.

U_LETTER_NUMBER

Nl.

U_LINE_SEPARATOR

Zl.

U_LOWERCASE_LETTER

Ll.

U_MATH_SYMBOL

Sm.

U_MODIFIER_LETTER

Lm.

U_MODIFIER_SYMBOL

Sk.

U_NON_SPACING_MARK

Mn.

U_OTHER_LETTER

Lo.

U_OTHER_NUMBER

No.

U_OTHER_PUNCTUATION

Po.

U_OTHER_SYMBOL

So.

U_PARAGRAPH_SEPARATOR

Zp.

U_PRIVATE_USE_CHAR

Co.

U_SPACE_SEPARATOR

Zs.

U_START_PUNCTUATION

Ps.

U_SURROGATE

Cs.

U_TITLECASE_LETTER

Lt.

U_UNASSIGNED

Non-category for unassigned and non-character code points.

U_UPPERCASE_LETTER

Lu.

UCharDirection

Declared in unicode/uchar.h
 UCharDirection

This specifies the language directional property of a character set.

Properties
U_ARABIC_NUMBER

AN.

U_BLOCK_SEPARATOR

B.

U_BOUNDARY_NEUTRAL

BN.

U_CHAR_DIRECTION_COUNT

One more than the highest UCharDirection value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_COMMON_NUMBER_SEPARATOR

CS.

U_DIR_NON_SPACING_MARK

NSM.

U_EUROPEAN_NUMBER

EN.

U_EUROPEAN_NUMBER_SEPARATOR

ES.

U_EUROPEAN_NUMBER_TERMINATOR

ET.

U_FIRST_STRONG_ISOLATE

FSI.

U_LEFT_TO_RIGHT

L.

U_LEFT_TO_RIGHT_EMBEDDING

LRE.

U_LEFT_TO_RIGHT_ISOLATE

LRI.

U_LEFT_TO_RIGHT_OVERRIDE

LRO.

U_OTHER_NEUTRAL

ON.

U_POP_DIRECTIONAL_FORMAT

PDF.

U_POP_DIRECTIONAL_ISOLATE

PDI.

U_RIGHT_TO_LEFT

R.

U_RIGHT_TO_LEFT_ARABIC

AL.

U_RIGHT_TO_LEFT_EMBEDDING

RLE.

U_RIGHT_TO_LEFT_ISOLATE

RLI.

U_RIGHT_TO_LEFT_OVERRIDE

RLO.

U_SEGMENT_SEPARATOR

S.

U_WHITE_SPACE_NEUTRAL

WS.

UCharNameChoice

Declared in unicode/uchar.h
 UCharNameChoice

Selector constants for u_charName().

u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.

See also: u_charName

Properties
U_CHAR_NAME_ALIAS

Corrected name from NameAliases.txt.

U_CHAR_NAME_CHOICE_COUNT

One more than the highest normal UCharNameChoice value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_EXTENDED_CHAR_NAME

Standard or synthetic character name.

U_UNICODE_10_CHAR_NAME

The Unicode_1_Name property value which is of little practical value.

Beginning with ICU 49, ICU APIs return an empty string for this name choice. Deprecated. ICU 49

U_UNICODE_CHAR_NAME

Unicode character name (Name property).

UColAttribute

Declared in unicode/ucol.h
 UColAttribute

Attributes that collation service understands.

All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.

Properties
UCOL_ALTERNATE_HANDLING

Attribute for handling variable elements.

Acceptable values are UCOL_NON_IGNORABLE which treats all the codepoints with non-ignorable primary weights in the same way, and UCOL_SHIFTED which causes codepoints with primary weights that are equal to or below the variable top value to be ignored on the primary level and moved to the quaternary level. The default setting in a Collator object depends on the locale data loaded from the resources. For most locales, the default is UCOL_NON_IGNORABLE, but for others, such as "th", the default could be UCOL_SHIFTED.

UCOL_ATTRIBUTE_COUNT

One more than the highest normal UColAttribute value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCOL_CASE_FIRST

Controls the ordering of upper and lower case letters.

Acceptable values are UCOL_OFF, which orders upper and lower case letters in accordance with their tertiary weights, UCOL_UPPER_FIRST which forces upper case letters to sort before lower case letters, and UCOL_LOWER_FIRST which does the opposite. The default setting in a Collator object depends on the locale data loaded from the resources. For most locales, the default is UCOL_OFF, but for others, such as "da" or "mt", the default could be UCOL_UPPER_FIRST.

UCOL_CASE_LEVEL

Controls whether an extra case level (positioned before the third level) is generated or not.

Acceptable values are UCOL_OFF, when case level is not generated, and UCOL_ON which causes the case level to be generated. Contents of the case level are affected by the value of UCOL_CASE_FIRST attribute. A simple way to ignore accent differences in a string is to set the strength to UCOL_PRIMARY and enable case level. The default setting in a Collator object depends on the locale data loaded from the resources.

UCOL_DECOMPOSITION_MODE

An alias for UCOL_NORMALIZATION_MODE attribute.

UCOL_FRENCH_COLLATION

Attribute for direction of secondary weights - used in Canadian French.

Acceptable values are UCOL_ON, which results in secondary weights being considered backwards and UCOL_OFF which treats secondary weights in the order they appear.

UCOL_HIRAGANA_QUATERNARY_MODE

When turned on, this attribute positions Hiragana before all non-ignorables on the quaternary level. This is a sneaky way to produce JIS sort order.

This attribute was an implementation detail of the CLDR Japanese tailoring. Since ICU 50, this attribute is not settable any more via API functions. Since CLDR 25/ICU 53, explicit quaternary relations are used to achieve the same Japanese sort order.

Deprecated. ICU 50 Implementation detail, cannot be set via API, was removed from implementation.

UCOL_NORMALIZATION_MODE

Controls whether the normalization check and necessary normalizations are performed.

When set to UCOL_OFF, no normalization check is performed. The correctness of the result is guaranteed only if the input data is in so-called FCD form (see the user's manual for more info). When set to UCOL_ON, an incremental check is performed to see whether the input data is in the FCD form. If the data is not in the FCD form, incremental NFD normalization is performed. The default setting in a Collator object depends on the locale data loaded from the resources. For many locales, the default is UCOL_OFF, but for others, such as "hi", "vi", or "bn", the default could be UCOL_ON.

UCOL_NUMERIC_COLLATION

When turned on, this attribute makes substrings of digits sort according to their numeric values.

This is a way to get '100' to sort AFTER '2'. Note that the longest digit substring that can be treated as a single unit is 254 digits (not counting leading zeros). If a digit substring is longer than that, the digits beyond the limit will be treated as a separate digit substring.

A "digit" in this sense is a code point with General_Category=Nd, which does not include circled numbers, roman numerals, etc. Only a contiguous digit substring is considered, that is, non-negative integers without separators. There is no support for plus/minus signs, decimals, exponents, etc.

UCOL_STRENGTH

The strength attribute.

Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength for most locales (except Japanese) is tertiary.

Quaternary strength is useful when combined with shifted setting for alternate handling attribute and for JIS X 4061 collation, when it is used to distinguish between Katakana and Hiragana. Otherwise, quaternary level is affected only by the number of non-ignorable code points in the string.

Identical strength is rarely useful, as it amounts to codepoints of the NFD form of the string.

UColAttributeValue

Declared in unicode/ucol.h
 UColAttributeValue

Enum containing attribute values for controlling collation behavior.

Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.

Properties
UCOL_ATTRIBUTE_VALUE_COUNT

One more than the highest normal UColAttributeValue value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCOL_CE_STRENGTH_LIMIT
UCOL_DEFAULT

accepted by most attributes

UCOL_DEFAULT_STRENGTH

Default collation strength.

UCOL_IDENTICAL

Identical collation strength.

UCOL_LOWER_FIRST

Valid for UCOL_CASE_FIRST - lower case sorts before upper case.

UCOL_NON_IGNORABLE

Valid for UCOL_ALTERNATE_HANDLING.

Alternate handling will be non ignorable

UCOL_OFF

Turn the feature off - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE.

UCOL_ON

Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE.

UCOL_PRIMARY

Primary collation strength.

UCOL_QUATERNARY

Quaternary collation strength.

UCOL_SECONDARY

Secondary collation strength.

UCOL_SHIFTED

Valid for UCOL_ALTERNATE_HANDLING.

Alternate handling will be shifted

UCOL_STRENGTH_LIMIT
UCOL_TERTIARY

Tertiary collation strength.

UCOL_UPPER_FIRST

Valid for UCOL_CASE_FIRST - upper case sorts before lower case.

UColBoundMode

Declared in unicode/ucol.h
 UColBoundMode

Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.

Underlying code depends on them having these numbers

Properties
UCOL_BOUND_LOWER

lower bound

UCOL_BOUND_UPPER

upper bound that will match strings of exact size

UCOL_BOUND_UPPER_LONG

upper bound that will match all the strings that have the same initial substring as the given string

UCOL_BOUND_VALUE_COUNT

One more than the highest normal UColBoundMode value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UColReorderCode

Declared in unicode/ucol.h
 UColReorderCode

Enum containing the codes for reordering segments of the collation table that are not script codes.

These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes, ucol_setReorderCodes, ucol_getEquivalentReorderCodes, UScriptCode

Properties
UCOL_REORDER_CODE_CURRENCY

Characters with the currency property.

This is equivalent to the rule value "currency".

UCOL_REORDER_CODE_DEFAULT

A special reordering code that is used to specify the default reordering codes for a locale.

UCOL_REORDER_CODE_DIGIT

Characters with the digit property.

This is equivalent to the rule value "digit".

UCOL_REORDER_CODE_FIRST

The first entry in the enumeration of reordering groups.

This is intended for use in range checking and enumeration of the reorder codes.

UCOL_REORDER_CODE_LIMIT

One more than the highest normal UColReorderCode value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCOL_REORDER_CODE_NONE

A special reordering code that is used to specify no reordering codes.

UCOL_REORDER_CODE_OTHERS

A special reordering code that is used to specify all other codes used for reordering except for the codes listed as UColReorderCode values and those listed explicitly in a reordering.

UCOL_REORDER_CODE_PUNCTUATION

Characters with the punctuation property.

This is equivalent to the rule value "punct".

UCOL_REORDER_CODE_SPACE

Characters with the space property.

This is equivalent to the rule value "space".

UCOL_REORDER_CODE_SYMBOL

Characters with the symbol property.

This is equivalent to the rule value "symbol".

UCollationResult

Declared in unicode/ucol.h
 UCollationResult

UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.

UCOL_EQUAL is returned if source string is compared to be equal to target string in the ucol_strcoll() method. UCOL_GREATER is returned if source string is compared to be greater than target string in the ucol_strcoll() method. See also: ucol_strcoll(). Possible values for a comparison result.

Properties
UCOL_EQUAL

string a == string b

UCOL_GREATER

string a > string b

UCOL_LESS

string a < string b

UDecompositionType

Declared in unicode/uchar.h
 UDecompositionType

Decomposition Type constants.

See also: UCHAR_DECOMPOSITION_TYPE

Properties
U_DT_CANONICAL
U_DT_CIRCLE
U_DT_COMPAT
U_DT_COUNT

One more than the highest normal UDecompositionType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_DT_FINAL
U_DT_FONT
U_DT_FRACTION
U_DT_INITIAL
U_DT_ISOLATED
U_DT_MEDIAL
U_DT_NARROW
U_DT_NOBREAK
U_DT_NONE
U_DT_SMALL
U_DT_SQUARE
U_DT_SUB
U_DT_SUPER
U_DT_VERTICAL
U_DT_WIDE

UDialectHandling

Declared in unicode/uldnames.h
 UDialectHandling

Enum used in LocaleDisplayNames::createInstance.

Properties
ULDN_DIALECT_NAMES

Use dialect names when generating a locale name, e.g. en_GB displays as 'British English'.

ULDN_STANDARD_NAMES

Use standard names when generating a locale name, e.g. en_GB displays as 'English (United Kingdom)'.

UDisplayContext

Declared in unicode/udisplaycontext.h
 UDisplayContext

Display context settings.

Note, the specific numeric values are internal and may change.

Properties
UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE

The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for the beginning of a sentence.

UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE

The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for the middle of a sentence.

UDISPCTX_CAPITALIZATION_FOR_STANDALONE

The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for stand-alone usage such as an isolated name on a calendar page.

UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU

The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for a user-interface list or menu item.

UDISPCTX_CAPITALIZATION_NONE


CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or UDISPCTX_CAPITALIZATION_FOR_STANDALONE.

Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value.

The capitalization context to be used is unknown (this is the default value).

UDISPCTX_DIALECT_NAMES

A possible setting for DIALECT_HANDLING: use dialect names when generating a locale name, e.g. en_GB displays as 'British English'.

UDISPCTX_LENGTH_FULL


DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or UDISPCTX_LENGTH_SHORT.

Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH to get the value.

A possible setting for DISPLAY_LENGTH: use full names when generating a locale name, e.g. "United States" for US.

UDISPCTX_LENGTH_SHORT

A possible setting for DISPLAY_LENGTH: use short names when generating a locale name, e.g. "U.S." for US.

UDISPCTX_NO_SUBSTITUTE

A possible setting for SUBSTITUTE_HANDLING: Returns a null value with error code set to U_ILLEGAL_ARGUMENT_ERROR when no data is available.

UDISPCTX_STANDARD_NAMES


DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.

Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING to get the value.

A possible setting for DIALECT_HANDLING: use standard names when generating a locale name, e.g. en_GB displays as 'English (United Kingdom)'.

UDISPCTX_SUBSTITUTE


SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or UDISPCTX_NO_SUBSTITUTE.

Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING to get the value.

A possible setting for SUBSTITUTE_HANDLING: Returns a fallback value (e.g., the input code) when no data is available. This is the default value.

UDisplayContextType

Declared in unicode/udisplaycontext.h
 UDisplayContextType

Display context types, for getting values of a particular setting.

Note, the specific numeric values are internal and may change.

Properties
UDISPCTX_TYPE_CAPITALIZATION

Type to retrieve the capitalization context setting, e.g. UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc.

UDISPCTX_TYPE_DIALECT_HANDLING

Type to retrieve the dialect handling setting, e.g. UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.

UDISPCTX_TYPE_DISPLAY_LENGTH

Type to retrieve the display length setting, e.g. UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT.

UDISPCTX_TYPE_SUBSTITUTE_HANDLING

Type to retrieve the substitute handling setting, e.g. UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.

UEastAsianWidth

Declared in unicode/uchar.h
 UEastAsianWidth

East Asian Width constants.

See also: UCHAR_EAST_ASIAN_WIDTH, u_getIntPropertyValue

Properties
U_EA_AMBIGUOUS
U_EA_COUNT

One more than the highest normal UEastAsianWidth value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_EA_FULLWIDTH
U_EA_HALFWIDTH
U_EA_NARROW
U_EA_NEUTRAL
U_EA_WIDE

UErrorCode

Declared in unicode/utypes.h
 UErrorCode

Standard ICU4C error code type, a substitute for exceptions.

Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():

UErrorCode errorCode = U_ZERO_ERROR;
// call ICU API that needs an error code parameter.
if (U_FAILURE(errorCode)) {
    // An error occurred. Handle it here.
}

C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.

For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes

Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:

if (U_FAILURE(errorCode)) { return immediately; }

so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.

Properties
U_AMBIGUOUS_ALIAS_WARNING

This converter alias can go to different converter implementations.

U_ARGUMENT_TYPE_MISMATCH

Argument name and argument index mismatch in MessageFormat functions.

U_BAD_VARIABLE_DEFINITION

Missing '$' or duplicate variable name.

U_BRK_ASSIGN_ERROR

Syntax error in RBBI rule assignment statement.

U_BRK_ERROR_LIMIT

One more than the highest normal BreakIterator error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_BRK_ERROR_START

Start of codes indicating Break Iterator failures.

U_BRK_HEX_DIGITS_EXPECTED

Hex digits expected as part of an escaped char in a rule.

U_BRK_INIT_ERROR

Initialization failure.

Probable missing ICU Data.

U_BRK_INTERNAL_ERROR

An internal error (bug) was detected.

U_BRK_MALFORMED_RULE_TAG

The {nnn} tag on a rule is malformed.

U_BRK_MISMATCHED_PAREN

Mis-matched parentheses in an RBBI rule.

U_BRK_NEW_LINE_IN_QUOTED_STRING

Missing closing quote in an RBBI rule.

U_BRK_RULE_EMPTY_SET

Rule contains an empty Unicode Set.

U_BRK_RULE_SYNTAX

Syntax error in RBBI rule.

U_BRK_SEMICOLON_EXPECTED

Missing ';' at the end of an RBBI rule.

U_BRK_UNCLOSED_SET

A UnicodeSet in an RBBI rule is missing a closing ']'.

U_BRK_UNDEFINED_VARIABLE

Use of an undefined $Variable in an RBBI rule.

U_BRK_UNRECOGNIZED_OPTION

!!option in RBBI rules not recognized.

U_BRK_VARIABLE_REDFINITION

RBBI rule $Variable redefined.

U_BUFFER_OVERFLOW_ERROR

A result would not fit in the supplied buffer.

U_CE_NOT_FOUND_ERROR

Currently used only while setting variable top, but can be used generally.

U_COLLATOR_VERSION_MISMATCH

Collator version is not compatible with the base version.

U_DECIMAL_NUMBER_SYNTAX_ERROR

Decimal number syntax error.

U_DEFAULT_KEYWORD_MISSING

Missing DEFAULT rule in plural rules.

U_DIFFERENT_UCA_VERSION

ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules.

No impact to further function

U_DUPLICATE_KEYWORD

Duplicate keyword in PluralFormat.

U_ENUM_OUT_OF_SYNC_ERROR

UEnumeration out of sync with underlying collection.

U_ERROR_LIMIT

One more than the highest normal error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_ERROR_WARNING_LIMIT

One more than the highest normal UErrorCode warning value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_ERROR_WARNING_START

Start of information results (semantically successful)

U_FILE_ACCESS_ERROR

The requested file cannot be found.

U_FMT_PARSE_ERROR_LIMIT

One more than the highest normal formatting API error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_FMT_PARSE_ERROR_START

Start of format library errors.

U_FORMAT_INEXACT_ERROR

Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY.

U_IDNA_ACE_PREFIX_ERROR
U_IDNA_CHECK_BIDI_ERROR
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR
U_IDNA_ERROR_LIMIT

One more than the highest normal IDNA error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_IDNA_ERROR_START
U_IDNA_LABEL_TOO_LONG_ERROR
U_IDNA_PROHIBITED_ERROR
U_IDNA_STD3_ASCII_RULES_ERROR
U_IDNA_UNASSIGNED_ERROR
U_IDNA_VERIFICATION_ERROR
U_IDNA_ZERO_LENGTH_LABEL_ERROR
U_ILLEGAL_ARGUMENT_ERROR

Start of codes indicating failure.

U_ILLEGAL_CHARACTER

A special character is outside its allowed context.

U_ILLEGAL_CHAR_FOUND

Character conversion: Illegal input sequence/combination of input units.

U_ILLEGAL_CHAR_IN_SEGMENT

UNUSED as of ICU 2.4.

U_ILLEGAL_ESCAPE_SEQUENCE

ISO-2022 illegal escape sequence.

U_ILLEGAL_PAD_POSITION

Pad symbol misplaced in number pattern.

U_INDEX_OUTOFBOUNDS_ERROR

Trying to access the index that is out of bounds.

U_INPUT_TOO_LONG_ERROR

The input is impractically long for an operation.

It is rejected because it may lead to problems such as excessive processing time, stack depth, or heap memory requirements.

U_INTERNAL_PROGRAM_ERROR

Indicates a bug in the library code.

U_INTERNAL_TRANSLITERATOR_ERROR

Internal transliterator system error.

U_INVALID_CHAR_FOUND

Character conversion: Unmappable input sequence.

In other APIs: Invalid character.

U_INVALID_FORMAT_ERROR

Data format is not what is expected.

U_INVALID_FUNCTION

A "&fn()" rule specifies an unknown transliterator.

U_INVALID_ID

A "::id" rule specifies an unknown transliterator.

U_INVALID_PROPERTY_PATTERN

UNUSED as of ICU 2.4.

U_INVALID_RBT_SYNTAX

A "::id" rule was passed to the RuleBasedTransliterator parser.

U_INVALID_STATE_ERROR

Requested operation cannot be completed with ICU in its current state.

U_INVALID_TABLE_FILE

Conversion table file not found.

U_INVALID_TABLE_FORMAT

Conversion table file found, but corrupted.

U_INVARIANT_CONVERSION_ERROR

Unable to convert a UChar* string to char* with the invariant converter.

U_MALFORMED_EXPONENTIAL_PATTERN

Grouping symbol in exponent pattern.

U_MALFORMED_PRAGMA

A 'use' pragma is invalid.

U_MALFORMED_RULE

Elements of a rule are misplaced.

U_MALFORMED_SET

A UnicodeSet pattern is invalid.

U_MALFORMED_SYMBOL_REFERENCE

UNUSED as of ICU 2.4.

U_MALFORMED_UNICODE_ESCAPE

A Unicode escape pattern is invalid.

U_MALFORMED_VARIABLE_DEFINITION

A variable definition is invalid.

U_MALFORMED_VARIABLE_REFERENCE

A variable reference is invalid.

U_MEMORY_ALLOCATION_ERROR

Memory allocation error.

U_MESSAGE_PARSE_ERROR

Unable to parse a message (message format)

U_MF_DUPLICATE_DECLARATION_ERROR

The same variable is declared in more than one .local or .input declaration.

Deprecated. This API is for technology preview only.

U_MF_DUPLICATE_OPTION_NAME_ERROR

In an annotation, the same option name appears more than once.

This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only.

U_MF_DUPLICATE_VARIANT_ERROR

A message includes a variant with the same key list as another variant.

Deprecated. This API is for technology preview only.

U_MF_FORMATTING_ERROR

Covers all runtime errors: for example, an internally inconsistent set of options.

Deprecated. This API is for technology preview only.

U_MF_MISSING_SELECTOR_ANNOTATION_ERROR

A selector expression evaluates to an unannotated operand.

Deprecated. This API is for technology preview only.

U_MF_NONEXHAUSTIVE_PATTERN_ERROR

In a match-construct, the variants do not cover all possible values.

This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only.

U_MF_OPERAND_MISMATCH_ERROR

An operand provided to a function does not have the required form for that function.

This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only.

U_MF_SELECTOR_ERROR

A selector function is applied to an operand of the wrong type.

This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only.

U_MF_SYNTAX_ERROR

Includes all syntax errors.

This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only.

U_MF_UNKNOWN_FUNCTION_ERROR

An annotation refers to a function not defined by the standard or custom function registry.

This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only.

U_MF_UNRESOLVED_VARIABLE_ERROR

A variable is referred to but not bound by any definition.

This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only.

U_MF_VARIANT_KEY_MISMATCH_ERROR

In a match-construct, one or more variants had a different number of keys from the number of selectors.

This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only.

U_MISMATCHED_SEGMENT_DELIMITERS

UNUSED as of ICU 2.4.

U_MISPLACED_ANCHOR_START

A start anchor appears at an illegal position.

U_MISPLACED_COMPOUND_FILTER

A compound filter is in an invalid location.

U_MISPLACED_CURSOR_OFFSET

A cursor offset occurs at an illegal position.

U_MISPLACED_QUANTIFIER

A quantifier appears after a segment close delimiter.

U_MISSING_OPERATOR

A rule contains no operator.

U_MISSING_RESOURCE_ERROR

The requested resource cannot be found.

U_MISSING_SEGMENT_CLOSE

UNUSED as of ICU 2.4.

U_MULTIPLE_ANTE_CONTEXTS

More than one ante context.

U_MULTIPLE_COMPOUND_FILTERS

More than one compound filter.

U_MULTIPLE_CURSORS

More than one cursor.

U_MULTIPLE_DECIMAL_SEPARATORS

More than one decimal separator in number pattern.

U_MULTIPLE_DECIMAL_SEPERATORS

Typo: kept for backward compatibility.

Use U_MULTIPLE_DECIMAL_SEPARATORS instead.

U_MULTIPLE_EXPONENTIAL_SYMBOLS

More than one exponent symbol in number pattern.

U_MULTIPLE_PAD_SPECIFIERS

More than one pad symbol in number pattern.

U_MULTIPLE_PERCENT_SYMBOLS

More than one percent symbol in number pattern.

U_MULTIPLE_PERMILL_SYMBOLS

More than one permill symbol in number pattern.

U_MULTIPLE_POST_CONTEXTS

More than one post context.

U_NO_SPACE_AVAILABLE

No space available for in-buffer expansion for Arabic shaping.

U_NO_WRITE_PERMISSION

Attempt to modify read-only or constant data.

U_NUMBER_ARG_OUTOFBOUNDS_ERROR

The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999.

U_NUMBER_SKELETON_SYNTAX_ERROR

The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error.

U_PARSE_ERROR

Equivalent to Java ParseException.

U_PARSE_ERROR_LIMIT

One more than the highest normal Transliterator error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_PARSE_ERROR_START

Start of Transliterator errors.

U_PATTERN_SYNTAX_ERROR

Syntax error in format pattern.

U_PLUGIN_CHANGED_LEVEL_WARNING

A plugin caused a level change.

May not be an error, but later plugins may not load.

U_PLUGIN_DIDNT_SET_LEVEL

The plugin didn't call uplug_setPlugLevel in response to a QUERY.

U_PLUGIN_ERROR_LIMIT

One more than the highest normal plug-in error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_PLUGIN_ERROR_START

Start of codes indicating plugin failures.

U_PLUGIN_TOO_HIGH

The plugin's level is too high to be loaded right now.

U_PRIMARY_TOO_LONG_ERROR

User tried to set variable top to a primary that is longer than two bytes.

U_REGEX_BAD_ESCAPE_SEQUENCE

Unrecognized backslash escape sequence in pattern.

U_REGEX_BAD_INTERVAL

Error in {min,max} interval.

U_REGEX_ERROR_LIMIT

One more than the highest normal regular expression error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_REGEX_ERROR_START

Start of codes indicating Regexp failures.

U_REGEX_INTERNAL_ERROR

An internal error (bug) was detected.

U_REGEX_INVALID_BACK_REF

Back-reference to a non-existent capture group.

U_REGEX_INVALID_CAPTURE_GROUP_NAME

Invalid capture group name.

U_REGEX_INVALID_FLAG

Invalid value for match mode flags.

U_REGEX_INVALID_RANGE

In a character range [x-y], x is greater than y.

U_REGEX_INVALID_STATE

RegexMatcher in invalid state for requested operation.

U_REGEX_LOOK_BEHIND_LIMIT

Look-Behind pattern matches must have a bounded maximum length.

U_REGEX_MAX_LT_MIN

In {min,max}, max is less than min.

U_REGEX_MISMATCHED_PAREN

Incorrectly nested parentheses in regexp pattern.

U_REGEX_MISSING_CLOSE_BRACKET

Missing closing bracket on a bracket expression.

U_REGEX_NUMBER_TOO_BIG

Decimal number is too large.

U_REGEX_OCTAL_TOO_BIG

Octal character constants must be <= 0377.

Deprecated. ICU 54. This error cannot occur.

U_REGEX_PATTERN_TOO_BIG

Pattern exceeds limits on size or complexity.

U_REGEX_PROPERTY_SYNTAX

Incorrect Unicode property.

U_REGEX_RULE_SYNTAX

Syntax error in regexp pattern.

U_REGEX_SET_CONTAINS_STRING

Regexps cannot have UnicodeSets containing strings.

U_REGEX_STACK_OVERFLOW

Regular expression backtrack stack overflow.

U_REGEX_STOPPED_BY_CALLER

Matching operation aborted by user callback fn.

U_REGEX_TIME_OUT

Maximum allowed match time exceeded.

U_REGEX_UNIMPLEMENTED

Use of regexp feature that is not yet implemented.

U_RESOURCE_TYPE_MISMATCH

An operation is requested over a resource that does not support it.

U_RULE_MASK_ERROR

A rule is hidden by an earlier more general rule.

U_SAFECLONE_ALLOCATED_WARNING

A SafeClone operation required allocating memory (informational only)

U_SORT_KEY_TOO_SHORT_WARNING

Number of levels requested in getBound is higher than the number of levels in the sort key.

U_STANDARD_ERROR_LIMIT

One more than the highest standard error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_STATE_OLD_WARNING

ICU has to use compatibility layer to construct the service.

Expect performance/memory usage degradation. Consider upgrading.

U_STATE_TOO_OLD_ERROR

ICU cannot construct a service from this state, as it is no longer supported.

U_STRINGPREP_CHECK_BIDI_ERROR
U_STRINGPREP_PROHIBITED_ERROR
U_STRINGPREP_UNASSIGNED_ERROR
U_STRING_NOT_TERMINATED_WARNING

An output string could not be NUL-terminated because output length==destCapacity.

U_TOO_MANY_ALIASES_ERROR

There are too many aliases in the path to the requested resource.

It is very possible that a circular alias definition has occurred.

U_TRAILING_BACKSLASH

A dangling backslash.

U_TRUNCATED_CHAR_FOUND

Character conversion: Incomplete input sequence.

U_UNCLOSED_SEGMENT

A closing ')' is missing.

U_UNDEFINED_KEYWORD

Undefined Plural keyword.

U_UNDEFINED_SEGMENT_REFERENCE

A segment reference does not correspond to a defined segment.

U_UNDEFINED_VARIABLE

A variable reference does not correspond to a defined variable.

U_UNEXPECTED_TOKEN

Syntax error in format pattern.

U_UNMATCHED_BRACES

Braces do not match in message pattern.

U_UNQUOTED_SPECIAL

A special character was not quoted or escaped.

U_UNSUPPORTED_ATTRIBUTE

UNUSED as of ICU 2.4.

U_UNSUPPORTED_ERROR

Requested operation not supported in current context.

U_UNSUPPORTED_ESCAPE_SEQUENCE

ISO-2022 unsupported escape sequence.

U_UNSUPPORTED_PROPERTY

UNUSED as of ICU 2.4.

U_UNTERMINATED_QUOTE

A closing single quote is missing.

U_USELESS_COLLATOR_ERROR

Collator is options only and no base is specified.

U_USING_DEFAULT_WARNING

A resource bundle lookup returned a result from the root locale (not an error)

U_USING_FALLBACK_WARNING

A resource bundle lookup returned a fallback result (not an error)

U_VARIABLE_RANGE_EXHAUSTED

Too many stand-ins generated for the given variable range.

U_VARIABLE_RANGE_OVERLAP

The variable range overlaps characters used in rules.

U_ZERO_ERROR

No error, no warning.

UGraphemeClusterBreak

Declared in unicode/uchar.h
 UGraphemeClusterBreak

Grapheme Cluster Break constants.

See also: UCHAR_GRAPHEME_CLUSTER_BREAK

Properties
U_GCB_CONTROL
U_GCB_COUNT

One more than the highest normal UGraphemeClusterBreak value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_GCB_CR
U_GCB_EXTEND
U_GCB_E_BASE

U_GCB_E_BASE_GAZ

U_GCB_E_MODIFIER

U_GCB_GLUE_AFTER_ZWJ

U_GCB_L
U_GCB_LF
U_GCB_LV
U_GCB_LVT
U_GCB_OTHER
U_GCB_PREPEND

U_GCB_REGIONAL_INDICATOR

U_GCB_SPACING_MARK

U_GCB_T
U_GCB_V
U_GCB_ZWJ

UHangulSyllableType

Declared in unicode/uchar.h
 UHangulSyllableType

Hangul Syllable Type constants.

See also: UCHAR_HANGUL_SYLLABLE_TYPE

Properties
U_HST_COUNT

One more than the highest normal UHangulSyllableType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_HST_LEADING_JAMO
U_HST_LVT_SYLLABLE
U_HST_LV_SYLLABLE
U_HST_NOT_APPLICABLE
U_HST_TRAILING_JAMO
U_HST_VOWEL_JAMO

UIdentifierStatus

Declared in unicode/uchar.h
 UIdentifierStatus

UIndicConjunctBreak

Declared in unicode/uchar.h
 UIndicConjunctBreak

UJoiningGroup

Declared in unicode/uchar.h
 UJoiningGroup

Joining Group constants.

See also: UCHAR_JOINING_GROUP

Properties
U_JG_AFRICAN_FEH

U_JG_AFRICAN_NOON

U_JG_AFRICAN_QAF

U_JG_AIN
U_JG_ALAPH
U_JG_ALEF
U_JG_BEH
U_JG_BETH
U_JG_BURUSHASKI_YEH_BARREE

U_JG_COUNT

One more than the highest normal UJoiningGroup value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_JG_DAL
U_JG_DALATH_RISH
U_JG_E
U_JG_FARSI_YEH

U_JG_FE

U_JG_FEH
U_JG_FINAL_SEMKATH
U_JG_GAF
U_JG_GAMAL
U_JG_HAH
U_JG_HAMZA_ON_HEH_GOAL
U_JG_HANIFI_ROHINGYA_KINNA_YA

U_JG_HANIFI_ROHINGYA_PA

U_JG_HE
U_JG_HEH
U_JG_HEH_GOAL
U_JG_HETH
U_JG_KAF
U_JG_KAPH
U_JG_KASHMIRI_YEH

U_JG_KHAPH

U_JG_KNOTTED_HEH
U_JG_LAM
U_JG_LAMADH
U_JG_MALAYALAM_BHA

U_JG_MALAYALAM_JA

U_JG_MALAYALAM_LLA

U_JG_MALAYALAM_LLLA

U_JG_MALAYALAM_NGA

U_JG_MALAYALAM_NNA

U_JG_MALAYALAM_NNNA

U_JG_MALAYALAM_NYA

U_JG_MALAYALAM_RA

U_JG_MALAYALAM_SSA

U_JG_MALAYALAM_TTA

U_JG_MANICHAEAN_ALEPH

U_JG_MANICHAEAN_AYIN

U_JG_MANICHAEAN_BETH

U_JG_MANICHAEAN_DALETH

U_JG_MANICHAEAN_DHAMEDH

U_JG_MANICHAEAN_FIVE

U_JG_MANICHAEAN_GIMEL

U_JG_MANICHAEAN_HETH

U_JG_MANICHAEAN_HUNDRED

U_JG_MANICHAEAN_KAPH

U_JG_MANICHAEAN_LAMEDH

U_JG_MANICHAEAN_MEM

U_JG_MANICHAEAN_NUN

U_JG_MANICHAEAN_ONE

U_JG_MANICHAEAN_PE

U_JG_MANICHAEAN_QOPH

U_JG_MANICHAEAN_RESH

U_JG_MANICHAEAN_SADHE

U_JG_MANICHAEAN_SAMEKH

U_JG_MANICHAEAN_TAW

U_JG_MANICHAEAN_TEN

U_JG_MANICHAEAN_TETH

U_JG_MANICHAEAN_THAMEDH

U_JG_MANICHAEAN_TWENTY

U_JG_MANICHAEAN_WAW

U_JG_MANICHAEAN_YODH

U_JG_MANICHAEAN_ZAYIN

U_JG_MEEM
U_JG_MIM
U_JG_NOON
U_JG_NO_JOINING_GROUP
U_JG_NUN
U_JG_NYA

U_JG_PE
U_JG_QAF
U_JG_QAPH
U_JG_REH
U_JG_REVERSED_PE
U_JG_ROHINGYA_YEH

U_JG_SAD
U_JG_SADHE
U_JG_SEEN
U_JG_SEMKATH
U_JG_SHIN
U_JG_STRAIGHT_WAW

U_JG_SWASH_KAF
U_JG_SYRIAC_WAW
U_JG_TAH
U_JG_TAW
U_JG_TEH_MARBUTA
U_JG_TEH_MARBUTA_GOAL

U_JG_TETH
U_JG_THIN_YEH

U_JG_VERTICAL_TAIL

U_JG_WAW
U_JG_YEH
U_JG_YEH_BARREE
U_JG_YEH_WITH_TAIL
U_JG_YUDH
U_JG_YUDH_HE
U_JG_ZAIN
U_JG_ZHAIN

UJoiningType

Declared in unicode/uchar.h
 UJoiningType

Joining Type constants.

See also: UCHAR_JOINING_TYPE

Properties
U_JT_COUNT

One more than the highest normal UJoiningType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_JT_DUAL_JOINING
U_JT_JOIN_CAUSING
U_JT_LEFT_JOINING
U_JT_NON_JOINING
U_JT_RIGHT_JOINING
U_JT_TRANSPARENT

ULayoutType

Declared in unicode/uloc.h
 ULayoutType

Enums for the return values of the character and line orientation functions.

Properties
ULOC_LAYOUT_BTT
ULOC_LAYOUT_LTR
ULOC_LAYOUT_RTL
ULOC_LAYOUT_TTB
ULOC_LAYOUT_UNKNOWN

ULineBreakTag

Declared in unicode/ubrk.h
 ULineBreakTag

Enum constants for the line break tags returned by getRuleStatus().

A range of values is defined for each category of line break, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

Properties
UBRK_LINE_HARD

Tag value for a hard, or mandatory line break.

UBRK_LINE_HARD_LIMIT

Upper bound for hard line breaks.

UBRK_LINE_SOFT

Tag value for soft line breaks, positions at which a line break is acceptable but not required.

UBRK_LINE_SOFT_LIMIT

Upper bound for soft line breaks.

ULocAvailableType

Declared in unicode/uloc.h
 ULocAvailableType

Types for uloc_getAvailableByType and uloc_countAvailableByType.

Properties
ULOC_AVAILABLE_COUNT

This API is for internal use only.

ULOC_AVAILABLE_DEFAULT

Locales that return data when passed to ICU APIs, but not including legacy or alias locales.

ULOC_AVAILABLE_ONLY_LEGACY_ALIASES

Legacy or alias locales that return data when passed to ICU APIs.

Examples of supported legacy or alias locales:

  • iw (alias to he)
  • mo (alias to ro)
  • zh_CN (alias to zh_Hans_CN)
  • sr_BA (alias to sr_Cyrl_BA)
  • ars (alias to ar_SA)

The locales in this set are disjoint from the ones in ULOC_AVAILABLE_DEFAULT. To get both sets at the same time, use ULOC_AVAILABLE_WITH_LEGACY_ALIASES.

ULOC_AVAILABLE_WITH_LEGACY_ALIASES

The union of the locales in ULOC_AVAILABLE_DEFAULT and ULOC_AVAILABLE_ONLY_LEGACY_ALIASES.

ULocDataLocaleType

Declared in unicode/uloc.h
 ULocDataLocaleType

Constants for *_getLocale(). Allow the user to select whether they want information on the requested, valid, or actual locale.

For example, a collator for "en_US_CALIFORNIA" was requested. In the current state of ICU (2.0), the requested locale is "en_US_CALIFORNIA", the valid locale is "en_US" (most specific locale supported by ICU) and the actual locale is "root" (the collation data comes unmodified from the UCA). The locale is considered supported by ICU if there is a core ICU bundle for that locale (although it may be empty).

Properties
ULOC_ACTUAL_LOCALE

This is the locale the data actually comes from.

ULOC_DATA_LOCALE_TYPE_LIMIT

One more than the highest normal ULocDataLocaleType value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

ULOC_REQUESTED_LOCALE

This is the requested locale.

Deprecated. ICU 2.8

ULOC_VALID_LOCALE

This is the most specific locale supported by ICU.

UNormalization2Mode

Declared in unicode/unorm2.h
 UNormalization2Mode

Constants for normalization modes.

For details about standard Unicode normalization forms and about the algorithms which are also used with custom mapping tables see http://www.unicode.org/unicode/reports/tr15/

Properties
UNORM2_COMPOSE

Decomposition followed by composition.

Same as standard NFC when using an "nfc" instance. Same as standard NFKC when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/

UNORM2_COMPOSE_CONTIGUOUS

Compose only contiguously.

Also known as "FCC" or "Fast C Contiguous". The result will often but not always be in NFC. The result will conform to FCD which is useful for processing. Not a standard Unicode normalization form. For details see http://www.unicode.org/notes/tn5/#FCC

UNORM2_DECOMPOSE

Map, and reorder canonically.

Same as standard NFD when using an "nfc" instance. Same as standard NFKD when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/

UNORM2_FCD

"Fast C or D" form.

If a string is in this form, then further decomposition without reordering would yield the same form as DECOMPOSE. Text in "Fast C or D" form can be processed efficiently with data tables that are "canonically closed", that is, that provide equivalent data for equivalent text, without having to be fully normalized. Not a standard Unicode normalization form. Not a unique form: Different FCD strings can be canonically equivalent. For details see http://www.unicode.org/notes/tn5/#FCD

UNormalizationCheckResult

Declared in unicode/unorm2.h
 UNormalizationCheckResult

Result values for normalization quick check functions.

For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms

Properties
UNORM_MAYBE

The input string may or may not be in the normalization form.

This value is only returned for composition forms like NFC and FCC, when a backward-combining character is found for which the surrounding text would have to be analyzed further.

UNORM_NO

The input string is not in the normalization form.

UNORM_YES

The input string is in the normalization form.

UNumericType

Declared in unicode/uchar.h
 UNumericType

Numeric Type constants.

See also: UCHAR_NUMERIC_TYPE

Properties
U_NT_COUNT

One more than the highest normal UNumericType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_NT_DECIMAL
U_NT_DIGIT
U_NT_NONE
U_NT_NUMERIC

UProperty

Declared in unicode/uchar.h
 UProperty

Selection constants for Unicode properties.

These constants are used in functions like u_hasBinaryProperty to select one of the Unicode properties.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).

For details about the properties see UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).

Important: If ICU is built with UCD files from an older Unicode version (e.g., one below 3.2), then properties marked with "new in Unicode 3.2" are either unavailable or not fully available. Check u_getUnicodeVersion to be sure.

See also: u_hasBinaryProperty, u_getIntPropertyValue, u_getUnicodeVersion

Properties
UCHAR_AGE

String property Age.

Corresponds to u_charAge.

UCHAR_ALPHABETIC

Binary property Alphabetic.

Same as u_isUAlphabetic, different from u_isalpha. Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic

UCHAR_ASCII_HEX_DIGIT

Binary property ASCII_Hex_Digit.

0-9 A-F a-f

UCHAR_BASIC_EMOJI

Binary property of strings Basic_Emoji.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_BIDI_CLASS

Enumerated property Bidi_Class.

Same as u_charDirection, returns UCharDirection values.

UCHAR_BIDI_CONTROL

Binary property Bidi_Control.

Format controls which have specific functions in the Bidi Algorithm.

UCHAR_BIDI_MIRRORED

Binary property Bidi_Mirrored.

Characters that may change display in RTL text. Same as u_isMirrored. See Bidi Algorithm, UTR 9.

UCHAR_BIDI_MIRRORING_GLYPH

String property Bidi_Mirroring_Glyph.

Corresponds to u_charMirror.

UCHAR_BIDI_PAIRED_BRACKET

String property Bidi_Paired_Bracket (new in Unicode 6.3).

Corresponds to u_getBidiPairedBracket.

UCHAR_BIDI_PAIRED_BRACKET_TYPE

Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).

Used in UAX #9: Unicode Bidirectional Algorithm (http://www.unicode.org/reports/tr9/). Returns UBidiPairedBracketType values.

UCHAR_BINARY_LIMIT

One more than the last constant for binary Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_BINARY_START

First constant for binary Unicode properties.

UCHAR_BLOCK

Enumerated property Block.

Same as ublock_getCode, returns UBlockCode values.

UCHAR_CANONICAL_COMBINING_CLASS

Enumerated property Canonical_Combining_Class.

Same as u_getCombiningClass, returns 8-bit numeric values.

UCHAR_CASED

Binary property Cased.

For Lowercase, Uppercase and Titlecase characters.

UCHAR_CASE_FOLDING

String property Case_Folding.

Corresponds to u_strFoldCase in ustring.h.

UCHAR_CASE_IGNORABLE

Binary property Case_Ignorable.

Used in context-sensitive case mappings.

UCHAR_CASE_SENSITIVE

Binary property Case_Sensitive.

Either the source of a case mapping or in the target of a case mapping. Not the same as the general category Cased_Letter.

UCHAR_CHANGES_WHEN_CASEFOLDED

Binary property Changes_When_Casefolded.

UCHAR_CHANGES_WHEN_CASEMAPPED

Binary property Changes_When_Casemapped.

UCHAR_CHANGES_WHEN_LOWERCASED

Binary property Changes_When_Lowercased.

UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED

Binary property Changes_When_NFKC_Casefolded.

UCHAR_CHANGES_WHEN_TITLECASED

Binary property Changes_When_Titlecased.

UCHAR_CHANGES_WHEN_UPPERCASED

Binary property Changes_When_Uppercased.

UCHAR_DASH

Binary property Dash.

Variations of dashes.

UCHAR_DECOMPOSITION_TYPE

Enumerated property Decomposition_Type.

Returns UDecompositionType values.

UCHAR_DEFAULT_IGNORABLE_CODE_POINT

Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).

Ignorable in most processing. <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space)

UCHAR_DEPRECATED

Binary property Deprecated (new in Unicode 3.2).

The usage of deprecated characters is strongly discouraged.

UCHAR_DIACRITIC

Binary property Diacritic.

Characters that linguistically modify the meaning of another character to which they apply.

UCHAR_DOUBLE_LIMIT

One more than the last constant for double Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_DOUBLE_START

First constant for double Unicode properties.

UCHAR_EAST_ASIAN_WIDTH

Enumerated property East_Asian_Width.

Returns UEastAsianWidth values. See http://www.unicode.org/reports/tr11/

UCHAR_EMOJI

Binary property Emoji.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EMOJI_COMPONENT

Binary property Emoji_Component.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EMOJI_KEYCAP_SEQUENCE

Binary property of strings Emoji_Keycap_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_EMOJI_MODIFIER

Binary property Emoji_Modifier.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EMOJI_MODIFIER_BASE

Binary property Emoji_Modifier_Base.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EMOJI_PRESENTATION

Binary property Emoji_Presentation.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EXTENDED_PICTOGRAPHIC

Binary property Extended_Pictographic.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EXTENDER

Binary property Extender.

Extend the value or shape of a preceding alphabetic character, e.g., length and iteration marks.

UCHAR_FULL_COMPOSITION_EXCLUSION

Binary property Full_Composition_Exclusion.

CompositionExclusions.txt + Singleton Decompositions + Non-Starter Decompositions.

UCHAR_GENERAL_CATEGORY

Enumerated property General_Category.

Same as u_charType, returns UCharCategory values.

UCHAR_GENERAL_CATEGORY_MASK

Bitmask property General_Category_Mask.

This is the General_Category property returned as a bit mask. When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), returns bit masks for UCharCategory values where exactly one bit is set. When used with u_getPropertyValueName() and u_getPropertyValueEnum(), a multi-bit mask is used for sets of categories like "Letters". Mask values should be cast to uint32_t.

UCHAR_GRAPHEME_BASE

Binary property Grapheme_Base (new in Unicode 3.2).

For programmatic determination of grapheme cluster boundaries. [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ

UCHAR_GRAPHEME_CLUSTER_BREAK

Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).

Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/). Returns UGraphemeClusterBreak values.

UCHAR_GRAPHEME_EXTEND

Binary property Grapheme_Extend (new in Unicode 3.2).

For programmatic determination of grapheme cluster boundaries. Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ

UCHAR_GRAPHEME_LINK

Binary property Grapheme_Link (new in Unicode 3.2).

For programmatic determination of grapheme cluster boundaries.

UCHAR_HANGUL_SYLLABLE_TYPE

Enumerated property Hangul_Syllable_Type, new in Unicode 4.

Returns UHangulSyllableType values.

UCHAR_HEX_DIGIT

Binary property Hex_Digit.

Characters commonly used for hexadecimal numbers.

UCHAR_HYPHEN

Binary property Hyphen.

Dashes used to mark connections between pieces of words, plus the Katakana middle dot.

UCHAR_IDENTIFIER_STATUS

Enumerated property Identifier_Status.

Used for UTS #39 General Security Profile for Identifiers (https://www.unicode.org/reports/tr39/#General_Security_Profile).

UCHAR_IDENTIFIER_TYPE

Miscellaneous property Identifier_Type.

Used for UTS #39 General Security Profile for Identifiers (https://www.unicode.org/reports/tr39/#General_Security_Profile).

Corresponds to u_hasIDType() and u_getIDTypes().

Each code point maps to a set of UIdentifierType values.

See also: u_hasIDType, u_getIDTypes

UCHAR_IDEOGRAPHIC

Binary property Ideographic.

CJKV ideographs.

UCHAR_IDS_BINARY_OPERATOR

Binary property IDS_Binary_Operator (new in Unicode 3.2).

For programmatic determination of Ideographic Description Sequences.

UCHAR_IDS_TRINARY_OPERATOR

Binary property IDS_Trinary_Operator (new in Unicode 3.2).

For programmatic determination of Ideographic Description Sequences.

UCHAR_IDS_UNARY_OPERATOR

Binary property IDS_Unary_Operator.

For programmatic determination of Ideographic Description Sequences.

UCHAR_ID_COMPAT_MATH_CONTINUE

Binary property ID_Compat_Math_Continue.

Used in mathematical identifier profile in UAX #31.

UCHAR_ID_COMPAT_MATH_START

Binary property ID_Compat_Math_Start.

Used in mathematical identifier profile in UAX #31.

UCHAR_ID_CONTINUE

Binary property ID_Continue.

Characters that can continue an identifier. DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." ID_Start+Mn+Mc+Nd+Pc

UCHAR_ID_START

Binary property ID_Start.

Characters that can start an identifier. Lu+Ll+Lt+Lm+Lo+Nl

UCHAR_INDIC_CONJUNCT_BREAK

Enumerated property Indic_Conjunct_Break.

Used in the grapheme cluster break algorithm in UAX #29.

UCHAR_INDIC_POSITIONAL_CATEGORY

Enumerated property Indic_Positional_Category.

New in Unicode 6.0 as provisional property Indic_Matra_Category; renamed and changed to informative in Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt

UCHAR_INDIC_SYLLABIC_CATEGORY

Enumerated property Indic_Syllabic_Category.

New in Unicode 6.0 as provisional; informative since Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt

UCHAR_INT_LIMIT

One more than the last constant for enumerated/integer Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_INT_START

First constant for enumerated/integer Unicode properties.

UCHAR_INVALID_CODE

Represents a nonexistent or invalid property or property value.

UCHAR_ISO_COMMENT

Deprecated string property ISO_Comment.

Corresponds to u_getISOComment. Deprecated. ICU 49

UCHAR_JOINING_GROUP

Enumerated property Joining_Group.

Returns UJoiningGroup values.

UCHAR_JOINING_TYPE

Enumerated property Joining_Type.

Returns UJoiningType values.

UCHAR_JOIN_CONTROL

Binary property Join_Control.

Format controls for cursive joining and ligation.

UCHAR_LEAD_CANONICAL_COMBINING_CLASS

Enumerated property Lead_Canonical_Combining_Class.

ICU-specific property for the ccc of the first code point of the decomposition, or lccc(c)=ccc(NFD(c)[0]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS.

UCHAR_LINE_BREAK

Enumerated property Line_Break.

Returns ULineBreak values.

UCHAR_LOGICAL_ORDER_EXCEPTION

Binary property Logical_Order_Exception (new in Unicode 3.2).

Characters that do not use logical order and require special handling in most processing.

UCHAR_LOWERCASE

Binary property Lowercase.

Same as u_isULowercase, different from u_islower. Ll+Other_Lowercase

UCHAR_LOWERCASE_MAPPING

String property Lowercase_Mapping.

Corresponds to u_strToLower in ustring.h.

UCHAR_MASK_LIMIT

One more than the last constant for bit-mask Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_MASK_START

First constant for bit-mask Unicode properties.

UCHAR_MATH

Binary property Math.

Sm+Other_Math

UCHAR_MODIFIER_COMBINING_MARK

Binary property Modifier_Combining_Mark.

Used by the AMTRA algorithm in UAX #53.

UCHAR_NAME

String property Name.

Corresponds to u_charName.

UCHAR_NFC_INERT

Binary property NFC_Inert.

ICU-specific property for characters that are inert under NFC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.

UCHAR_NFC_QUICK_CHECK

Enumerated property NFC_Quick_Check.

Returns UNormalizationCheckResult values.

UCHAR_NFD_INERT

Binary property NFD_Inert.

ICU-specific property for characters that are inert under NFD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.

UCHAR_NFD_QUICK_CHECK

Enumerated property NFD_Quick_Check.

Returns UNormalizationCheckResult values.

UCHAR_NFKC_INERT

Binary property NFKC_Inert.

ICU-specific property for characters that are inert under NFKC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.

UCHAR_NFKC_QUICK_CHECK

Enumerated property NFKC_Quick_Check.

Returns UNormalizationCheckResult values.

UCHAR_NFKD_INERT

Binary property NFKD_Inert.

ICU-specific property for characters that are inert under NFKD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.

UCHAR_NFKD_QUICK_CHECK

Enumerated property NFKD_Quick_Check.

Returns UNormalizationCheckResult values.

UCHAR_NONCHARACTER_CODE_POINT

Binary property Noncharacter_Code_Point.

Code points that are explicitly defined as illegal for the encoding of characters.

UCHAR_NUMERIC_TYPE

Enumerated property Numeric_Type.

Returns UNumericType values.

UCHAR_NUMERIC_VALUE

Double property Numeric_Value.

Corresponds to u_getNumericValue.

UCHAR_OTHER_PROPERTY_LIMIT

One more than the last constant for Unicode properties with unusual value types.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_OTHER_PROPERTY_START

First constant for Unicode properties with unusual value types.

UCHAR_PATTERN_SYNTAX

Binary property Pattern_Syntax (new in Unicode 4.1).

See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/)

UCHAR_PATTERN_WHITE_SPACE

Binary property Pattern_White_Space (new in Unicode 4.1).

See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/)

UCHAR_POSIX_ALNUM

Binary property alnum (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_POSIX_BLANK

Binary property blank (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_POSIX_GRAPH

Binary property graph (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_POSIX_PRINT

Binary property print (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_POSIX_XDIGIT

Binary property xdigit (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_PREPENDED_CONCATENATION_MARK

Binary property Prepended_Concatenation_Mark.

UCHAR_QUOTATION_MARK

Binary property Quotation_Mark.

UCHAR_RADICAL

Binary property Radical (new in Unicode 3.2).

For programmatic determination of Ideographic Description Sequences.

UCHAR_REGIONAL_INDICATOR

Binary property Regional_Indicator.

UCHAR_RGI_EMOJI

Binary property of strings RGI_Emoji.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_RGI_EMOJI_FLAG_SEQUENCE

Binary property of strings RGI_Emoji_Flag_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE

Binary property of strings RGI_Emoji_Modifier_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_RGI_EMOJI_TAG_SEQUENCE

Binary property of strings RGI_Emoji_Tag_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_RGI_EMOJI_ZWJ_SEQUENCE

Binary property of strings RGI_Emoji_ZWJ_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_SCRIPT

Enumerated property Script.

Same as uscript_getScript, returns UScriptCode values.

UCHAR_SCRIPT_EXTENSIONS

Miscellaneous property Script_Extensions (new in Unicode 6.0).

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/. Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.

UCHAR_SEGMENT_STARTER

Binary property Segment_Starter.

ICU-specific property for characters that are starters in terms of Unicode normalization and combining character sequences. They have ccc=0 and do not occur in non-initial position of the canonical decomposition of any character (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). ICU uses this property for segmenting a string for generating a set of canonically equivalent strings, e.g. for canonical closure while processing collation tailoring rules.

UCHAR_SENTENCE_BREAK

Enumerated property Sentence_Break (new in Unicode 4.1).

Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/). Returns USentenceBreak values.

UCHAR_SIMPLE_CASE_FOLDING

String property Simple_Case_Folding.

Corresponds to u_foldCase.

UCHAR_SIMPLE_LOWERCASE_MAPPING

String property Simple_Lowercase_Mapping.

Corresponds to u_tolower.

UCHAR_SIMPLE_TITLECASE_MAPPING

String property Simple_Titlecase_Mapping.

Corresponds to u_totitle.

UCHAR_SIMPLE_UPPERCASE_MAPPING

String property Simple_Uppercase_Mapping.

Corresponds to u_toupper.

UCHAR_SOFT_DOTTED

Binary property Soft_Dotted (new in Unicode 3.2).

Characters with a "soft dot", like i or j. An accent placed on these characters causes the dot to disappear.

UCHAR_STRING_LIMIT

One more than the last constant for string Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_STRING_START

First constant for string Unicode properties.

UCHAR_S_TERM

Binary property STerm (new in Unicode 4.0.1).

Sentence Terminal. Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/)

UCHAR_TERMINAL_PUNCTUATION

Binary property Terminal_Punctuation.

Punctuation characters that generally mark the end of textual units.

UCHAR_TITLECASE_MAPPING

String property Titlecase_Mapping.

Corresponds to u_strToTitle in ustring.h.

UCHAR_TRAIL_CANONICAL_COMBINING_CLASS

Enumerated property Trail_Canonical_Combining_Class.

ICU-specific property for the ccc of the last code point of the decomposition, or tccc(c)=ccc(NFD(c)[last]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS.

UCHAR_UNICODE_1_NAME

String property Unicode_1_Name.

This property is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this property. Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). Deprecated. ICU 49

UCHAR_UNIFIED_IDEOGRAPH

Binary property Unified_Ideograph (new in Unicode 3.2).

For programmatic determination of Ideographic Description Sequences.

UCHAR_UPPERCASE

Binary property Uppercase.

Same as u_isUUppercase, different from u_isupper. Lu+Other_Uppercase

UCHAR_UPPERCASE_MAPPING

String property Uppercase_Mapping.

Corresponds to u_strToUpper in ustring.h.

UCHAR_VARIATION_SELECTOR

Binary property Variation_Selector (new in Unicode 4.0.1).

Indicates all those characters that qualify as Variation Selectors. For details on the behavior of these characters, see StandardizedVariants.html and 15.6 Variation Selectors.

UCHAR_VERTICAL_ORIENTATION

Enumerated property Vertical_Orientation.

Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). New as a UCD property in Unicode 10.0.

UCHAR_WHITE_SPACE

Binary property White_Space.

Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. Space characters+TAB+CR+LF-ZWSP-ZWNBSP

UCHAR_WORD_BREAK

Enumerated property Word_Break (new in Unicode 4.1).

Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/). Returns UWordBreakValues values.

UCHAR_XID_CONTINUE

Binary property XID_Continue.

ID_Continue modified to allow closure under normalization forms NFKC and NFKD.

UCHAR_XID_START

Binary property XID_Start.

ID_Start modified to allow closure under normalization forms NFKC and NFKD.

UPropertyNameChoice

Declared in unicode/uchar.h
 UPropertyNameChoice

Selector constants for u_getPropertyName() and u_getPropertyValueName().

These selectors are used to choose which name is returned for a given property or value. All properties and values have a long name. Most have a short name, but some do not. Unicode allows for additional names, beyond the long and short name, which would be indicated by U_LONG_PROPERTY_NAME + i, where i=1, 2,...

See also: u_getPropertyName() See also: u_getPropertyValueName()

Properties
U_LONG_PROPERTY_NAME
U_PROPERTY_NAME_CHOICE_COUNT

One more than the highest normal UPropertyNameChoice value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_SHORT_PROPERTY_NAME

UScriptCode

Declared in unicode/uscript.h
 UScriptCode

Constants for ISO 15924 script codes.

The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .

In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.

Properties
USCRIPT_ADLAM

USCRIPT_AFAKA

USCRIPT_AHOM

USCRIPT_ANATOLIAN_HIEROGLYPHS

USCRIPT_ARABIC

USCRIPT_ARABIC_NASTALIQ

USCRIPT_ARMENIAN

USCRIPT_AVESTAN

USCRIPT_BALINESE

USCRIPT_BAMUM

USCRIPT_BASSA_VAH

USCRIPT_BATAK

USCRIPT_BENGALI

USCRIPT_BHAIKSUKI

USCRIPT_BLISSYMBOLS

USCRIPT_BOOK_PAHLAVI

USCRIPT_BOPOMOFO

USCRIPT_BRAHMI

USCRIPT_BRAILLE

USCRIPT_BUGINESE

USCRIPT_BUHID

USCRIPT_CANADIAN_ABORIGINAL

Canadian_Aboriginal script.

USCRIPT_CARIAN

USCRIPT_CAUCASIAN_ALBANIAN

USCRIPT_CHAKMA

USCRIPT_CHAM

USCRIPT_CHEROKEE

USCRIPT_CHORASMIAN

USCRIPT_CIRTH

USCRIPT_CODE_LIMIT

One more than the highest normal UScriptCode value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

USCRIPT_COMMON

USCRIPT_COPTIC

USCRIPT_CUNEIFORM

USCRIPT_CYPRIOT

USCRIPT_CYPRO_MINOAN

USCRIPT_CYRILLIC

USCRIPT_DEMOTIC_EGYPTIAN

USCRIPT_DESERET

USCRIPT_DEVANAGARI

USCRIPT_DIVES_AKURU

USCRIPT_DOGRA

USCRIPT_DUPLOYAN

USCRIPT_DUPLOYAN_SHORTAND

Deprecated. ICU 54 Typo, use USCRIPT_DUPLOYAN

USCRIPT_EASTERN_SYRIAC

USCRIPT_EGYPTIAN_HIEROGLYPHS

USCRIPT_ELBASAN

USCRIPT_ELYMAIC

USCRIPT_ESTRANGELO_SYRIAC

USCRIPT_ETHIOPIC

USCRIPT_GARAY

USCRIPT_GEORGIAN

USCRIPT_GLAGOLITIC

USCRIPT_GOTHIC

USCRIPT_GRANTHA

USCRIPT_GREEK

USCRIPT_GUJARATI

USCRIPT_GUNJALA_GONDI

USCRIPT_GURMUKHI

USCRIPT_GURUNG_KHEMA

USCRIPT_HAN

USCRIPT_HANGUL

USCRIPT_HANIFI_ROHINGYA

USCRIPT_HANUNOO

USCRIPT_HAN_WITH_BOPOMOFO

USCRIPT_HARAPPAN_INDUS

USCRIPT_HATRAN

USCRIPT_HEBREW

USCRIPT_HIERATIC_EGYPTIAN

USCRIPT_HIRAGANA

USCRIPT_IMPERIAL_ARAMAIC

USCRIPT_INHERITED

USCRIPT_INSCRIPTIONAL_PAHLAVI

USCRIPT_INSCRIPTIONAL_PARTHIAN

USCRIPT_INVALID_CODE

USCRIPT_JAMO

USCRIPT_JAPANESE

USCRIPT_JAVANESE

USCRIPT_JURCHEN

USCRIPT_KAITHI

USCRIPT_KANNADA

USCRIPT_KATAKANA

USCRIPT_KATAKANA_OR_HIRAGANA

New script code in Unicode 4.0.1.

USCRIPT_KAWI

USCRIPT_KAYAH_LI

USCRIPT_KHAROSHTHI

USCRIPT_KHITAN_SMALL_SCRIPT

USCRIPT_KHMER

USCRIPT_KHOJKI

USCRIPT_KHUDAWADI

USCRIPT_KHUTSURI

USCRIPT_KIRAT_RAI

USCRIPT_KOREAN

USCRIPT_KPELLE

USCRIPT_LANNA

USCRIPT_LAO

USCRIPT_LATIN

USCRIPT_LATIN_FRAKTUR

USCRIPT_LATIN_GAELIC

USCRIPT_LEPCHA

USCRIPT_LIMBU

USCRIPT_LINEAR_A

USCRIPT_LINEAR_B

USCRIPT_LISU

USCRIPT_LOMA

USCRIPT_LYCIAN

USCRIPT_LYDIAN

USCRIPT_MAHAJANI

USCRIPT_MAKASAR

USCRIPT_MALAYALAM

USCRIPT_MANDAEAN

USCRIPT_MANDAIC

USCRIPT_MANICHAEAN

USCRIPT_MARCHEN

USCRIPT_MASARAM_GONDI

USCRIPT_MATHEMATICAL_NOTATION

USCRIPT_MAYAN_HIEROGLYPHS

USCRIPT_MEDEFAIDRIN

USCRIPT_MEITEI_MAYEK

USCRIPT_MENDE

Mende Kikakui.

USCRIPT_MEROITIC

USCRIPT_MEROITIC_CURSIVE

USCRIPT_MEROITIC_HIEROGLYPHS

USCRIPT_MIAO

USCRIPT_MODI

USCRIPT_MONGOLIAN

USCRIPT_MOON

USCRIPT_MRO

USCRIPT_MULTANI

USCRIPT_MYANMAR

USCRIPT_NABATAEAN

USCRIPT_NAG_MUNDARI

USCRIPT_NAKHI_GEBA

USCRIPT_NANDINAGARI

USCRIPT_NEWA

USCRIPT_NEW_TAI_LUE

USCRIPT_NKO

USCRIPT_NUSHU

USCRIPT_NYIAKENG_PUACHUE_HMONG

USCRIPT_OGHAM

USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC

USCRIPT_OLD_HUNGARIAN

USCRIPT_OLD_ITALIC

USCRIPT_OLD_NORTH_ARABIAN

USCRIPT_OLD_PERMIC

USCRIPT_OLD_PERSIAN

USCRIPT_OLD_SOGDIAN

USCRIPT_OLD_SOUTH_ARABIAN

USCRIPT_OLD_UYGHUR

USCRIPT_OL_CHIKI

USCRIPT_OL_ONAL

USCRIPT_ORIYA

USCRIPT_ORKHON

USCRIPT_OSAGE

USCRIPT_OSMANYA

USCRIPT_PAHAWH_HMONG

USCRIPT_PALMYRENE

USCRIPT_PAU_CIN_HAU

USCRIPT_PHAGS_PA

USCRIPT_PHOENICIAN

USCRIPT_PHONETIC_POLLARD

USCRIPT_PSALTER_PAHLAVI

USCRIPT_REJANG

USCRIPT_RONGORONGO

USCRIPT_RUNIC

USCRIPT_SAMARITAN

USCRIPT_SARATI

USCRIPT_SAURASHTRA

USCRIPT_SHARADA

USCRIPT_SHAVIAN

USCRIPT_SIDDHAM

USCRIPT_SIGN_WRITING

Sutton SignWriting.

USCRIPT_SIMPLIFIED_HAN

USCRIPT_SINDHI

USCRIPT_SINHALA

USCRIPT_SOGDIAN

USCRIPT_SORA_SOMPENG

USCRIPT_SOYOMBO

USCRIPT_SUNDANESE

USCRIPT_SUNUWAR

USCRIPT_SYLOTI_NAGRI

USCRIPT_SYMBOLS

USCRIPT_SYMBOLS_EMOJI

USCRIPT_SYRIAC

USCRIPT_TAGALOG

USCRIPT_TAGBANWA

USCRIPT_TAI_LE

USCRIPT_TAI_VIET

USCRIPT_TAKRI

USCRIPT_TAMIL

USCRIPT_TANGSA

USCRIPT_TANGUT

USCRIPT_TELUGU

USCRIPT_TENGWAR

USCRIPT_THAANA

USCRIPT_THAI

USCRIPT_TIBETAN

USCRIPT_TIFINAGH

USCRIPT_TIRHUTA

USCRIPT_TODHRI

USCRIPT_TOTO

USCRIPT_TRADITIONAL_HAN

USCRIPT_TULU_TIGALARI

USCRIPT_UCAS

Canadian_Aboriginal script (alias).

USCRIPT_UGARITIC

USCRIPT_UNKNOWN

USCRIPT_UNWRITTEN_LANGUAGES

USCRIPT_VAI

USCRIPT_VISIBLE_SPEECH

USCRIPT_VITHKUQI

USCRIPT_WANCHO

USCRIPT_WARANG_CITI

USCRIPT_WESTERN_SYRIAC

USCRIPT_WOLEAI

USCRIPT_YEZIDI

USCRIPT_YI

USCRIPT_ZANABAZAR_SQUARE

UScriptUsage

Declared in unicode/uscript.h
 UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

Properties
USCRIPT_USAGE_ASPIRATIONAL

Aspirational Use script.

USCRIPT_USAGE_EXCLUDED

Candidate for Exclusion from Identifiers.

USCRIPT_USAGE_LIMITED_USE

Limited Use script.

USCRIPT_USAGE_NOT_ENCODED

Not encoded in Unicode.

USCRIPT_USAGE_RECOMMENDED

Recommended script.

USCRIPT_USAGE_UNKNOWN

Unknown script usage.

USentenceBreak

Declared in unicode/uchar.h
 USentenceBreak

Sentence Break constants.

See also: UCHAR_SENTENCE_BREAK

Properties
U_SB_ATERM
U_SB_CLOSE
U_SB_COUNT

One more than the highest normal USentenceBreak value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_SB_CR
U_SB_EXTEND
U_SB_FORMAT
U_SB_LF
U_SB_LOWER
U_SB_NUMERIC
U_SB_OLETTER
U_SB_OTHER
U_SB_SCONTINUE
U_SB_SEP
U_SB_SP
U_SB_STERM
U_SB_UPPER

USentenceBreakTag

Declared in unicode/ubrk.h
 USentenceBreakTag

Enum constants for the sentence break tags returned by getRuleStatus().

A range of values is defined for each category of sentence, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

Properties
UBRK_SENTENCE_SEP

Tag value for sentences that do not contain an ending sentence terminator ('.', '?', '!', etc.) character, but are ended only by a hard separator (CR, LF, PS, etc.) or end of input.

UBRK_SENTENCE_SEP_LIMIT

Upper bound for tags for sentences ended by a separator.

UBRK_SENTENCE_TERM

Tag value for sentences ending with a sentence terminator ('.', '?', '!', etc.) character, possibly followed by a hard separator (CR, LF, PS, etc.).

UBRK_SENTENCE_TERM_LIMIT

Upper bound for tags for sentences ended by sentence terminators.

UTransDirection

Declared in unicode/utrans.h
 UTransDirection

Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.

Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.

Properties
UTRANS_FORWARD

UTRANS_FORWARD means from <source> to <target> for a transliterator with ID <source>-<target>.

For a transliterator opened using a rule, it means forward direction rules, e.g., "A > B".

UTRANS_REVERSE

UTRANS_REVERSE means from <target> to <source> for a transliterator with ID <source>-<target>.

For a transliterator opened using a rule, it means reverse direction rules, e.g., "A < B".

UVerticalOrientation

Declared in unicode/uchar.h
 UVerticalOrientation

UWordBreak

Declared in unicode/ubrk.h
 UWordBreak

Enum constants for the word break tags returned by getRuleStatus().

A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

Properties
UBRK_WORD_IDEO

Tag value for words containing ideographic characters, lower limit.

UBRK_WORD_IDEO_LIMIT

Tag value for words containing ideographic characters, upper limit.

UBRK_WORD_KANA

Tag value for words containing kana characters, lower limit.

UBRK_WORD_KANA_LIMIT

Tag value for words containing kana characters, upper limit.

UBRK_WORD_LETTER

Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters, lower limit.

UBRK_WORD_LETTER_LIMIT

Tag value for words containing letters, upper limit.

UBRK_WORD_NONE

Tag value for "words" that do not fit into any of other categories.

Includes spaces and most punctuation.

UBRK_WORD_NONE_LIMIT

Upper bound for tags for uncategorized words.

UBRK_WORD_NUMBER

Tag value for words that appear to be numbers, lower limit.

UBRK_WORD_NUMBER_LIMIT

Tag value for words that appear to be numbers, upper limit.

UWordBreakValues

Declared in unicode/uchar.h
 UWordBreakValues

Word Break constants.

(UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)

See also: UCHAR_WORD_BREAK

Properties
U_WB_ALETTER
U_WB_COUNT

One more than the highest normal UWordBreakValues value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_WB_CR

U_WB_DOUBLE_QUOTE

U_WB_EXTEND

U_WB_EXTENDNUMLET
U_WB_E_BASE

U_WB_E_BASE_GAZ

U_WB_E_MODIFIER

U_WB_FORMAT
U_WB_GLUE_AFTER_ZWJ

U_WB_HEBREW_LETTER

U_WB_KATAKANA
U_WB_LF

U_WB_MIDLETTER
U_WB_MIDNUM
U_WB_MIDNUMLET

U_WB_NEWLINE

U_WB_NUMERIC
U_WB_OTHER
U_WB_REGIONAL_INDICATOR

U_WB_SINGLE_QUOTE

U_WB_WSEGSPACE

U_WB_ZWJ

Typedefs

OldUChar

Declared in unicode/umachine.h
uint16_t OldUChar

Default ICU 58 definition of UChar.

A base type for UTF-16 code units and pointers. Unsigned 16-bit integer.

Define OldUChar to be wchar_t if that is 16 bits wide. If wchar_t is not 16 bits wide, then define OldUChar to be uint16_t.

This makes the definition of OldUChar platform-dependent but allows direct string type compatibility with platforms with 16-bit wchar_t types.

This is how UChar was defined in ICU 58, for transition convenience. Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. The current UChar responds to UCHAR_TYPE but OldUChar does not.

UBidiPairedBracketType

Declared in unicode/uchar.h
enum UBidiPairedBracketType UBidiPairedBracketType

Bidi Paired Bracket Type constants.

See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE

UBlockCode

Declared in unicode/uchar.h
enum UBlockCode UBlockCode

UBool

Declared in unicode/umachine.h
int8_t UBool

The ICU boolean type, a signed-byte integer.

ICU-specific for historical reasons: The C and C++ standards used to not define type bool. Also provides a fixed type definition, as opposed to type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.

UBreakIteratorType

Declared in unicode/ubrk.h
enum UBreakIteratorType UBreakIteratorType

The possible types of text boundaries.

UCPMap

Declared in unicode/ucpmap.h
struct UCPMap UCPMap

Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.

See also: UCPTrie See also: UMutableCPTrie

UCPMapValueFilter

Declared in unicode/ucpmap.h
uint32_t U_CALLCONV UCPMapValueFilter(const void *context, uint32_t value)

Callback function type: Modifies a map value.

Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). The modified value will be returned by the getRange function.

Can be used to ignore some of the value bits, make a filter for one of several values, return a value index computed from the map value, etc.

Details
Parameters
context
an opaque pointer, as passed into the getRange function
value
a value from the map
Returns
the modified value

UChar

Declared in unicode/umachine.h
char16_t UChar

The base type for UTF-16 code units and pointers.

Unsigned 16-bit integer. Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.

UChar is configurable by defining the macro UCHAR_TYPE on the preprocessor or compiler command line: -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.

The default is UChar=char16_t.

C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.

In C, char16_t is a simple typedef of uint_least16_t. ICU requires uint_least16_t=uint16_t for data memory mapping. On macOS, char16_t is not available because the uchar.h standard header is missing.

UChar32

Declared in unicode/umachine.h
int32_t UChar32

Define UChar32 as a type for single Unicode code points.

UChar32 is a signed 32-bit integer (same as int32_t).

The Unicode code point range is 0..0x10ffff. All other values (negative or >=0x110000) are illegal as Unicode code points. They may be used as sentinel values to indicate "done", "error" or similar non-code point conditions.

Before ICU 2.4 (Jitterbug 2146), UChar32 was defined to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) or else to be uint32_t. That is, the definition of UChar32 was platform-dependent.

See also: U_SENTINEL

UCharCategory

Declared in unicode/uchar.h
enum UCharCategory UCharCategory

Data for enumerated Unicode general category types.

See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .

UCharDirection

Declared in unicode/uchar.h
enum UCharDirection UCharDirection

This specifies the language directional property of a character set.

UCharEnumTypeRange

Declared in unicode/uchar.h
UBool U_CALLCONV UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)

Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type").

The callback function can stop the enumeration by returning false.

See also: UCharCategory See also: u_enumCharTypes

Details
Parameters
context
an opaque pointer, as passed into utrie_enum()
start
the first code point in a contiguous range with value
limit
one past the last code point in a contiguous range with value
type
the general category for all code points in [start..limit[
Returns
false to stop the enumeration

UCharNameChoice

Declared in unicode/uchar.h
enum UCharNameChoice UCharNameChoice

Selector constants for u_charName().

u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.

See also: u_charName

UColAttribute

Declared in unicode/ucol.h
enum UColAttribute UColAttribute

Attributes that collation service understands.

All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.

UColAttributeValue

Declared in unicode/ucol.h
enum UColAttributeValue UColAttributeValue

Enum containing attribute values for controlling collation behavior.

Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.

UColBoundMode

Declared in unicode/ucol.h
enum UColBoundMode UColBoundMode

Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.

Underlying code depends on them having these numbers

UColReorderCode

Declared in unicode/ucol.h
enum UColReorderCode UColReorderCode

Enum containing the codes for reordering segments of the collation table that are not script codes.

These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode

UCollationResult

Declared in unicode/ucol.h
enum UCollationResult UCollationResult

UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.

UCOL_EQUAL is returned if source string is compared to be equal to target string in the ucol_strcoll() method. UCOL_GREATER is returned if source string is compared to be greater than target string in the ucol_strcoll() method. These are the possible values for a comparison result. See also: ucol_strcoll()

UCollationStrength

Declared in unicode/ucol.h
UColAttributeValue UCollationStrength

Base letter represents a primary difference.

Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary differences. Use this to set the strength of a Collator object. Example of primary difference, "abc" < "abd"

Diacritical differences on the same base letter represent a secondary difference. Set comparison level to UCOL_SECONDARY to ignore tertiary differences. Use this to set the strength of a Collator object. Example of secondary difference, "ä" >> "a".

Uppercase and lowercase versions of the same character represents a tertiary difference. Set comparison level to UCOL_TERTIARY to include all comparison differences. Use this to set the strength of a Collator object. Example of tertiary difference, "abc" <<< "ABC".

Two characters are considered "identical" when they have the same unicode spellings. UCOL_IDENTICAL. For example, "ä" == "ä".

UCollationStrength is also used to determine the strength of sort keys generated from UCollator objects. These values can now be found in the UColAttributeValue enum.

UCollator

Declared in unicode/ucol.h
struct UCollator UCollator

structure representing a collator object instance

UDate

Declared in unicode/utypes.h
double UDate

Date and Time data type.

This is a primitive data type that holds the date and time as the number of milliseconds since 1970-jan-01, 00:00 UTC. UTC leap seconds are ignored.

UDecompositionType

Declared in unicode/uchar.h
enum UDecompositionType UDecompositionType

Decomposition Type constants.

See also: UCHAR_DECOMPOSITION_TYPE

UDisplayContext

Declared in unicode/udisplaycontext.h
enum UDisplayContext UDisplayContext

UDisplayContextType

Declared in unicode/udisplaycontext.h
enum UDisplayContextType UDisplayContextType

UEastAsianWidth

Declared in unicode/uchar.h
enum UEastAsianWidth UEastAsianWidth

East Asian Width constants.

See also: UCHAR_EAST_ASIAN_WIDTH See also: u_getIntPropertyValue

UEnumCharNamesFn

Declared in unicode/uchar.h
UBool U_CALLCONV UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)

Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.

If such a function returns false, then the enumeration is stopped.

See also: UCharNameChoice See also: u_enumCharNames

Details
Parameters
context
The context pointer that was passed to u_enumCharNames().
code
The Unicode code point for the character with this name.
nameChoice
Selector for which kind of names is enumerated.
name
The character's name, zero-terminated.
length
The length of the name.
Returns
true if the enumeration should continue, false to stop it.

UEnumeration

Declared in unicode/uenum.h
struct UEnumeration UEnumeration

structure representing an enumeration object instance

UErrorCode

Declared in unicode/utypes.h
enum UErrorCode UErrorCode

Standard ICU4C error code type, a substitute for exceptions.

Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():

UErrorCode errorCode = U_ZERO_ERROR;
// call ICU API that needs an error code parameter.
if (U_FAILURE(errorCode)) {
    // An error occurred. Handle it here.
}

C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.

For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes

Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:

if (U_FAILURE(errorCode)) { return immediately; }

so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.

UGraphemeClusterBreak

Declared in unicode/uchar.h
enum UGraphemeClusterBreak UGraphemeClusterBreak

Grapheme Cluster Break constants.

See also: UCHAR_GRAPHEME_CLUSTER_BREAK

UHangulSyllableType

Declared in unicode/uchar.h
enum UHangulSyllableType UHangulSyllableType

Hangul Syllable Type constants.

See also: UCHAR_HANGUL_SYLLABLE_TYPE

UIdentifierStatus

Declared in unicode/uchar.h
enum UIdentifierStatus UIdentifierStatus

UIdentifierType

Declared in unicode/uchar.h
enum UIdentifierType UIdentifierType

UIndicConjunctBreak

Declared in unicode/uchar.h
enum UIndicConjunctBreak UIndicConjunctBreak

Indic Conjunct Break constants.

See also: UCHAR_INDIC_CONJUNCT_BREAK

UIndicPositionalCategory

Declared in unicode/uchar.h
enum UIndicPositionalCategory UIndicPositionalCategory

Indic Positional Category constants.

See also: UCHAR_INDIC_POSITIONAL_CATEGORY

UIndicSyllabicCategory

Declared in unicode/uchar.h
enum UIndicSyllabicCategory UIndicSyllabicCategory

Indic Syllabic Category constants.

See also: UCHAR_INDIC_SYLLABIC_CATEGORY

UJoiningGroup

Declared in unicode/uchar.h
enum UJoiningGroup UJoiningGroup

Joining Group constants.

See also: UCHAR_JOINING_GROUP

UJoiningType

Declared in unicode/uchar.h
enum UJoiningType UJoiningType

Joining Type constants.

See also: UCHAR_JOINING_TYPE

ULineBreak

Declared in unicode/uchar.h
enum ULineBreak ULineBreak

Line Break constants.

See also: UCHAR_LINE_BREAK

ULineBreakTag

Declared in unicode/ubrk.h
enum ULineBreakTag ULineBreakTag

Enum constants for the line break tags returned by getRuleStatus().

A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

ULocAvailableType

Declared in unicode/uloc.h
enum ULocAvailableType ULocAvailableType

Types for uloc_getAvailableByType and uloc_countAvailableByType.

ULocaleData

Declared in unicode/ulocdata.h
struct ULocaleData ULocaleData

A locale data object.

ULocaleDisplayNames

Declared in unicode/uldnames.h
struct ULocaleDisplayNames ULocaleDisplayNames

C typedef for struct ULocaleDisplayNames.

UNormalizationCheckResult

Declared in unicode/unorm2.h
enum UNormalizationCheckResult UNormalizationCheckResult

Result values for normalization quick check functions.

For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms

UNormalizer2

Declared in unicode/unorm2.h
struct UNormalizer2 UNormalizer2

C typedef for struct UNormalizer2.

UNumericType

Declared in unicode/uchar.h
enum UNumericType UNumericType

Numeric Type constants.

See also: UCHAR_NUMERIC_TYPE

UParseError

Declared in unicode/parseerr.h
struct UParseError UParseError

A UParseError struct is used to return detailed information about parsing errors.

It is used by ICU parsing engines that parse long rules, patterns, or programs, where the text being parsed is long enough that more information than a UErrorCode is needed to localize the error.

The line, offset, and context fields are optional; parsing engines may choose not to use them.

The preContext and postContext strings include some part of the context surrounding the error. If the source text is "let for=7" and "for" is the error (e.g., because it is a reserved word), then some examples of what a parser might produce are the following:

preContext   postContext
""           ""            The parser does not support context
"let "       "=7"          Pre- and post-context only
"let "       "for=7"       Pre- and post-context and error text
""           "for"         Error text only

Examples of engines which use UParseError (or may use it in the future) are Transliterator, RuleBasedBreakIterator, and RegexPattern.

UProperty

Declared in unicode/uchar.h
enum UProperty UProperty

Selection constants for Unicode properties.

These constants are used in functions like u_hasBinaryProperty to select one of the Unicode properties.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).

For details about the properties see UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).

Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, then properties marked with "new in Unicode 3.2" are not or not fully available. Check u_getUnicodeVersion to be sure.

See also: u_hasBinaryProperty See also: u_getIntPropertyValue See also: u_getUnicodeVersion

UPropertyNameChoice

Declared in unicode/uchar.h
enum UPropertyNameChoice UPropertyNameChoice

Selector constants for u_getPropertyName() and u_getPropertyValueName().

These selectors are used to choose which name is returned for a given property or value. All properties and values have a long name. Most have a short name, but some do not. Unicode allows for additional names, beyond the long and short name, which would be indicated by U_LONG_PROPERTY_NAME + i, where i=1, 2,...

See also: u_getPropertyName() See also: u_getPropertyValueName()

UReplaceable

Declared in unicode/urep.h
void * UReplaceable

An opaque replaceable text object.

This will be manipulated only through the caller-supplied UReplaceableFunctor struct. Related to the C++ class Replaceable. This is currently only used in the Transliterator C API, see utrans.h .

UReplaceableCallbacks

Declared in unicode/urep.h
struct UReplaceableCallbacks UReplaceableCallbacks

A set of function pointers that transliterators use to manipulate a UReplaceable.

The caller should supply the required functions to manipulate their text appropriately. Related to the C++ class Replaceable.

UScriptCode

Declared in unicode/uscript.h
enum UScriptCode UScriptCode

Constants for ISO 15924 script codes.

The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .

In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.

UScriptUsage

Declared in unicode/uscript.h
enum UScriptUsage UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

USentenceBreak

Declared in unicode/uchar.h
enum USentenceBreak USentenceBreak

Sentence Break constants.

See also: UCHAR_SENTENCE_BREAK

USentenceBreakTag

Declared in unicode/ubrk.h
enum USentenceBreakTag USentenceBreakTag

Enum constants for the sentence break tags returned by getRuleStatus().

A range of values is defined for each category of sentence, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

UText

Declared in unicode/utext.h
struct UText UText

C typedef for struct UText.

UTransDirection

Declared in unicode/utrans.h
enum UTransDirection UTransDirection

Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.

Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.

UTransPosition

Declared in unicode/utrans.h
struct UTransPosition UTransPosition

Position structure for utrans_transIncremental() incremental transliteration.

This structure defines two substrings of the text being transliterated. The first region, [contextStart, contextLimit), defines what characters the transliterator will read as context. The second region, [start, limit), defines what characters will actually be transliterated. The second region should be a subset of the first.

After a transliteration operation, some of the indices in this structure will be modified. See the field descriptions for details.

contextStart <= start <= limit <= contextLimit

Note: All index values in this structure must be at code point boundaries. That is, none of them may occur between two code units of a surrogate pair. If any index does split a surrogate pair, results are unspecified.

UTransliterator

Declared in unicode/utrans.h
void * UTransliterator

An opaque transliterator for use in C.

Open with utrans_openxxx() and close with utrans_close() when done. Equivalent to the C++ class Transliterator and its subclasses. See also: Transliterator

UVersionInfo

Declared in unicode/uversion.h
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]

The binary form of a version on ICU APIs is an array of 4 uint8_t.

To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).

UVerticalOrientation

Declared in unicode/uchar.h
enum UVerticalOrientation UVerticalOrientation

Vertical Orientation constants.

See also: UCHAR_VERTICAL_ORIENTATION

UWordBreak

Declared in unicode/ubrk.h
enum UWordBreak UWordBreak

Enum constants for the word break tags returned by getRuleStatus().

A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

UWordBreakValues

Declared in unicode/uchar.h
enum UWordBreakValues UWordBreakValues

Word Break constants.

(UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)

See also: UCHAR_WORD_BREAK

Variables

context

Declared in unicode/ustring.h
U_CDECL_BEGIN typedef void * context

Functions

UChar

Declared in unicode/ustring.h
U_CDECL_BEGIN typedef UChar(
  U_CALLCONV *UNESCAPE_CHAR_AT
)(int32_t offset, void *context)

Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.

The context pointer will be whatever is passed into u_unescapeAt().

See also: u_unescapeAt

Details
Parameters
offset
pointer to the offset that will be passed to u_unescapeAt().
context
an opaque pointer passed directly into u_unescapeAt()
Returns
the character represented by the escape sequence at offset

u_charAge

Declared in unicode/uchar.h
U_CAPI void U_EXPORT2 u_charAge(
  UChar32 c,
  UVersionInfo versionArray
)

Get the "age" of the code point.

The "age" is the Unicode version when the code point was first designated (as a non-character or for Private Use) or assigned a character. This can be useful to avoid emitting code points to receiving processes that do not accept newer characters. The data is from the UCD file DerivedAge.txt.

Details
Parameters
c
The code point.
versionArray
The Unicode version number array, to be filled in.

u_charDigitValue

Declared in unicode/uchar.h
U_CAPI int32_t U_EXPORT2 u_charDigitValue(
  UChar32 c
)

Returns the decimal digit value of a decimal digit character.

Such characters have the general category "Nd" (decimal digit numbers) and a Numeric_Type of Decimal.

Unlike ICU releases before 2.6, no digit values are returned for any Han characters because Han number characters are often used with a special Chinese-style number format (with characters for powers of 10 in between) instead of in decimal-positional notation. Unicode 4 explicitly assigns Han number characters the Numeric_Type Numeric instead of Decimal. See Jitterbug 1483 for more details.

Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() for complete numeric Unicode properties.

See also: u_getNumericValue

Details
Parameters
c
the code point for which to get the decimal digit value
Returns
the decimal digit value of c, or -1 if c is not a decimal digit character

u_charDirection

Declared in unicode/uchar.h
U_CAPI UCharDirection U_EXPORT2 u_charDirection(
  UChar32 c
)

Returns the bidirectional category value for the code point, which is used in the Unicode bidirectional algorithm (UAX #9 http://www.unicode.org/reports/tr9/).

Note that some unassigned code points have bidi values of R or AL because they are in blocks that are reserved for Right-To-Left scripts.

Same as java.lang.Character.getDirectionality()

See also: UCharDirection

Details
Parameters
c
the code point to be tested
Returns
the bidirectional category (UCharDirection) value

u_charFromName

Declared in unicode/uchar.h
U_CAPI UChar32 U_EXPORT2 u_charFromName(
  UCharNameChoice nameChoice,
  const char *name,
  UErrorCode *pErrorCode
)

Find a Unicode character by its name and return its code point value.

The name is matched exactly and completely. If the name does not correspond to a code point, pErrorCode is set to U_INVALID_CHAR_FOUND. A Unicode 1.0 name is matched only if it differs from the modern name. Unicode names are all uppercase. Extended names are lowercase followed by an uppercase hexadecimal number, and within angle brackets.

See also: UCharNameChoice See also: u_charName See also: u_enumCharNames

Details
Parameters
nameChoice
Selector for which name to match.
name
The name to match.
pErrorCode
Pointer to a UErrorCode variable
Returns
The Unicode value of the code point with the given name, or an undefined value if there is no such code point.

u_charMirror

Declared in unicode/uchar.h
U_CAPI UChar32 U_EXPORT2 u_charMirror(
  UChar32 c
)

Maps the specified character to a "mirror-image" character.

For characters with the Bidi_Mirrored property, implementations sometimes need a "poor man's" mapping to another Unicode character (code point) such that the default glyph may serve as the mirror-image of the default glyph of the specified character. This is useful for text conversion to and from codepages with visual order, and for displays without glyph selection capabilities.

See also: UCHAR_BIDI_MIRRORED See also: u_isMirrored

Details
Parameters
c
the code point to be mapped
Returns
another Unicode code point that may serve as a mirror-image substitute, or c itself if there is no such mapping or c does not have the Bidi_Mirrored property

u_charName

Declared in unicode/uchar.h
U_CAPI int32_t U_EXPORT2 u_charName(
  UChar32 code,
  UCharNameChoice nameChoice,
  char *buffer,
  int32_t bufferLength,
  UErrorCode *pErrorCode
)

Retrieve the name of a Unicode character.

Depending on nameChoice, the character name written into the buffer is the "modern" name or the name that was defined in Unicode version 1.0. The name contains only "invariant" characters like A-Z, 0-9, space, and '-'. Unicode 1.0 names are only retrieved if they are different from the modern names and if the data file contains the data for them. gennames may or may not be called with a command line option to include 1.0 names in unames.dat.

See also: UCharNameChoice See also: u_charFromName See also: u_enumCharNames

Details
Parameters
code
The character (code point) for which to get the name. It must be 0<=code<=0x10ffff.
nameChoice
Selector for which name to get.
buffer
Destination address for copying the name. The name will always be zero-terminated. If there is no name, then the buffer will be set to the empty string.
bufferLength
==sizeof(buffer)
pErrorCode
Pointer to a UErrorCode variable; check for U_SUCCESS() after u_charName() returns.
Returns
The length of the name, or 0 if there is no name for this character. If the bufferLength is less than or equal to the length, then the buffer contains the truncated name and the returned length indicates the full length of the name. The length does not include the zero-termination.

u_charType

Declared in unicode/uchar.h
U_CAPI int8_t U_EXPORT2 u_charType(
  UChar32 c
)

Returns the general category value for the code point.

Same as java.lang.Character.getType().

See also: UCharCategory

Details
Parameters
c
the code point to be tested
Returns
the general category (UCharCategory) value

u_countChar32

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_countChar32(
  const UChar *s,
  int32_t length
)

Count Unicode code points in the length UChar code units of the string.

A code point may occupy either one or two UChar code units. Counting code points involves reading all code units.

This function is basically the inverse of the U16_FWD_N() macro (see utf.h).

Details
Parameters
s
The input string.
length
The number of UChar code units to be checked, or -1 to count all code points before the first NUL (U+0000).
Returns
The number of code points in the specified code units.

u_digit

Declared in unicode/uchar.h
U_CAPI int32_t U_EXPORT2 u_digit(
  UChar32 ch,
  int8_t radix
)

Returns the decimal digit value of the code point in the specified radix.

If the radix is not in the range 2<=radix<=36 or if the value of ch is not a valid digit in the specified radix, -1 is returned. A character is a valid digit if at least one of the following is true:

  • The character has a decimal digit value. Such characters have the general category "Nd" (decimal digit numbers) and a Numeric_Type of Decimal. In this case the value is the character's decimal digit value.
  • The character is one of the uppercase Latin letters 'A' through 'Z'. In this case the value is ch-'A'+10.
  • The character is one of the lowercase Latin letters 'a' through 'z'. In this case the value is ch-'a'+10.
  • Latin letters from both the ASCII range (0061..007A, 0041..005A) as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) are recognized.

Same as java.lang.Character.digit().

See also: UCHAR_NUMERIC_TYPE See also: u_forDigit See also: u_charDigitValue See also: u_isdigit

Details
Parameters
ch
the code point to be tested.
radix
the radix.
Returns
the numeric value represented by the character in the specified radix, or -1 if there is no value or if the value exceeds the radix.

u_enumCharNames

Declared in unicode/uchar.h
U_CAPI void U_EXPORT2 u_enumCharNames(
  UChar32 start,
  UChar32 limit,
  UEnumCharNamesFn *fn,
  void *context,
  UCharNameChoice nameChoice,
  UErrorCode *pErrorCode
)

Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.

For Unicode 1.0 names, only those are enumerated that differ from the modern names.

See also: UCharNameChoice See also: UEnumCharNamesFn See also: u_charName See also: u_charFromName

Details
Parameters
start
The first code point in the enumeration range.
limit
One more than the last code point in the enumeration range (the first one after the range).
fn
The function that is to be called for each character name.
context
An arbitrary pointer that is passed to the function.
nameChoice
Selector for which kind of names to enumerate.
pErrorCode
Pointer to a UErrorCode variable

u_enumCharTypes

Declared in unicode/uchar.h
U_CAPI void U_EXPORT2 u_enumCharTypes(
  UCharEnumTypeRange *enumRange,
  const void *context
)

Enumerate efficiently all code points with their Unicode general categories.

This is useful for building data structures (e.g., UnicodeSet's), for enumerating all assigned code points (type!=U_UNASSIGNED), etc.

For each contiguous range of code points with a given general category ("character type"), the UCharEnumTypeRange function is called. Adjacent ranges have different types. The Unicode Standard guarantees that the numeric value of the type is 0..31.

See also: UCharCategory See also: UCharEnumTypeRange

Details
Parameters
enumRange
a pointer to a function that is called for each contiguous range of code points with the same general category
context
an opaque pointer that is passed on to the callback function

u_errorName

Declared in unicode/utypes.h
U_CAPI const char *U_EXPORT2 u_errorName(
  UErrorCode code
)

Return a string for a UErrorCode value.

The string will be the same as the name of the error code constant in the UErrorCode enum above.

u_foldCase

Declared in unicode/uchar.h
U_CAPI UChar32 U_EXPORT2 u_foldCase(
  UChar32 c,
  uint32_t options
)

The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.

This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings

Details
Parameters
c
the code point to be mapped
options
Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
Returns
the Simple_Case_Folding of the code point, if any; otherwise the code point itself.

u_forDigit

Declared in unicode/uchar.h
U_CAPI UChar32 U_EXPORT2 u_forDigit(
  int32_t digit,
  int8_t radix
)

Determines the character representation for a specific digit in the specified radix.

If the value of radix is not a valid radix, or the value of digit is not a valid digit in the specified radix, the null character (U+0000) is returned.

The radix argument is valid if it is greater than or equal to 2 and less than or equal to 36. The digit argument is valid if 0 <= digit < radix.

If the digit is less than 10, then '0' + digit is returned. Otherwise, the value 'a' + digit - 10 is returned.

Same as java.lang.Character.forDigit().

See also: u_digit See also: u_charDigitValue See also: u_isdigit

Details
Parameters
digit
the number to convert to a character.
radix
the radix.
Returns
the char representation of the specified digit in the specified radix.

u_getBidiPairedBracket

Declared in unicode/uchar.h
U_CAPI UChar32 U_EXPORT2 u_getBidiPairedBracket(
  UChar32 c
)

Maps the specified character to its paired bracket character.

For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). Otherwise c itself is returned. See http://www.unicode.org/reports/tr9/

See also: UCHAR_BIDI_PAIRED_BRACKET See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE See also: u_charMirror

Details
Parameters
c
the code point to be mapped
Returns
the paired bracket code point, or c itself if there is no such mapping (Bidi_Paired_Bracket_Type=None)

u_getCombiningClass

Declared in unicode/uchar.h
U_CAPI uint8_t U_EXPORT2 u_getCombiningClass(
  UChar32 c
)

Returns the combining class of the code point as specified in UnicodeData.txt.

Details
Parameters
c
the code point of the character
Returns
the combining class of the character

u_getIntPropertyMaxValue

Declared in unicode/uchar.h
U_CAPI int32_t U_EXPORT2 u_getIntPropertyMaxValue(
  UProperty which
)

Get the maximum value for an enumerated/integer/binary Unicode property.

Can be used together with u_getIntPropertyMinValue to allocate arrays of UnicodeSet or similar.

Examples for min/max values (for Unicode 3.2):

  • UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
  • UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
  • UCHAR_IDEOGRAPHIC: 0/1 (false/true)

For undefined UProperty constant values, min/max values will be 0/-1.

See also: UProperty See also: u_hasBinaryProperty See also: u_getUnicodeVersion See also: u_getIntPropertyMinValue See also: u_getIntPropertyValue

Details
Parameters
which
UProperty selector constant, identifies which binary property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
Returns
Maximum value returned by u_getIntPropertyValue for a Unicode property. <=0 if the property selector is out of range.

u_getIntPropertyMinValue

Declared in unicode/uchar.h
U_CAPI int32_t U_EXPORT2 u_getIntPropertyMinValue(
  UProperty which
)

Get the minimum value for an enumerated/integer/binary Unicode property.

Can be used together with u_getIntPropertyMaxValue to allocate arrays of UnicodeSet or similar.

See also: UProperty See also: u_hasBinaryProperty See also: u_getUnicodeVersion See also: u_getIntPropertyMaxValue See also: u_getIntPropertyValue

Details
Parameters
which
UProperty selector constant, identifies which binary property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
Returns
Minimum value returned by u_getIntPropertyValue for a Unicode property. 0 if the property selector is out of range.

u_getIntPropertyValue

Declared in unicode/uchar.h
U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue(
  UChar32 c,
  UProperty which
)

Get the property value for an enumerated or integer Unicode property for a code point.

Also returns binary and mask property values.

Unicode, especially in version 3.2, defines many more properties than the original set in UnicodeData.txt.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ . For names of Unicode properties see the UCD file PropertyAliases.txt.

Sample usage: UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);

See also: UProperty See also: u_hasBinaryProperty See also: u_getIntPropertyMinValue See also: u_getIntPropertyMaxValue See also: u_getIntPropertyMap See also: u_getUnicodeVersion

Details
Parameters
c
Code point to test.
which
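UProperty selector constant, identifies which property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=which<UCHAR_INT_LIMIT or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.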
Returns
Numeric value that is directly the property value or, for enumerated properties, corresponds to the numeric value of the enumerated constant of the respective property value enumeration type (cast to enum type if necessary). Returns 0 or 1 (for false/true) for binary Unicode properties. Returns a bit-mask for mask properties. Returns 0 if 'which' is out of bounds or if the Unicode version does not have data for the property at all, or not for this code point.

u_getNumericValue

Declared in unicode/uchar.h
U_CAPI double U_EXPORT2 u_getNumericValue(
  UChar32 c
)

Get the numeric value for a Unicode code point as defined in the Unicode Character Database.

A "double" return type is necessary because some numeric values are fractions, negative, or too large for int32_t.

For characters without any numeric values in the Unicode Character Database, this function will return U_NO_NUMERIC_VALUE. Note: This is different from the Unicode Standard which specifies NaN as the default value. (NaN is not available on all platforms.)

Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() also supports negative values, large values, and fractions, while Java's getNumericValue() returns values 10..35 for ASCII letters.

See also: U_NO_NUMERIC_VALUE

Details
Parameters
c
Code point to get the numeric value for.
Returns
Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.

u_getPropertyEnum

Declared in unicode/uchar.h
U_CAPI UProperty U_EXPORT2 u_getPropertyEnum(
  const char *alias
)

Return the UProperty enum for a given property name, as specified in the Unicode database file PropertyAliases.txt.

Short, long, and any other variants are recognized.

In addition, this function maps the synthetic names "gcm" / "General_Category_Mask" to the property UCHAR_GENERAL_CATEGORY_MASK. These names are not in PropertyAliases.txt.

See also: UProperty

Details
Parameters
alias
the property name to be matched. The name is compared using "loose matching" as described in PropertyAliases.txt.
Returns
a UProperty enum, or UCHAR_INVALID_CODE if the given name does not match any property.

u_getPropertyName

Declared in unicode/uchar.h
U_CAPI const char *U_EXPORT2 u_getPropertyName(
  UProperty property,
  UPropertyNameChoice nameChoice
)

Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases.txt.

In addition, this function maps the property UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / "General_Category_Mask". These names are not in PropertyAliases.txt.

See also: UProperty See also: UPropertyNameChoice

Details
Parameters
property
UProperty selector other than UCHAR_INVALID_CODE. If out of range, NULL is returned.
nameChoice
selector for which name to get. If out of range, NULL is returned. All properties have a long name. Most have a short name, but some do not. Unicode allows for additional names; if present these will be returned by U_LONG_PROPERTY_NAME + i, where i=1, 2,...
Returns
a pointer to the name, or NULL if either the property or the nameChoice is out of range. If a given nameChoice returns NULL, then all larger values of nameChoice will return NULL, with one exception: if NULL is returned for U_SHORT_PROPERTY_NAME, then U_LONG_PROPERTY_NAME (and higher) may still return a non-NULL value. The returned pointer is valid until u_cleanup() is called.

u_getPropertyValueEnum

Declared in unicode/uchar.h
U_CAPI int32_t U_EXPORT2 u_getPropertyValueEnum(
  UProperty property,
  const char *alias
)

Return the property value integer for a given value name, as specified in the Unicode database file PropertyValueAliases.txt.

Short, long, and any other variants are recognized.

Note: Some of the names in PropertyValueAliases.txt will only be recognized with UCHAR_GENERAL_CATEGORY_MASK, not UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" / "Punctuation", "S" / "Symbol", and "Z" / "Separator".

See also: UProperty

Details
Parameters
property
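UProperty selector constant. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=which<UCHAR_INT_LIMIT or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.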
alias
the value name to be matched. The name is compared using "loose matching" as described in PropertyValueAliases.txt.
Returns
a value integer or UCHAR_INVALID_CODE if the given name does not match any value of the given property, or if the property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values are not values of UCharCategory, but rather mask values produced by U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented.

u_getPropertyValueName

Declared in unicode/uchar.h
U_CAPI const char *U_EXPORT2 u_getPropertyValueName(
  UProperty property,
  int32_t value,
  UPropertyNameChoice nameChoice
)

Return the Unicode name for a given property value, as given in the Unicode database file PropertyValueAliases.txt.

Note: Some of the names in PropertyValueAliases.txt can only be retrieved using UCHAR_GENERAL_CATEGORY_MASK, not UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" / "Punctuation", "S" / "Symbol", and "Z" / "Separator".

See also: UProperty See also: UPropertyNameChoice

Details
Parameters
property
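UProperty selector constant. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=which<UCHAR_INT_LIMIT or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.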
value
selector for a value for the given property. If out of range, NULL is returned. In general, valid values range from 0 up to some maximum. There are a few exceptions: (1.) UCHAR_BLOCK values begin at the non-zero value UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS values are not contiguous and range from 0..240. (3.) UCHAR_GENERAL_CATEGORY_MASK values are not values of UCharCategory, but rather mask values produced by U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented. Mask values range non-contiguously from 1..U_GC_P_MASK.
nameChoice
selector for which name to get. If out of range, NULL is returned. All values have a long name. Most have a short name, but some do not. Unicode allows for additional names; if present these will be returned by U_LONG_PROPERTY_NAME + i, where i=1, 2,...
Returns
a pointer to the name, or NULL if either the property or the nameChoice is out of range. If a given nameChoice returns NULL, then all larger values of nameChoice will return NULL, with one exception: if NULL is returned for U_SHORT_PROPERTY_NAME, then U_LONG_PROPERTY_NAME (and higher) may still return a non-NULL value. The returned pointer is valid until u_cleanup() is called.

u_getUnicodeVersion

Declared in unicode/uchar.h
U_CAPI void U_EXPORT2 u_getUnicodeVersion(
  UVersionInfo versionArray
)

Gets the Unicode version information.

The version array is filled in with the version information for the Unicode standard that is currently used by ICU. For example, Unicode version 3.1.1 is represented as an array with the values { 3, 1, 1, 0 }.

Details
Parameters
versionArray
an output array that will be filled in with the Unicode version number

u_getVersion

Declared in unicode/uversion.h
U_CAPI void U_EXPORT2 u_getVersion(
  UVersionInfo versionArray
)

Gets the ICU release version.

The version array stores the version information for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. Definition of this function lives in putil.c

Details
Parameters
versionArray
the version # information, the result will be filled in

u_hasBinaryProperty

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_hasBinaryProperty(
  UChar32 c,
  UProperty which
)

Check a binary Unicode property for a code point.

Unicode, especially in version 3.2, defines many more properties than the original set in UnicodeData.txt.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ucd/ . For names of Unicode properties see the UCD file PropertyAliases.txt.

Important: If ICU is built with UCD files from Unicode versions below 3.2, then properties marked with "new in Unicode 3.2" are not or not fully available.

See also: UProperty See also: u_getBinaryPropertySet See also: u_getIntPropertyValue See also: u_getUnicodeVersion

Details
Parameters
c
Code point to test.
which
UProperty selector constant, identifies which binary property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
Returns
true or false according to the binary Unicode property value for c. Also false if 'which' is out of bounds or if the Unicode version does not have data for the property at all.

u_isIDIgnorable

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isIDIgnorable(
  UChar32 c
)

Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.

True for characters with general category "Cf" (format controls) as well as non-whitespace ISO controls (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).

Same as java.lang.Character.isIdentifierIgnorable().

Note that Unicode just recommends to ignore Cf (format controls).

See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_isIDStart See also: u_isIDPart

Details
Parameters
c
the code point to be tested
Returns
true if the code point is ignorable in identifiers according to Java

u_isIDPart

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isIDPart(
  UChar32 c
)

Determines if the specified character is permissible as a non-initial character of an identifier according to UAX #31 Unicode Identifier and Pattern Syntax.

Same as Unicode ID_Continue (UCHAR_ID_CONTINUE).

See also: UCHAR_ID_CONTINUE See also: u_isIDStart See also: u_isIDIgnorable

Details
Parameters
c
the code point to be tested
Returns
true if the code point may occur as a non-initial character of an identifier

u_isIDStart

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isIDStart(
  UChar32 c
)

Determines if the specified character is permissible as the first character in an identifier according to UAX #31 Unicode Identifier and Pattern Syntax.

Same as Unicode ID_Start (UCHAR_ID_START).

See also: UCHAR_ID_START See also: u_isalpha See also: u_isIDPart

Details
Parameters
c
the code point to be tested
Returns
true if the code point may start an identifier

u_isISOControl

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isISOControl(
  UChar32 c
)

Determines whether the specified code point is an ISO control code.

True for U+0000..U+001f and U+007f..U+009f (general category "Cc").

Same as java.lang.Character.isISOControl().

See also: u_iscntrl

Details
Parameters
c
the code point to be tested
Returns
true if the code point is an ISO control code

u_isJavaIDPart

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isJavaIDPart(
  UChar32 c
)

Determines if the specified character is permissible in a Java identifier.

In addition to u_isIDPart(c), true for characters with general category "Sc" (currency symbols).

Same as java.lang.Character.isJavaIdentifierPart().

See also: u_isIDIgnorable See also: u_isJavaIDStart See also: u_isalpha See also: u_isdigit See also: u_isIDPart

Details
Parameters
c
the code point to be tested
Returns
true if the code point may occur in a Java identifier

u_isJavaIDStart

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isJavaIDStart(
  UChar32 c
)

Determines if the specified character is permissible as the first character in a Java identifier.

In addition to u_isIDStart(c), true for characters with general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).

Same as java.lang.Character.isJavaIdentifierStart().

See also: u_isJavaIDPart See also: u_isalpha See also: u_isIDStart

Details
Parameters
c
the code point to be tested
Returns
true if the code point may start a Java identifier

u_isJavaSpaceChar

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isJavaSpaceChar(
  UChar32 c
)

Determine if the specified code point is a space character according to Java.

True for characters with general categories "Z" (separators), which does not include control codes (e.g., TAB or Line Feed).

Same as java.lang.Character.isSpaceChar().

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

See also: u_isspace See also: u_isWhitespace See also: u_isUWhiteSpace

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a space character according to Character.isSpaceChar()

u_isMirrored

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isMirrored(
  UChar32 c
)

Determines whether the code point has the Bidi_Mirrored property.

This property is set for characters that are commonly used in Right-To-Left contexts and need to be displayed with a "mirrored" glyph.

Same as java.lang.Character.isMirrored(). Same as UCHAR_BIDI_MIRRORED.

See also: UCHAR_BIDI_MIRRORED

Details
Parameters
c
the code point to be tested
Returns
true if the character has the Bidi_Mirrored property

u_isUAlphabetic

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isUAlphabetic(
  UChar32 c
)

Check if a code point has the Alphabetic Unicode property.

Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). This is different from u_isalpha! See also: UCHAR_ALPHABETIC See also: u_isalpha See also: u_hasBinaryProperty

Details
Parameters
c
Code point to test
Returns
true if the code point has the Alphabetic Unicode property, false otherwise

u_isULowercase

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isULowercase(
  UChar32 c
)

Check if a code point has the Lowercase Unicode property.

Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). This is different from u_islower! See also: UCHAR_LOWERCASE See also: u_islower See also: u_hasBinaryProperty

Details
Parameters
c
Code point to test
Returns
true if the code point has the Lowercase Unicode property, false otherwise

u_isUUppercase

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isUUppercase(
  UChar32 c
)

Check if a code point has the Uppercase Unicode property.

Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). This is different from u_isupper! See also: UCHAR_UPPERCASE See also: u_isupper See also: u_hasBinaryProperty

Details
Parameters
c
Code point to test
Returns
true if the code point has the Uppercase Unicode property, false otherwise

u_isUWhiteSpace

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isUWhiteSpace(
  UChar32 c
)

Check if a code point has the White_Space Unicode property.

Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). This is different from both u_isspace and u_isWhitespace!

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

See also: UCHAR_WHITE_SPACE See also: u_isWhitespace See also: u_isspace See also: u_isJavaSpaceChar See also: u_hasBinaryProperty

Details
Parameters
c
Code point to test
Returns
true if the code point has the White_Space Unicode property, false otherwise.

u_isWhitespace

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isWhitespace(
  UChar32 c
)

Determines if the specified code point is a whitespace character according to Java/ICU.

A character is considered to be a Java whitespace character if and only if it satisfies one of the following criteria:

  • It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
  • It is U+0009 HORIZONTAL TABULATION.
  • It is U+000A LINE FEED.
  • It is U+000B VERTICAL TABULATION.
  • It is U+000C FORM FEED.
  • It is U+000D CARRIAGE RETURN.
  • It is U+001C FILE SEPARATOR.
  • It is U+001D GROUP SEPARATOR.
  • It is U+001E RECORD SEPARATOR.
  • It is U+001F UNIT SEPARATOR.

This API tries to sync with the semantics of Java's java.lang.Character.isWhitespace(), but it may not return the exact same results because of the Unicode version difference.

Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. See http://www.unicode.org/versions/Unicode4.0.1/

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

See also: u_isspace See also: u_isJavaSpaceChar See also: u_isUWhiteSpace

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a whitespace character according to Java/ICU

u_isalnum

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isalnum(
  UChar32 c
)

Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.

True for characters with general categories "L" (letters) and "Nd" (decimal digit numbers).

Same as java.lang.Character.isLetterOrDigit().

In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details
Parameters
c
the code point to be tested
Returns
true if the code point is an alphanumeric character according to Character.isLetterOrDigit()

u_isalpha

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isalpha(
  UChar32 c
)

Determines whether the specified code point is a letter character.

True for general categories "L" (letters).

Same as java.lang.Character.isLetter().

In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: u_isdigit See also: u_isalnum

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a letter character

u_isbase

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isbase(
  UChar32 c
)

Non-standard: Determines whether the specified code point is a base character.

True for general categories "L" (letters), "N" (numbers), "Mc" (spacing combining marks), and "Me" (enclosing marks).

Note that this is different from the Unicode Standard definition in chapter 3.6, conformance clause D51 “Base character”, which defines base characters as the code points with general categories Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs).

See also: u_isalpha See also: u_isdigit

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a base character according to this function

u_isblank

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isblank(
  UChar32 c
)

Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.

The following are equivalent definitions:

true for Unicode White_Space characters except for "vertical space controls", where "vertical space controls" are the following characters: U+000A (LF), U+000B (VT), U+000C (FF), U+000D (CR), U+0085 (NEL), U+2028 (LS), U+2029 (PS)

same as

true for U+0009 (TAB) and characters with general category "Zs" (space separators).

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a "blank"

u_iscntrl

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_iscntrl(
  UChar32 c
)

Determines whether the specified code point is a control character (as defined by this function).

A control character is one of the following:

  • ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
  • U_CONTROL_CHAR (Cc)
  • U_FORMAT_CHAR (Cf)
  • U_LINE_SEPARATOR (Zl)
  • U_PARAGRAPH_SEPARATOR (Zp)

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_isprint

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a control character

u_isdefined

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isdefined(
  UChar32 c
)

Determines whether the specified code point is "defined", which usually means that it is assigned a character.

True for general categories other than "Cn" (other, not assigned), i.e., true for all code points mentioned in UnicodeData.txt.

Note that non-character code points (e.g., U+FDD0) are not "defined" (they are Cn), but surrogate code points are "defined" (Cs).

Same as java.lang.Character.isDefined().

See also: u_isdigit See also: u_isalpha See also: u_isalnum See also: u_isupper See also: u_islower See also: u_istitle

Details
Parameters
c
the code point to be tested
Returns
true if the code point is assigned a character

u_isdigit

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isdigit(
  UChar32 c
)

Determines whether the specified code point is a digit character according to Java.

True for characters with general category "Nd" (decimal digit numbers). Beginning with Unicode 4, this is the same as testing for the Numeric_Type of Decimal.

Same as java.lang.Character.isDigit().

In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a digit character according to Character.isDigit()

u_isgraph

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isgraph(
  UChar32 c
)

Determines whether the specified code point is a "graphic" character (printable, excluding spaces).

true for all characters except those with general categories "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), "Cn" (unassigned), and "Z" (separators).

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a "graphic" character

u_islower

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_islower(
  UChar32 c
)

Determines whether the specified code point has the general category "Ll" (lowercase letter).

Same as java.lang.Character.isLowerCase().

This misses some characters that are also lowercase but have a different general category value. In order to include those, use UCHAR_LOWERCASE.

In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: UCHAR_LOWERCASE See also: u_isupper See also: u_istitle

Details
Parameters
c
the code point to be tested
Returns
true if the code point is an Ll lowercase letter

u_isprint

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isprint(
  UChar32 c
)

Determines whether the specified code point is a printable character.

True for general categories other than "C" (controls).

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_iscntrl

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a printable character

u_ispunct

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_ispunct(
  UChar32 c
)

Determines whether the specified code point is a punctuation character.

True for characters with general categories "P" (punctuation).

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a punctuation character

u_isspace

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isspace(
  UChar32 c
)

Determines if the specified character is a space character or not.

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: u_isJavaSpaceChar See also: u_isWhitespace See also: u_isUWhiteSpace

Details
Parameters
c
the character to be tested
Returns
true if the character is a space character; false otherwise.

u_istitle

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_istitle(
  UChar32 c
)

Determines whether the specified code point is a titlecase letter.

True for general category "Lt" (titlecase letter).

Same as java.lang.Character.isTitleCase().

See also: u_isupper See also: u_islower See also: u_totitle

Details
Parameters
c
the code point to be tested
Returns
true if the code point is an Lt titlecase letter

u_isupper

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isupper(
  UChar32 c
)

Determines whether the specified code point has the general category "Lu" (uppercase letter).

Same as java.lang.Character.isUpperCase().

This misses some characters that are also uppercase but have a different general category value. In order to include those, use UCHAR_UPPERCASE.

In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: UCHAR_UPPERCASE See also: u_islower See also: u_istitle See also: u_tolower

Details
Parameters
c
the code point to be tested
Returns
true if the code point is an Lu uppercase letter

u_isxdigit

Declared in unicode/uchar.h
U_CAPI UBool U_EXPORT2 u_isxdigit(
  UChar32 c
)

Determines whether the specified code point is a hexadecimal digit.

This is equivalent to u_digit(c, 16)>=0. True for characters with general category "Nd" (decimal digit numbers) as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. (That is, for letters with code points 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)

In order to narrow the definition of hexadecimal digits to only ASCII characters, use (c<=0x7f && u_isxdigit(c)).

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details
Parameters
c
the code point to be tested
Returns
true if the code point is a hexadecimal digit

u_memcasecmp

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_memcasecmp(
  const UChar *s1,
  const UChar *s2,
  int32_t length,
  uint32_t options
)

Compare two strings case-insensitively using full case folding.

This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options)).

Details
Parameters
s1
A string to compare.
s2
A string to compare.
length
The number of characters in each string to case-fold and then compare.
options
A bit set of options:
  • U_FOLD_CASE_DEFAULT or 0 is used for default options: Comparison in code unit order with default case folding.
  • U_COMPARE_CODE_POINT_ORDER Set to choose code point order instead of code unit order (see u_strCompare for details).
  • U_FOLD_CASE_EXCLUDE_SPECIAL_I
Returns
A negative, zero, or positive integer indicating the comparison result.

u_memchr

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_memchr(
  const UChar *s,
  UChar c,
  int32_t count
)

Find the first occurrence of a BMP code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details
Parameters
s
The string to search (contains count UChars).
c
The BMP code point to find.
count
The length of the string.
Returns
A pointer to the first occurrence of c in s or NULL if c is not in s.

See also: u_strchr See also: u_memchr32 See also: u_strFindFirst

u_memchr32

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_memchr32(
  const UChar *s,
  UChar32 c,
  int32_t count
)

Find the first occurrence of a code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details
Parameters
s
The string to search (contains count UChars).
c
The code point to find.
count
The length of the string.
Returns
A pointer to the first occurrence of c in s or NULL if c is not in s.

See also: u_strchr32 See also: u_memchr See also: u_strFindFirst

u_memcmp

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_memcmp(
  const UChar *buf1,
  const UChar *buf2,
  int32_t count
)

Compare the first count UChars of each buffer.

Details
Parameters
buf1
The first string to compare.
buf2
The second string to compare.
count
The maximum number of UChars to compare.
Returns
When buf1 < buf2, a negative number is returned. When buf1 == buf2, 0 is returned. When buf1 > buf2, a positive number is returned.

u_memcmpCodePointOrder

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_memcmpCodePointOrder(
  const UChar *s1,
  const UChar *s2,
  int32_t count
)

Compare two Unicode strings in code point order.

This is different in UTF-16 from u_memcmp() if supplementary characters are present. For details, see u_strCompare().

Details
Parameters
s1
A string to compare.
s2
A string to compare.
count
The maximum number of characters to compare.
Returns
a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order

u_memcpy

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_memcpy(
  UChar *dest,
  const UChar *src,
  int32_t count
)

Synonym for memcpy(), but with UChars only.

Details
Parameters
dest
The destination string
src
The source string (can be NULL/invalid if count<=0)
count
The number of characters to copy; no-op if <=0
Returns
A pointer to dest

u_memmove

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_memmove(
  UChar *dest,
  const UChar *src,
  int32_t count
)

Synonym for memmove(), but with UChars only.

Details
Parameters
dest
The destination string
src
The source string (can be NULL/invalid if count<=0)
count
The number of characters to move; no-op if <=0
Returns
A pointer to dest

u_memrchr

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_memrchr(
  const UChar *s,
  UChar c,
  int32_t count
)

Find the last occurrence of a BMP code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details
Parameters
s
The string to search (contains count UChars).
c
The BMP code point to find.
count
The length of the string.
Returns
A pointer to the last occurrence of c in s or NULL if c is not in s.

See also: u_strrchr See also: u_memrchr32 See also: u_strFindLast

u_memrchr32

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_memrchr32(
  const UChar *s,
  UChar32 c,
  int32_t count
)

Find the last occurrence of a code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details
Parameters
s
The string to search (contains count UChars).
c
The code point to find.
count
The length of the string.
Returns
A pointer to the last occurrence of c in s or NULL if c is not in s.

See also: u_strrchr32 See also: u_memrchr See also: u_strFindLast

u_memset

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_memset(
  UChar *dest,
  UChar c,
  int32_t count
)

Initialize count characters of dest to c.

Details
Parameters
dest
The destination string.
c
The character to initialize the string.
count
The maximum number of characters to set.
Returns
A pointer to dest.

u_strCaseCompare

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strCaseCompare(
  const UChar *s1,
  int32_t length1,
  const UChar *s2,
  int32_t length2,
  uint32_t options,
  UErrorCode *pErrorCode
)

Compare two strings case-insensitively using full case folding.

This is equivalent to u_strCompare(u_strFoldCase(s1, options), u_strFoldCase(s2, options), (options&U_COMPARE_CODE_POINT_ORDER)!=0).

The comparison can be done in UTF-16 code unit order or in code point order. They differ only when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.

This function works with strings of different explicitly specified lengths, unlike the ANSI C-like u_strcmp() and u_memcmp() etc. NUL-terminated strings are possible with length arguments of -1.

Details
Parameters
s1
First source string.
length1
Length of first source string, or -1 if NUL-terminated.
s2
Second source string.
length2
Length of second source string, or -1 if NUL-terminated.
options
A bit set of options:
  • U_FOLD_CASE_DEFAULT or 0 is used for default options: Comparison in code unit order with default case folding.
  • U_COMPARE_CODE_POINT_ORDER Set to choose code point order instead of code unit order (see u_strCompare for details).
  • U_FOLD_CASE_EXCLUDE_SPECIAL_I
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
<0 or 0 or >0 as usual for string comparisons

u_strCompare

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strCompare(
  const UChar *s1,
  int32_t length1,
  const UChar *s2,
  int32_t length2,
  UBool codePointOrder
)

Compare two Unicode strings (binary order).

The comparison can be done in code unit order or in code point order. They differ only in UTF-16 when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.

This function works with strings of different explicitly specified lengths, unlike the ANSI C-like u_strcmp() and u_memcmp() etc. NUL-terminated strings are possible with length arguments of -1.

Details
Parameters
s1
First source string.
length1
Length of first source string, or -1 if NUL-terminated.
s2
Second source string.
length2
Length of second source string, or -1 if NUL-terminated.
codePointOrder
Choose between code unit order (false) and code point order (true).
Returns
<0 or 0 or >0 as usual for string comparisons

u_strFindFirst

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_strFindFirst(
  const UChar *s,
  int32_t length,
  const UChar *substring,
  int32_t subLength
)

Find the first occurrence of a substring in a string.

The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.

Details
Parameters
s
The string to search.
length
The length of s (number of UChars), or -1 if it is NUL-terminated.
substring
The substring to find (NUL-terminated if subLength is -1).
subLength
The length of substring (number of UChars), or -1 if it is NUL-terminated.
Returns
A pointer to the first occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s.

See also: u_strstr See also: u_strFindLast

u_strFindLast

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_strFindLast(
  const UChar *s,
  int32_t length,
  const UChar *substring,
  int32_t subLength
)

Find the last occurrence of a substring in a string.

The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.

Details
Parameters
s
The string to search.
length
The length of s (number of UChars), or -1 if it is NUL-terminated.
substring
The substring to find (NUL-terminated if subLength is -1).
subLength
The length of substring (number of UChars), or -1 if it is NUL-terminated.
Returns
A pointer to the last occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s.

See also: u_strstr See also: u_strFindFirst

u_strFoldCase

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strFoldCase(
  UChar *dest,
  int32_t destCapacity,
  const UChar *src,
  int32_t srcLength,
  uint32_t options,
  UErrorCode *pErrorCode
)

Case-folds the characters in a string.

Case-folding is locale-independent and not context-sensitive, but there is an option for whether to include or exclude mappings for dotted I and dotless i that are marked with 'T' in CaseFolding.txt.

The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
src
The original string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
options
Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.

u_strFromUTF32

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_strFromUTF32(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar32 *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-32 string to UTF-16.

If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

See also: u_strFromUTF32WithSub See also: u_strToUTF32

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The pointer to destination buffer.

u_strFromUTF32WithSub

Declared in unicode/ustring.h
U_CAPI UChar * U_EXPORT2 u_strFromUTF32WithSub(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar32 *src,
  int32_t srcLength,
  UChar32 subchar,
  int32_t *pNumSubstitutions,
  UErrorCode *pErrorCode
)

Convert a UTF-32 string to UTF-16.

Same as u_strFromUTF32() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().

See also: u_strFromUTF32 See also: u_strToUTF32WithSub

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
subchar
The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
pNumSubstitutions
Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
pErrorCode
Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
The pointer to destination buffer.

u_strFromUTF8

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strFromUTF8(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const char *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-8 string to UTF-16.

If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

See also: u_strFromUTF8WithSub See also: u_strFromUTF8Lenient

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The pointer to destination buffer.

u_strFromUTF8Lenient

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strFromUTF8Lenient(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const char *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-8 string to UTF-16.

Same as u_strFromUTF8() except that this function is designed to be very fast, which it achieves by being lenient about malformed UTF-8 sequences. This function is intended for use in environments where UTF-8 text is expected to be well-formed.

Its semantics are:

  • Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
  • The function will not read beyond the input string, nor write beyond the destCapacity.
  • Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not be well-formed UTF-16. The function will resynchronize to valid code point boundaries within a small number of code points after an illegal sequence.
  • Non-shortest forms are not detected and will result in "spoofing" output.

For further performance improvement, if srcLength is given (>=0), then destCapacity must be at least srcLength (destCapacity>=srcLength).

There is no inverse u_strToUTF8Lenient() function because there is practically no performance gain from not checking that a UTF-16 string is well-formed.

See also: u_strFromUTF8 See also: u_strFromUTF8WithSub See also: u_strToUTF8WithSub

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). Unlike for other ICU functions, if srcLength>=0 then destCapacity must be at least srcLength (destCapacity>=srcLength).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. Unlike for other ICU functions, if srcLength>=0 but destCapacity<srcLength, then *pDestLength will be set to srcLength (and U_BUFFER_OVERFLOW_ERROR will be set) regardless of the actual result length.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
pErrorCode
Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
The pointer to destination buffer.

u_strFromUTF8WithSub

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strFromUTF8WithSub(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const char *src,
  int32_t srcLength,
  UChar32 subchar,
  int32_t *pNumSubstitutions,
  UErrorCode *pErrorCode
)

Convert a UTF-8 string to UTF-16.

Same as u_strFromUTF8() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().

See also: u_strFromUTF8 See also: u_strFromUTF8Lenient See also: u_strToUTF8WithSub

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
subchar
The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
pNumSubstitutions
Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
pErrorCode
Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
The pointer to destination buffer.

u_strHasMoreChar32Than

Declared in unicode/ustring.h
U_CAPI UBool U_EXPORT2 u_strHasMoreChar32Than(
  const UChar *s,
  int32_t length,
  int32_t number
)

Check if the string contains more Unicode code points than a certain number.

This is more efficient than counting all code points in the entire string and comparing that number with a threshold. This function may not need to scan the string at all if the length is known (not -1 for NUL-termination) and falls within a certain range, and never needs to count more than 'number+1' code points. Logically equivalent to (u_countChar32(s, length)>number). A Unicode code point may occupy either one or two UChar code units.

Details
Parameters
s
The input string.
length
The length of the string, or -1 if it is NUL-terminated.
number
The number of code points in the string is compared against the 'number' parameter.
Returns
Boolean value for whether the string contains more Unicode code points than 'number'. Same as (u_countChar32(s, length)>number).

u_strToLower

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strToLower(
  UChar *dest,
  int32_t destCapacity,
  const UChar *src,
  int32_t srcLength,
  const char *locale,
  UErrorCode *pErrorCode
)

Lowercase the characters in a string.

Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
src
The original string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
locale
The locale to consider, or "" for the root locale or NULL for the default locale.
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.

u_strToTitle

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strToTitle(
  UChar *dest,
  int32_t destCapacity,
  const UChar *src,
  int32_t srcLength,
  UBreakIterator *titleIter,
  const char *locale,
  UErrorCode *pErrorCode
)

Titlecase a string.

Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others.

The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. If the break iterator passed in is null, the default Unicode algorithm will be used to determine the titlecase positions.

This function uses only the setText(), first() and next() methods of the provided break iterator.

The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
src
The original string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
titleIter
A break iterator to find the first characters of words that are to be titlecased. If none is provided (NULL), then a standard titlecase break iterator is opened.
locale
The locale to consider, or "" for the root locale or NULL for the default locale.
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.

u_strToUTF32

Declared in unicode/ustring.h
U_CAPI UChar32 *U_EXPORT2 u_strToUTF32(
  UChar32 *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-16 string to UTF-32.

If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

See also: u_strToUTF32WithSub See also: u_strFromUTF32

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChar32s). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The pointer to destination buffer.

u_strToUTF32WithSub

Declared in unicode/ustring.h
U_CAPI UChar32 *U_EXPORT2 u_strToUTF32WithSub(
  UChar32 *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar *src,
  int32_t srcLength,
  UChar32 subchar,
  int32_t *pNumSubstitutions,
  UErrorCode *pErrorCode
)

Convert a UTF-16 string to UTF-32.

Same as u_strToUTF32() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().

See also: u_strToUTF32 See also: u_strFromUTF32WithSub

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChar32s). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
subchar
The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
pNumSubstitutions
Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
pErrorCode
Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
The pointer to destination buffer.

u_strToUTF8

Declared in unicode/ustring.h
U_CAPI char *U_EXPORT2 u_strToUTF8(
  char *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-16 string to UTF-8.

If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

See also: u_strToUTF8WithSub See also: u_strFromUTF8

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of chars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The pointer to destination buffer.

u_strToUTF8WithSub

Declared in unicode/ustring.h
U_CAPI char *U_EXPORT2 u_strToUTF8WithSub(
  char *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar *src,
  int32_t srcLength,
  UChar32 subchar,
  int32_t *pNumSubstitutions,
  UErrorCode *pErrorCode
)

Convert a UTF-16 string to UTF-8.

Same as u_strToUTF8() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().

See also: u_strToUTF8 See also: u_strFromUTF8WithSub

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of chars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
pDestLength
A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
src
The original source string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
subchar
The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
pNumSubstitutions
Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
pErrorCode
Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
The pointer to destination buffer.

u_strToUpper

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strToUpper(
  UChar *dest,
  int32_t destCapacity,
  const UChar *src,
  int32_t srcLength,
  const char *locale,
  UErrorCode *pErrorCode
)

Uppercase the characters in a string.

Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.

Details
Parameters
dest
A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
src
The original string
srcLength
The length of the original string. If -1, then src must be zero-terminated.
locale
The locale to consider, or "" for the root locale or NULL for the default locale.
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.

u_strcasecmp

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strcasecmp(
  const UChar *s1,
  const UChar *s2,
  uint32_t options
)

Compare two strings case-insensitively using full case folding.

This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).

Details
Parameters
s1
A string to compare.
s2
A string to compare.
options
A bit set of options:
  • U_FOLD_CASE_DEFAULT or 0 is used for default options: Comparison in code unit order with default case folding.
  • U_COMPARE_CODE_POINT_ORDER Set to choose code point order instead of code unit order (see u_strCompare for details).
  • U_FOLD_CASE_EXCLUDE_SPECIAL_I
Returns
A negative, zero, or positive integer indicating the comparison result.

u_strcat

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strcat(
  UChar *dst,
  const UChar *src
)

Concatenate two ustrings.

Appends a copy of src, including the null terminator, to dst. The initial copied character from src overwrites the null terminator in dst.

Details
Parameters
dst
The destination string.
src
The source string.
Returns
A pointer to dst.

u_strchr

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strchr(
  const UChar *s,
  UChar c
)

Find the first occurrence of a BMP code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details
Parameters
s
The string to search (NUL-terminated).
c
The BMP code point to find.
Returns
A pointer to the first occurrence of c in s or NULL if c is not in s.

See also: u_strchr32 See also: u_memchr See also: u_strstr See also: u_strFindFirst

u_strchr32

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strchr32(
  const UChar *s,
  UChar32 c
)

Find the first occurrence of a code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details
Parameters
s
The string to search (NUL-terminated).
c
The code point to find.
Returns
A pointer to the first occurrence of c in s or NULL if c is not in s.

See also: u_strchr See also: u_memchr32 See also: u_strstr See also: u_strFindFirst

u_strcmp

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strcmp(
  const UChar *s1,
  const UChar *s2
)

Compare two Unicode strings for bitwise equality (code unit order).

Details
Parameters
s1
A string to compare.
s2
A string to compare.
Returns
0 if s1 and s2 are bitwise equal; a negative value if s1 is bitwise less than s2; a positive value if s1 is bitwise greater than s2.

u_strcmpCodePointOrder

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strcmpCodePointOrder(
  const UChar *s1,
  const UChar *s2
)

Compare two Unicode strings in code point order.

See u_strCompare for details.

Details
Parameters
s1
A string to compare.
s2
A string to compare.
Returns
a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order

u_strcpy

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strcpy(
  UChar *dst,
  const UChar *src
)

Copy a ustring.

Adds a null terminator.

Details
Parameters
dst
The destination string.
src
The source string.
Returns
A pointer to dst.

u_strcspn

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strcspn(
  const UChar *string,
  const UChar *matchSet
)

Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in matchSet.

Works just like C's strcspn but with Unicode.

See also: u_strspn

Details
Parameters
string
The string in which to search, NUL-terminated.
matchSet
A NUL-terminated string defining a set of code points for which to search in the text string.
Returns
The number of initial characters in string that do not occur in matchSet.

u_strlen

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strlen(
  const UChar *s
)

Determine the length of an array of UChar.

Details
Parameters
s
The array of UChars, NUL (U+0000) terminated.
Returns
The number of UChars in s, not counting the terminator.

u_strncasecmp

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strncasecmp(
  const UChar *s1,
  const UChar *s2,
  int32_t n,
  uint32_t options
)

Compare two strings case-insensitively using full case folding.

This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), u_strFoldCase(s2, at most n, options)).

Details
Parameters
s1
A string to compare.
s2
A string to compare.
n
The maximum number of characters in each string to case-fold and then compare.
options
A bit set of options:
  • U_FOLD_CASE_DEFAULT or 0 is used for default options: Comparison in code unit order with default case folding.
  • U_COMPARE_CODE_POINT_ORDER Set to choose code point order instead of code unit order (see u_strCompare for details).
  • U_FOLD_CASE_EXCLUDE_SPECIAL_I
Returns
A negative, zero, or positive integer indicating the comparison result.

u_strncat

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strncat(
  UChar *dst,
  const UChar *src,
  int32_t n
)

Concatenate two ustrings.

Appends at most n characters from src to dst. Adds a terminating NUL. If src is too long, then only n-1 characters will be copied before the terminating NUL. If n<=0 then dst is not modified.

Details
Parameters
dst
The destination string.
src
The source string (can be NULL/invalid if n<=0).
n
The maximum number of characters to append; no-op if <=0.
Returns
A pointer to dst.

u_strncmp

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strncmp(
  const UChar *ucs1,
  const UChar *ucs2,
  int32_t n
)

Compare two ustrings for bitwise equality.

Compares at most n characters.

Details
Parameters
ucs1
A string to compare (can be NULL/invalid if n<=0).
ucs2
A string to compare (can be NULL/invalid if n<=0).
n
The maximum number of characters to compare; always returns 0 if n<=0.
Returns
0 if ucs1 and ucs2 are bitwise equal; a negative value if ucs1 is bitwise less than ucs2; a positive value if ucs1 is bitwise greater than ucs2.

u_strncmpCodePointOrder

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strncmpCodePointOrder(
  const UChar *s1,
  const UChar *s2,
  int32_t n
)

Compare two Unicode strings in code point order.

This is different in UTF-16 from u_strncmp() if supplementary characters are present. For details, see u_strCompare().

Details
Parameters
s1
A string to compare.
s2
A string to compare.
n
The maximum number of characters to compare.
Returns
a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order

u_strncpy

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strncpy(
  UChar *dst,
  const UChar *src,
  int32_t n
)

Copy a ustring.

Copies at most n characters. The result will be null terminated if the length of src is less than n.

Details
Parameters
dst
The destination string.
src
The source string (can be NULL/invalid if n<=0).
n
The maximum number of characters to copy; no-op if <=0.
Returns
A pointer to dst.

u_strpbrk

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strpbrk(
  const UChar *string,
  const UChar *matchSet
)

Locates the first occurrence in the string string of any of the characters in the string matchSet.

Works just like C's strpbrk but with Unicode.

Details
Parameters
string
The string in which to search, NUL-terminated.
matchSet
A NUL-terminated string defining a set of code points for which to search in the text string.
Returns
A pointer to the character in string that matches one of the characters in matchSet, or NULL if no such character is found.

u_strrchr

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strrchr(
  const UChar *s,
  UChar c
)

Find the last occurrence of a BMP code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details
Parameters
s
The string to search (NUL-terminated).
c
The BMP code point to find.
Returns
A pointer to the last occurrence of c in s or NULL if c is not in s.

See also: u_strrchr32 See also: u_memrchr See also: u_strrstr See also: u_strFindLast

u_strrchr32

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strrchr32(
  const UChar *s,
  UChar32 c
)

Find the last occurrence of a code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details
Parameters
s
The string to search (NUL-terminated).
c
The code point to find.
Returns
A pointer to the last occurrence of c in s or NULL if c is not in s.

See also: u_strrchr See also: u_memchr32 See also: u_strrstr See also: u_strFindLast

u_strrstr

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strrstr(
  const UChar *s,
  const UChar *substring
)

Find the last occurrence of a substring in a string.

The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.

Details
Parameters
s
The string to search (NUL-terminated).
substring
The substring to find (NUL-terminated).
Returns
A pointer to the last occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s.

See also: u_strstr See also: u_strFindFirst See also: u_strFindLast

u_strspn

Declared in unicode/ustring.h
U_CAPI int32_t U_EXPORT2 u_strspn(
  const UChar *string,
  const UChar *matchSet
)

Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in matchSet.

Works just like C's strspn but with Unicode.

See also: u_strcspn

Details
Parameters
string
The string in which to search, NUL-terminated.
matchSet
A NUL-terminated string defining a set of code points for which to search in the text string.
Returns
The number of initial characters in string that do occur in matchSet.

u_strstr

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strstr(
  const UChar *s,
  const UChar *substring
)

Find the first occurrence of a substring in a string.

The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.

Details
Parameters
s
The string to search (NUL-terminated).
substring
The substring to find (NUL-terminated).
Returns
A pointer to the first occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s.

See also: u_strrstr See also: u_strFindFirst See also: u_strFindLast

u_strtok_r

Declared in unicode/ustring.h
U_CAPI UChar *U_EXPORT2 u_strtok_r(
  UChar *src,
  const UChar *delim,
  UChar **saveState
)

The string tokenizer API allows an application to break a string into tokens.

Unlike strtok(), the current position within the original string is maintained in the caller-supplied saveState. In the first call, the argument src is a pointer to the string. In subsequent calls to return successive tokens of that string, src must be specified as NULL. The value saveState is set by this function to maintain the function's position within the string, and on each subsequent call you must give this argument the same variable. This function does handle surrogate pairs. This function is similar to strtok_r() from the POSIX Threads Extension (1003.1c-1995).

Details
Parameters
src
String containing token(s). This string will be modified. After the first call to u_strtok_r(), this argument must be NULL to get to the next token.
delim
Set of delimiter characters (Unicode code points).
saveState
The current pointer within the original string, which is set by this function. The saveState parameter should be the address of a local variable of type UChar * (i.e. define "UChar *myLocalSaveState" and use &myLocalSaveState for this parameter).
Returns
A pointer to the next token found in src, or NULL when there are no more tokens.

u_tolower

Declared in unicode/uchar.h
U_CAPI UChar32 U_EXPORT2 u_tolower(
  UChar32 c
)

The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.

Same as java.lang.Character.toLowerCase().

This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings

Details
Parameters
c
the code point to be mapped
Returns
the Simple_Lowercase_Mapping of the code point, if any; otherwise the code point itself.

u_totitle

Declared in unicode/uchar.h
U_CAPI UChar32 U_EXPORT2 u_totitle(
  UChar32 c
)

The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.

Same as java.lang.Character.toTitleCase().

This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings

Details
Parameters
c
the code point to be mapped
Returns
the Simple_Titlecase_Mapping of the code point, if any; otherwise the code point itself.

u_toupper

Declared in unicode/uchar.h
U_CAPI UChar32 U_EXPORT2 u_toupper(
  UChar32 c
)

The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.

Same as java.lang.Character.toUpperCase().

This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings

Details
Parameters
c
the code point to be mapped
Returns
the Simple_Uppercase_Mapping of the code point, if any; otherwise the code point itself.

u_versionToString

Declared in unicode/uversion.h
U_CAPI void U_EXPORT2 u_versionToString(
  const UVersionInfo versionArray,
  char *versionString
)

Write a string with dotted-decimal version information according to the input UVersionInfo.

Definition of this function lives in putil.c

Details
Parameters
versionArray
The version information to be written as a string.
versionString
A string buffer that will be filled in with a string corresponding to the numeric version information in versionArray. The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.

ubrk_clone

Declared in unicode/ubrk.h
U_CAPI UBreakIterator *U_EXPORT2 ubrk_clone(
  const UBreakIterator *bi,
  UErrorCode *status
)

Thread safe cloning operation.

Details
Parameters
bi
iterator to be cloned
status
to indicate whether the operation went on smoothly or there were errors
Returns
pointer to the new clone

ubrk_close

Declared in unicode/ubrk.h
U_CAPI void U_EXPORT2 ubrk_close(
  UBreakIterator *bi
)

Close a UBreakIterator.

Once closed, a UBreakIterator may no longer be used.

Details
Parameters
bi
The break iterator to close.

ubrk_countAvailable

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_countAvailable(
  void
)

Determine how many locales have text breaking information available.

This function is most useful for determining the loop ending condition for calls to ubrk_getAvailable. See also: ubrk_getAvailable

Details
Returns
The number of locales for which text breaking information is available.

ubrk_current

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_current(
  const UBreakIterator *bi
)

Determine the most recently-returned text boundary.

Details
Parameters
bi
The break iterator to use.
Returns
The character index most recently returned by ubrk_next, ubrk_previous, ubrk_first, or ubrk_last.

ubrk_first

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_first(
  UBreakIterator *bi
)

Set the iterator position to zero, the start of the text being scanned.

See also: ubrk_last

Details
Parameters
bi
The break iterator to use.
Returns
The new iterator position (zero).

ubrk_following

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_following(
  UBreakIterator *bi,
  int32_t offset
)

Advance the iterator to the first boundary following the specified offset.

The value returned is always greater than offset, or UBRK_DONE. See also: ubrk_preceding

Details
Parameters
bi
The break iterator to use.
offset
The offset to begin scanning.
Returns
The text boundary following offset, or UBRK_DONE.

ubrk_getAvailable

Declared in unicode/ubrk.h
U_CAPI const char *U_EXPORT2 ubrk_getAvailable(
  int32_t index
)

Get a locale for which text breaking information is available.

A UBreakIterator in a locale returned by this function will perform the correct text breaking for the locale. See also: ubrk_countAvailable

Details
Parameters
index
The index of the desired locale.
Returns
A locale for which text breaking information is available, or 0 if none.

ubrk_getRuleStatus

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_getRuleStatus(
  UBreakIterator *bi
)

Return the status from the break rule that determined the most recently returned break position.

The values appear in the rule source within brackets, {123}, for example. For rules that do not specify a status, a default value of 0 is returned.

For word break iterators, the possible values are defined in enum UWordBreak.

ubrk_getRuleStatusVec

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_getRuleStatusVec(
  UBreakIterator *bi,
  int32_t *fillInVec,
  int32_t capacity,
  UErrorCode *status
)

Get the statuses from the break rules that determined the most recently returned break position.

The values appear in the rule source within brackets, {123}, for example. The default status value for rules that do not explicitly provide one is zero.

For word break iterators, the possible values are defined in enum UWordBreak.

Details
Parameters
bi
The break iterator to use
fillInVec
an array to be filled in with the status values.
capacity
the length of the supplied vector. A length of zero causes the function to return the number of status values, in the normal way, without attempting to store any values.
status
receives error codes.
Returns
The number of rule status values from rules that determined the most recent boundary returned by the break iterator.

ubrk_isBoundary

Declared in unicode/ubrk.h
U_CAPI UBool U_EXPORT2 ubrk_isBoundary(
  UBreakIterator *bi,
  int32_t offset
)

Returns true if the specified position is a boundary position.

As a side effect, leaves the iterator pointing to the first boundary position at or after "offset".

Details
Parameters
bi
The break iterator to use.
offset
the offset to check.
Returns
True if "offset" is a boundary position.

ubrk_last

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_last(
  UBreakIterator *bi
)

Set the iterator position to the index immediately beyond the last character in the text being scanned.

This is not the same as the last character. See also: ubrk_first

Details
Parameters
bi
The break iterator to use.
Returns
The character offset immediately beyond the last character in the text being scanned.

ubrk_next

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_next(
  UBreakIterator *bi
)

Advance the iterator to the boundary following the current boundary.

See also: ubrk_previous

Details
Parameters
bi
The break iterator to use.
Returns
The character index of the next text boundary, or UBRK_DONE if all text boundaries have been returned.

ubrk_open

Declared in unicode/ubrk.h
U_CAPI UBreakIterator *U_EXPORT2 ubrk_open(
  UBreakIteratorType type,
  const char *locale,
  const UChar *text,
  int32_t textLength,
  UErrorCode *status
)

Open a new UBreakIterator for locating text boundaries for a specified locale.

A UBreakIterator may be used for detecting character, line, word, and sentence breaks in text. See also: ubrk_openRules

Details
Parameters
type
The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, UBRK_LINE, UBRK_SENTENCE
locale
The locale specifying the text-breaking conventions. Note that locale keys such as "lb" and "ss" may be used to modify text break behavior, see general discussion of BreakIterator C API.
text
The text to be iterated over. May be null, in which case ubrk_setText() is used to specify the text to be iterated.
textLength
The number of characters in text, or -1 if null-terminated.
status
A UErrorCode to receive any errors.
Returns
A UBreakIterator for the specified locale.

ubrk_preceding

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_preceding(
  UBreakIterator *bi,
  int32_t offset
)

Set the iterator position to the first boundary preceding the specified offset.

The new position is always smaller than offset, or UBRK_DONE. See also: ubrk_following

Details
Parameters
bi
The break iterator to use.
offset
The offset to begin scanning.
Returns
The text boundary preceding offset, or UBRK_DONE.

ubrk_previous

Declared in unicode/ubrk.h
U_CAPI int32_t U_EXPORT2 ubrk_previous(
  UBreakIterator *bi
)

Set the iterator position to the boundary preceding the current boundary.

See also: ubrk_next

Details
Parameters
bi
The break iterator to use.
Returns
The character index of the preceding text boundary, or UBRK_DONE if all text boundaries have been returned.

ubrk_setText

Declared in unicode/ubrk.h
U_CAPI void U_EXPORT2 ubrk_setText(
  UBreakIterator *bi,
  const UChar *text,
  int32_t textLength,
  UErrorCode *status
)

Sets an existing iterator to point to a new piece of text.

The break iterator retains a pointer to the supplied text. The caller must not modify or delete the text while the BreakIterator retains the reference.

Details
Parameters
bi
The iterator to use
text
The text to be set
textLength
The length of the text
status
The error code

ubrk_setUText

Declared in unicode/ubrk.h
U_CAPI void U_EXPORT2 ubrk_setUText(
  UBreakIterator *bi,
  UText *text,
  UErrorCode *status
)

Sets an existing iterator to point to a new piece of text.

All index positions returned by break iterator functions are native indices from the UText. For example, when breaking UTF-8 encoded text, the break positions returned by ubrk_next, ubrk_previous, etc. will be UTF-8 string indices, not UTF-16 positions.

Details
Parameters
bi
The iterator to use
text
The text to be set. This function makes a shallow clone of the supplied UText. This means that the caller is free to immediately close or otherwise reuse the UText that was passed as a parameter, but that the underlying text itself must not be altered while being referenced by the break iterator.
status
The error code

ucol_clone

Declared in unicode/ucol.h
U_CAPI UCollator *U_EXPORT2 ucol_clone(
  const UCollator *coll,
  UErrorCode *status
)

Thread safe cloning operation.

The result is a clone of a given collator. See also: ucol_open See also: ucol_openRules See also: ucol_close

Details
Parameters
coll
collator to be cloned
status
to indicate whether the operation went on smoothly or there were errors
Returns
pointer to the new clone

ucol_close

Declared in unicode/ucol.h
U_CAPI void U_EXPORT2 ucol_close(
  UCollator *coll
)

Close a UCollator.

Once closed, a UCollator should not be used. Every open collator should be closed. Otherwise, a memory leak will result. See also: ucol_open See also: ucol_openRules See also: ucol_clone

Details
Parameters
coll
The UCollator to close.

ucol_countAvailable

Declared in unicode/ucol.h
U_CAPI int32_t U_EXPORT2 ucol_countAvailable(
  void
)

Determine how many locales have collation rules available.

This function is most useful for determining the loop ending condition for calls to ucol_getAvailable. See also: ucol_getAvailable

Details
Returns
The number of locales for which collation rules are available.

ucol_getAttribute

Declared in unicode/ucol.h
U_CAPI UColAttributeValue U_EXPORT2 ucol_getAttribute(
  const UCollator *coll,
  UColAttribute attr,
  UErrorCode *status
)

Universal attribute getter.

See also: UColAttribute See also: UColAttributeValue See also: ucol_setAttribute

Details
Parameters
coll
collator whose attributes are to be queried
attr
attribute type
status
to indicate whether the operation went on smoothly or there were errors
Returns
attribute value

ucol_getAvailable

Declared in unicode/ucol.h
U_CAPI const char *U_EXPORT2 ucol_getAvailable(
  int32_t localeIndex
)

Get a locale for which collation rules are available.

A UCollator in a locale returned by this function will perform the correct collation for the locale. See also: ucol_countAvailable

Details
Parameters
localeIndex
The index of the desired locale.
Returns
A locale for which collation rules are available, or 0 if none.

ucol_getDisplayName

Declared in unicode/ucol.h
U_CAPI int32_t U_EXPORT2 ucol_getDisplayName(
  const char *objLoc,
  const char *dispLoc,
  UChar *result,
  int32_t resultLength,
  UErrorCode *status
)

Get the display name for a UCollator.

The display name is suitable for presentation to a user.

Details
Parameters
objLoc
The locale of the collator in question.
dispLoc
The locale for display.
result
A pointer to a buffer to receive the display name.
resultLength
The maximum size of result.
status
A pointer to a UErrorCode to receive any errors
Returns
The total buffer size needed; if greater than resultLength, the output was truncated.

ucol_getEquivalentReorderCodes

Declared in unicode/ucol.h
U_CAPI int32_t U_EXPORT2 ucol_getEquivalentReorderCodes(
  int32_t reorderCode,
  int32_t *dest,
  int32_t destCapacity,
  UErrorCode *pErrorCode
)

Retrieves the reorder codes that are grouped with the given reorder code.

Some reorder codes will be grouped and must reorder together. Beginning with ICU 55, scripts only reorder together if they are primary-equal, for example Hiragana and Katakana.

See also: ucol_setReorderCodes See also: ucol_getReorderCodes See also: UScriptCode See also: UColReorderCode

Details
Parameters
reorderCode
The reorder code to determine equivalence for.
dest
The array to fill with the script ordering.
destCapacity
The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any codes (pre-flighting).
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The number of reordering codes written to the dest array.

ucol_getFunctionalEquivalent

Declared in unicode/ucol.h
U_CAPI int32_t U_EXPORT2 ucol_getFunctionalEquivalent(
  char *result,
  int32_t resultCapacity,
  const char *keyword,
  const char *locale,
  UBool *isAvailable,
  UErrorCode *status
)

Return the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.

If two different input locale + keyword combinations produce the same result locale, then collators instantiated for these two different input locales will behave equivalently. The converse is not always true; two collators may in fact be equivalent, but return different results, due to internal details. The return result has no other meaning than that stated above, and implies nothing as to the relationship between the two locales. This is intended for use by applications that wish to cache collators, or otherwise reuse collators when possible. The functional equivalent may change over time. For more information, please see the Locales and Services section of the ICU User Guide.

Details
Parameters
result
fillin for the functionally equivalent result locale
resultCapacity
capacity of the fillin buffer
keyword
a particular keyword as enumerated by ucol_getKeywords.
locale
the specified input locale
isAvailable
if non-NULL, pointer to a fillin parameter that on return indicates whether the specified input locale was 'available' to the collation service. A locale is defined as 'available' if it physically exists within the collation locale data.
status
pointer to input-output error code
Returns
the actual buffer size needed for the locale. If greater than resultCapacity, the returned full name will be truncated and an error code will be returned.

ucol_getKeywordValues

Declared in unicode/ucol.h
U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywordValues(
  const char *keyword,
  UErrorCode *status
)

Given a keyword, create a string enumeration of all values for that keyword that are currently in use.

Details
Parameters
keyword
a particular keyword as enumerated by ucol_getKeywords. If any other keyword is passed in, *status is set to U_ILLEGAL_ARGUMENT_ERROR.
status
input-output error code
Returns
a string enumeration over collation keyword values, or NULL upon error. The caller is responsible for closing the result.

ucol_getKeywordValuesForLocale

Declared in unicode/ucol.h
U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywordValuesForLocale(
  const char *key,
  const char *locale,
  UBool commonlyUsed,
  UErrorCode *status
)

Given a key and a locale, returns an array of string values in a preferred order that would make a difference.

These are all and only those values where the open (creation) of the service with the locale formed from the input locale plus input keyword and that value has different behavior than creation with the input locale alone.

Details
Parameters
key
one of the keys supported by this service. For now, only "collation" is supported.
locale
the locale
commonlyUsed
if set to true it will return only commonly used values with the given locale in preferred order. Otherwise, it will return all the available values for the locale.
status
error status
Returns
a string enumeration over keyword values for the given key and the locale.

ucol_getKeywords

Declared in unicode/ucol.h
U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywords(
  UErrorCode *status
)

Create a string enumerator of all possible keywords that are relevant to collation.

At this point, the only recognized keyword for this service is "collation".

Details
Parameters
status
input-output error code
Returns
a string enumeration over locale strings. The caller is responsible for closing the result.

ucol_getMaxVariable

Declared in unicode/ucol.h
U_CAPI UColReorderCode U_EXPORT2 ucol_getMaxVariable(
  const UCollator *coll
)

Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.

See also: ucol_setMaxVariable

Details
Parameters
coll
the collator
Returns
the maximum variable reordering group.

ucol_getReorderCodes

Declared in unicode/ucol.h
U_CAPI int32_t U_EXPORT2 ucol_getReorderCodes(
  const UCollator *coll,
  int32_t *dest,
  int32_t destCapacity,
  UErrorCode *pErrorCode
)

Retrieves the reordering codes for this collator.

These reordering codes are a combination of UScript codes and UColReorderCode entries. See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode See also: UColReorderCode

Details
Parameters
coll
The UCollator to query.
dest
The array to fill with the script ordering.
destCapacity
The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any codes (pre-flighting).
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns
The number of reordering codes written to the dest array.

ucol_getSortKey

Declared in unicode/ucol.h
U_CAPI int32_t U_EXPORT2 ucol_getSortKey(
  const UCollator *coll,
  const UChar *source,
  int32_t sourceLength,
  uint8_t *result,
  int32_t resultLength
)

Get a sort key for a string from a UCollator.

Sort keys may be compared using strcmp.

Note that sort keys are often less efficient than simply doing comparison. For more details, see the ICU User Guide.

Like ICU functions that write to an output buffer, the buffer contents are undefined if the buffer capacity (resultLength parameter) is too small. Unlike ICU functions that write a string to an output buffer, the terminating zero byte is counted in the sort key length. See also: ucol_keyHashCode

Details
Parameters
coll
The UCollator containing the collation rules.
source
The string to transform.
sourceLength
The length of source, or -1 if null-terminated.
result
A pointer to a buffer to receive the sort key.
resultLength
The maximum size of result.
Returns
The size needed to fully store the sort key. If there was an internal error generating the sort key, a zero value is returned.

ucol_getStrength

Declared in unicode/ucol.h
U_CAPI UCollationStrength U_EXPORT2 ucol_getStrength(
  const UCollator *coll
)

Get the collation strength used in a UCollator.

The strength influences how strings are compared. See also: ucol_setStrength

Details
Parameters
coll
The UCollator to query.
Returns
The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL

ucol_mergeSortkeys

Declared in unicode/ucol.h
U_CAPI int32_t U_EXPORT2 ucol_mergeSortkeys(
  const uint8_t *src1,
  int32_t src1Length,
  const uint8_t *src2,
  int32_t src2Length,
  uint8_t *dest,
  int32_t destCapacity
)

Merges two sort keys.

The levels are merged with their corresponding counterparts (primaries with primaries, secondaries with secondaries etc.). Between the values from the same level a separator is inserted.

This is useful, for example, for combining sort keys from first and last names to sort such pairs. See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys

The recommended way to achieve "merged" sorting is by concatenating strings with U+FFFE between them. The concatenation has the same sort order as the merged sort keys, but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2). Using strings with U+FFFE may yield shorter sort keys.

For details about Sort Key Features see https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features

It is possible to merge multiple sort keys by consecutively merging another one with the intermediate result.

The length of the merge result is the sum of the lengths of the input sort keys.

Example (uncompressed):

191B1D 01 050505 01 910505 00
1F2123 01 050505 01 910505 00
will be merged as
191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00

If the destination buffer is not big enough, then its contents are undefined. If any of the source lengths are zero or any of the source pointers are NULL/undefined, the result is of size zero.

Details
Parameters
src1
the first sort key
src1Length
the length of the first sort key, including the zero byte at the end; can be -1 if the function is to find the length
src2
the second sort key
src2Length
the length of the second sort key, including the zero byte at the end; can be -1 if the function is to find the length
dest
the buffer where the merged sort key is written, can be NULL if destCapacity==0
destCapacity
the number of bytes in the dest buffer
Returns
the length of the merged sort key, src1Length+src2Length; can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), in which case the contents of dest are undefined

ucol_open

Declared in unicode/ucol.h
U_CAPI UCollator *U_EXPORT2 ucol_open(
  const char *loc,
  UErrorCode *status
)

Open a UCollator for comparing strings.

For some languages, multiple collation types are available; for example, "de@collation=phonebook". Starting with ICU 54, collation attributes can be specified via locale keywords as well, in the old locale extension syntax ("el@colCaseFirst=upper") or in language tag syntax ("el-u-kf-upper"). See User Guide: Collation API.

The UCollator pointer is used in all the calls to the Collation service. When it is no longer needed, the collator must be disposed of by calling ucol_close. See also: ucol_openRules See also: ucol_clone See also: ucol_close

Details
Parameters
loc
The locale containing the required collation rules. Special values for locales can be passed in - if NULL is passed for the locale, the default locale collation rules will be used. If empty string ("") or "root" are passed, the root collator will be returned.
status
A pointer to a UErrorCode to receive any errors
Returns
A pointer to a UCollator, or 0 if an error occurred.

ucol_openAvailableLocales

Declared in unicode/ucol.h
U_CAPI UEnumeration *U_EXPORT2 ucol_openAvailableLocales(
  UErrorCode *status
)

Create a string enumerator of all locales for which a valid collator may be opened.

Details
Parameters
status
input-output error code
Returns
a string enumeration over locale strings. The caller is responsible for closing the result.

ucol_openRules

Declared in unicode/ucol.h
U_CAPI UCollator *U_EXPORT2 ucol_openRules(
  const UChar *rules,
  int32_t rulesLength,
  UColAttributeValue normalizationMode,
  UCollationStrength strength,
  UParseError *parseError,
  UErrorCode *status
)

Produce a UCollator instance according to the rules supplied.

The rules are used to change the default ordering, defined in the UCA, in a process called tailoring. The resulting UCollator pointer can be used in the same way as the one obtained by ucol_open. See also: ucol_open See also: ucol_clone See also: ucol_close

Details
Parameters
rules
A string describing the collation rules. For the syntax of the rules, please see the User Guide.
rulesLength
The length of rules, or -1 if null-terminated.
normalizationMode
The normalization mode: One of UCOL_OFF (expect the text to not need normalization), UCOL_ON (normalize), or UCOL_DEFAULT (set the mode according to the rules)
strength
The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can be also set in the rules.
parseError
A pointer to a UParseError to receive information about errors that occurred during parsing. This argument can currently be set to NULL, but at the user's own risk. Please provide a real structure.
status
A pointer to a UErrorCode to receive any errors
Returns
A pointer to a UCollator. It is not guaranteed that NULL will be returned in case of error - please use the status argument to check for errors.

ucol_setAttribute

Declared in unicode/ucol.h
U_CAPI void U_EXPORT2 ucol_setAttribute(
  UCollator *coll,
  UColAttribute attr,
  UColAttributeValue value,
  UErrorCode *status
)

Universal attribute setter.

See also: UColAttribute See also: UColAttributeValue See also: ucol_getAttribute

Details
Parameters
coll
collator whose attributes are to be changed
attr
attribute type
value
attribute value
status
to indicate whether the operation went on smoothly or there were errors

ucol_setMaxVariable

Declared in unicode/ucol.h
U_CAPI void U_EXPORT2 ucol_setMaxVariable(
  UCollator *coll,
  UColReorderCode group,
  UErrorCode *pErrorCode
)

Sets the variable top to the top of the specified reordering group.

The variable top determines the highest-sorting character which is affected by UCOL_ALTERNATE_HANDLING. If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. See also: ucol_getMaxVariable

Details
Parameters
coll
the collator
group
one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

ucol_setReorderCodes

Declared in unicode/ucol.h
U_CAPI void U_EXPORT2 ucol_setReorderCodes(
  UCollator *coll,
  const int32_t *reorderCodes,
  int32_t reorderCodesLength,
  UErrorCode *pErrorCode
)

Sets the reordering codes for this collator.

Collation reordering allows scripts and some other groups of characters to be moved relative to each other. This reordering is done on top of the DUCET/CLDR standard collation order. Reordering can specify groups to be placed at the start and/or the end of the collation order. These groups are specified using UScript codes and UColReorderCode entries.

By default, reordering codes specified for the start of the order are placed in the order given after several special non-script blocks. These special groups of characters are space, punctuation, symbol, currency, and digit. These special groups are represented with UColReorderCode entries. Script groups can be intermingled with these special non-script groups if those special groups are explicitly specified in the reordering.

The special code OTHERS stands for any script that is not explicitly mentioned in the list of reordering codes given. Anything that is after OTHERS will go at the very end of the reordering in the order given.

The special reorder code DEFAULT will reset the reordering for this collator to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that was specified when this collator was created from resource data or from rules. The DEFAULT code must be the sole code supplied when it is used. If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.

The special reorder code NONE will remove any reordering for this collator. The result of setting no reordering will be to have the DUCET/CLDR ordering used. The NONE code must be the sole code supplied when it is used.

See also: ucol_getReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode See also: UColReorderCode

Details
Parameters
coll
The UCollator to set.
reorderCodes
An array of script codes in the new order. This can be NULL if the length is also set to 0. An empty array will clear any reordering codes on the collator.
reorderCodesLength
The length of reorderCodes.
pErrorCode
Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

ucol_setStrength

Declared in unicode/ucol.h
U_CAPI void U_EXPORT2 ucol_setStrength(
  UCollator *coll,
  UCollationStrength strength
)

Set the collation strength used in a UCollator.

The strength influences how strings are compared. See also: ucol_getStrength

Details
Parameters
coll
The UCollator to set.
strength
The desired collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT

ucol_strcoll

Declared in unicode/ucol.h
U_CAPI UCollationResult U_EXPORT2 ucol_strcoll(
  const UCollator *coll,
  const UChar *source,
  int32_t sourceLength,
  const UChar *target,
  int32_t targetLength
)

Compare two strings.

The strings will be compared using the options already specified. See also: ucol_greater See also: ucol_greaterOrEqual See also: ucol_equal

Details
Parameters
coll
The UCollator containing the comparison rules.
source
The source string.
sourceLength
The length of source, or -1 if null-terminated.
target
The target string.
targetLength
The length of target, or -1 if null-terminated.
Returns
The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS

ucol_strcollUTF8

Declared in unicode/ucol.h
U_CAPI UCollationResult U_EXPORT2 ucol_strcollUTF8(
  const UCollator *coll,
  const char *source,
  int32_t sourceLength,
  const char *target,
  int32_t targetLength,
  UErrorCode *status
)

Compare two strings in UTF-8.

The strings will be compared using the options already specified. Note: When the input string contains a malformed UTF-8 byte sequence, this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). See also: ucol_greater, ucol_greaterOrEqual, ucol_equal

Details
Parameters
coll
The UCollator containing the comparison rules.
source
The source UTF-8 string.
sourceLength
The length of source, or -1 if null-terminated.
target
The target UTF-8 string.
targetLength
The length of target, or -1 if null-terminated.
status
A pointer to a UErrorCode to receive any errors
Returns
The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS

uenum_close

Declared in unicode/uenum.h
U_CAPI void U_EXPORT2 uenum_close(
  UEnumeration *en
)

Disposes of resources in use by the iterator.

If en is NULL, does nothing. After this call, any char* or UChar* pointer returned by uenum_unext() or uenum_next() is invalid.

Details
Parameters
en
UEnumeration structure pointer

uenum_count

Declared in unicode/uenum.h
U_CAPI int32_t U_EXPORT2 uenum_count(
  UEnumeration *en,
  UErrorCode *status
)

Returns the number of elements that the iterator traverses.

If the iterator is out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR. This is a convenience function. It can end up being very expensive as all the items might have to be pre-fetched (depending on the type of data being traversed). Use with caution and only when necessary.

Details
Parameters
en
UEnumeration structure pointer
status
error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the iterator is out of sync.
Returns
number of elements in the iterator

uenum_next

Declared in unicode/uenum.h
U_CAPI const char *U_EXPORT2 uenum_next(
  UEnumeration *en,
  int32_t *resultLength,
  UErrorCode *status
)

Returns the next element in the iterator's list.

If there are no more elements, returns NULL. If the iterator is out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. If the native service string is a UChar* string, it is converted to char* with the invariant converter. The result is terminated by (char)0. If the conversion fails (because a character cannot be converted) then status is set to U_INVARIANT_CONVERSION_ERROR and the return value is undefined (but non-NULL).

Details
Parameters
en
the iterator object
resultLength
pointer to receive the length of the result (not including the terminating \0). If the pointer is NULL it is ignored.
status
the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if the iterator is out of sync with its service. Set to U_INVARIANT_CONVERSION_ERROR if the underlying native string is UChar* and conversion to char* with the invariant converter fails. This error pertains only to current string, so iteration might be able to continue successfully.
Returns
a pointer to the string. The string will be zero-terminated. The return pointer is owned by this iterator and must not be deleted by the caller. The pointer is valid until the next call to any uenum_... method, including uenum_next() or uenum_unext(). When all strings have been traversed, returns NULL.

uenum_openCharStringsEnumeration

Declared in unicode/uenum.h
U_CAPI UEnumeration *U_EXPORT2 uenum_openCharStringsEnumeration(
  const char *const strings[],
  int32_t count,
  UErrorCode *ec
)

Given an array of const char* strings (invariant chars only), return a UEnumeration.

String pointers from 0..count-1 must not be null. Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.

See also: uenum_close

Details
Parameters
strings
array of char* strings (each null terminated). All storage is owned by the caller.
count
length of the array
ec
error code
Returns
the new UEnumeration object. Caller is responsible for calling uenum_close to free memory

uenum_openUCharStringsEnumeration

Declared in unicode/uenum.h
U_CAPI UEnumeration *U_EXPORT2 uenum_openUCharStringsEnumeration(
  const UChar *const strings[],
  int32_t count,
  UErrorCode *ec
)

Given an array of const UChar* strings, return a UEnumeration.

String pointers from 0..count-1 must not be null. Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.

See also: uenum_close

Details
Parameters
strings
array of const UChar* strings (each null terminated). All storage is owned by the caller.
count
length of the array
ec
error code
Returns
the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.

uenum_reset

Declared in unicode/uenum.h
U_CAPI void U_EXPORT2 uenum_reset(
  UEnumeration *en,
  UErrorCode *status
)

Resets the iterator to the current list of service IDs.

This re-establishes sync with the service and rewinds the iterator to start at the first element.

Details
Parameters
en
the iterator object
status
the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if the iterator is out of sync with its service.

uenum_unext

Declared in unicode/uenum.h
U_CAPI const UChar *U_EXPORT2 uenum_unext(
  UEnumeration *en,
  int32_t *resultLength,
  UErrorCode *status
)

Returns the next element in the iterator's list.

If there are no more elements, returns NULL. If the iterator is out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. If the native service string is a char* string, it is converted to UChar* with the invariant converter. The result is terminated by (UChar)0.

Details
Parameters
en
the iterator object
resultLength
pointer to receive the length of the result (not including the terminating \0). If the pointer is NULL it is ignored.
status
the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if the iterator is out of sync with its service.
Returns
a pointer to the string. The string will be zero-terminated. The return pointer is owned by this iterator and must not be deleted by the caller. The pointer is valid until the next call to any uenum_... method, including uenum_next() or uenum_unext(). When all strings have been traversed, returns NULL.

uldn_close

Declared in unicode/uldnames.h
U_CAPI void U_EXPORT2 uldn_close(
  ULocaleDisplayNames *ldn
)

Closes a ULocaleDisplayNames instance obtained from uldn_open().

Details
Parameters
ldn
the ULocaleDisplayNames instance to be closed

uldn_getContext

Declared in unicode/uldnames.h
U_CAPI UDisplayContext U_EXPORT2 uldn_getContext(
  const ULocaleDisplayNames *ldn,
  UDisplayContextType type,
  UErrorCode *pErrorCode
)

Returns the UDisplayContext value for the specified UDisplayContextType.

Details
Parameters
ldn
the ULocaleDisplayNames instance
type
the UDisplayContextType whose value to return
pErrorCode
Pointer to UErrorCode input/output status. If at entry this indicates a failure status, the function will do nothing; otherwise this will be updated with any new status from the function.
Returns
the UDisplayContextValue for the specified type.

uldn_getDialectHandling

Declared in unicode/uldnames.h
U_CAPI UDialectHandling U_EXPORT2 uldn_getDialectHandling(
  const ULocaleDisplayNames *ldn
)

Returns the dialect handling used in the display names.

Details
Parameters
ldn
the LocaleDisplayNames instance
Returns
the dialect handling enum

uldn_getLocale

Declared in unicode/uldnames.h
U_CAPI const char *U_EXPORT2 uldn_getLocale(
  const ULocaleDisplayNames *ldn
)

Returns the locale used to determine the display names.

This is not necessarily the same locale passed to uldn_open.

Details
Parameters
ldn
the LocaleDisplayNames instance
Returns
the display locale

uldn_keyDisplayName

Declared in unicode/uldnames.h
U_CAPI int32_t U_EXPORT2 uldn_keyDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *key,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided locale key.

Details
Parameters
ldn
the LocaleDisplayNames instance
key
the locale key whose display name to return
result
receives the display name
maxResultSize
the size of the result buffer
pErrorCode
the status code
Returns
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_keyValueDisplayName

Declared in unicode/uldnames.h
U_CAPI int32_t U_EXPORT2 uldn_keyValueDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *key,
  const char *value,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided value (used with the provided key).

Details
Parameters
ldn
the LocaleDisplayNames instance
key
the locale key
value
the locale key's value
result
receives the display name
maxResultSize
the size of the result buffer
pErrorCode
the status code
Returns
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_languageDisplayName

Declared in unicode/uldnames.h
U_CAPI int32_t U_EXPORT2 uldn_languageDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *lang,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided language code.

Details
Parameters
ldn
the LocaleDisplayNames instance
lang
the language code whose display name to return
result
receives the display name
maxResultSize
the size of the result buffer
pErrorCode
the status code
Returns
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_localeDisplayName

Declared in unicode/uldnames.h
U_CAPI int32_t U_EXPORT2 uldn_localeDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *locale,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided locale.

Details
Parameters
ldn
the LocaleDisplayNames instance
locale
the locale whose display name to return
result
receives the display name
maxResultSize
the size of the result buffer
pErrorCode
the status code
Returns
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_open

Declared in unicode/uldnames.h
U_CAPI ULocaleDisplayNames *U_EXPORT2 uldn_open(
  const char *locale,
  UDialectHandling dialectHandling,
  UErrorCode *pErrorCode
)

Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided dialectHandling.

The usual value for dialectHandling is ULOC_STANDARD_NAMES.

Details
Parameters
locale
the display locale
dialectHandling
how to select names for locales
pErrorCode
the status code
Returns
a ULocaleDisplayNames instance

uldn_openForContext

Declared in unicode/uldnames.h
U_CAPI ULocaleDisplayNames *U_EXPORT2 uldn_openForContext(
  const char *locale,
  UDisplayContext *contexts,
  int32_t length,
  UErrorCode *pErrorCode
)

Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided UDisplayContext settings.

Details
Parameters
locale
The display locale
contexts
List of one or more context settings (e.g. for dialect handling, capitalization, etc.).
length
Number of items in the contexts list
pErrorCode
Pointer to UErrorCode input/output status. If at entry this indicates a failure status, the function will do nothing; otherwise this will be updated with any new status from the function.
Returns
a ULocaleDisplayNames instance

uldn_regionDisplayName

Declared in unicode/uldnames.h
U_CAPI int32_t U_EXPORT2 uldn_regionDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *region,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided region code.

Details
Parameters
ldn
the LocaleDisplayNames instance
region
the region code whose display name to return
result
receives the display name
maxResultSize
the size of the result buffer
pErrorCode
the status code
Returns
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_scriptCodeDisplayName

Declared in unicode/uldnames.h
U_CAPI int32_t U_EXPORT2 uldn_scriptCodeDisplayName(
  const ULocaleDisplayNames *ldn,
  UScriptCode scriptCode,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided script code.

Details
Parameters
ldn
the LocaleDisplayNames instance
scriptCode
the script code whose display name to return
result
receives the display name
maxResultSize
the size of the result buffer
pErrorCode
the status code
Returns
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_scriptDisplayName

Declared in unicode/uldnames.h
U_CAPI int32_t U_EXPORT2 uldn_scriptDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *script,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided script.

Details
Parameters
ldn
the LocaleDisplayNames instance
script
the script whose display name to return
result
receives the display name
maxResultSize
the size of the result buffer
pErrorCode
the status code
Returns
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_variantDisplayName

Declared in unicode/uldnames.h
U_CAPI int32_t U_EXPORT2 uldn_variantDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *variant,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided variant.

Details
Parameters
ldn
the LocaleDisplayNames instance
variant
the variant whose display name to return
result
receives the display name
maxResultSize
the size of the result buffer
pErrorCode
the status code
Returns
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uloc_acceptLanguage

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_acceptLanguage(
  char *result,
  int32_t resultAvailable,
  UAcceptResult *outResult,
  const char **acceptList,
  int32_t acceptListCount,
  UEnumeration *availableLocales,
  UErrorCode *status
)

Based on a list of available locales, determine an acceptable locale for the user.

This is a thin wrapper over C++ class LocaleMatcher.

Details
Parameters
result
- buffer to accept the result locale
resultAvailable
the size of the result buffer.
outResult
- An out parameter that contains the fallback status
acceptList
- list of acceptable languages
acceptListCount
- count of acceptList items
availableLocales
- list of available locales to match
status
ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
length needed for the locale.

uloc_addLikelySubtags

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_addLikelySubtags(
  const char *localeID,
  char *maximizedLocaleID,
  int32_t maximizedLocaleIDCapacity,
  UErrorCode *err
)

Add the likely subtags for a provided locale ID, per the algorithm described in the following CLDR technical report:

http://www.unicode.org/reports/tr35/#Likely_Subtags

If localeID is already in the maximal form, or there is no data available for maximization, it will be copied to the output buffer. For example, "sh" cannot be maximized, since there is no reasonable maximization.

Examples:

"und_Zzzz" maximizes to "en_Latn_US"

"en" maximizes to "en_Latn_US"

"de" maximizes to "de_Latn_DE"

"sr" maximizes to "sr_Cyrl_RS"

"zh_Hani" maximizes to "zh_Hani_CN"

Details
Parameters
localeID
The locale to maximize
maximizedLocaleID
The maximized locale
maximizedLocaleIDCapacity
The capacity of the maximizedLocaleID buffer
err
Error information if maximizing the locale failed. If the length of the localeID and the null-terminator is greater than the maximum allowed size, or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
Returns
The actual buffer size needed for the maximized locale. If it's greater than maximizedLocaleIDCapacity, the returned ID will be truncated. On error, the return value is -1.

uloc_canonicalize

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_canonicalize(
  const char *localeID,
  char *name,
  int32_t nameCapacity,
  UErrorCode *err
)

Gets the full name for the specified locale.

Note: This has the effect of 'canonicalizing' the string to a certain extent. Upper and lower case are set as needed, and if the components were in 'POSIX' format they are changed to ICU format. It does NOT map aliased names in any way. See the top of this header file.

Details
Parameters
localeID
the locale to get the full name with
name
the full name for localeID
nameCapacity
the size of the name buffer to store the full name with
err
error information if retrieving the full name failed
Returns
the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.

uloc_countAvailable

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_countAvailable(
  void
)

Gets the size of the list of all available locales.

Details
Returns
the size of the locale list

uloc_forLanguageTag

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_forLanguageTag(
  const char *langtag,
  char *localeID,
  int32_t localeIDCapacity,
  int32_t *parsedLength,
  UErrorCode *err
)

Returns a locale ID for the specified BCP47 language tag string.

If the specified language tag contains any ill-formed subtags, the first such subtag and all following subtags are ignored.

This implements the 'Language-Tag' production of BCP 47, and so supports legacy language tags (marked as “Type: grandfathered” in BCP 47) (regular and irregular) as well as private use language tags.

Private use tags are represented as 'x-whatever', and legacy tags are converted to their canonical replacements where they exist.

Note that a few legacy tags have no modern replacement; these will be converted using the fallback described in the first paragraph, so some information might be lost.

Details
Parameters
langtag
the input BCP47 language tag.
localeID
the output buffer receiving a locale ID for the specified BCP47 language tag.
localeIDCapacity
the size of the locale ID output buffer.
parsedLength
if not NULL, successfully parsed length for the input language tag is set.
err
error information if receiving the locale ID failed.
Returns
the length of the locale ID.

uloc_getAvailable

Declared in unicode/uloc.h
U_CAPI const char *U_EXPORT2 uloc_getAvailable(
  int32_t n
)

Gets the specified locale from a list of available locales.

This method corresponds to uloc_openAvailableByType called with the ULOC_AVAILABLE_DEFAULT type argument.

The return value is a pointer to an item of a locale name array. Both this array and the pointers it contains are owned by ICU and should not be deleted or written through by the caller. The locale name is terminated by a null pointer.

Details
Parameters
n
the specific locale name index of the available locale list; should not exceed the number returned by uloc_countAvailable.
Returns
a specified locale name of all available locales

uloc_getBaseName

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getBaseName(
  const char *localeID,
  char *name,
  int32_t nameCapacity,
  UErrorCode *err
)

Gets the full name for the specified locale, like uloc_getName(), but without keywords.

Note: This has the effect of 'canonicalizing' the string to a certain extent. Upper and lower case are set as needed, and if the components were in 'POSIX' format they are changed to ICU format. It does NOT map aliased names in any way. See the top of this header file.

This API strips off the keyword part, so "de_DE@collation=phonebook" will become "de_DE". This API supports preflighting.

Details
Parameters
localeID
the locale to get the full name with
name
fill in buffer for the name without keywords.
nameCapacity
capacity of the fill in buffer.
err
error information if retrieving the full name failed
Returns
the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.

uloc_getCharacterOrientation

Declared in unicode/uloc.h
U_CAPI ULayoutType U_EXPORT2 uloc_getCharacterOrientation(
  const char *localeId,
  UErrorCode *status
)

Get the layout character orientation for the specified locale.

Details
Parameters
localeId
locale name
status
Error status
Returns
an enum indicating the layout orientation for characters.

uloc_getCountry

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getCountry(
  const char *localeID,
  char *country,
  int32_t countryCapacity,
  UErrorCode *err
)

Gets the country code for the specified locale.

This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.

Details
Parameters
localeID
the locale to get the country code with
country
the country code for localeID
countryCapacity
the size of the country buffer to store the country code with
err
error information if retrieving the country code failed
Returns
the actual buffer size needed for the country code. If it's greater than countryCapacity, the returned country code will be truncated.

uloc_getDefault

Declared in unicode/uloc.h
U_CAPI const char *U_EXPORT2 uloc_getDefault(
  void
)

Gets ICU's default locale.

The returned string is a snapshot in time, and will remain valid and unchanged even when uloc_setDefault() is called. The returned storage is owned by ICU, and must not be altered or deleted by the caller. On Android, uloc_setDefault() is not visible because the default Locale in ICU4C, ICU4J and java.util.Locale are synchronized. To set a default locale, call java.util.Locale::setDefault in java or by reverse JNI.

Details
Returns
the ICU default locale

uloc_getDisplayCountry

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getDisplayCountry(
  const char *locale,
  const char *displayLocale,
  UChar *country,
  int32_t countryCapacity,
  UErrorCode *status
)

Gets the country name suitable for display for the specified locale.

Warning: this is for the region part of a valid locale ID; it cannot just be the region code (like "FR"). To get the display name for a region alone, or for other options, use ULocaleDisplayNames instead.

Details
Parameters
locale
the locale to get the displayable country code with. NULL may be used to specify the default.
displayLocale
Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
country
the displayable country code for localeID.
countryCapacity
the size of the country buffer to store the displayable country code with.
status
error information if retrieving the displayable country code failed. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and a case canonicalized country code is placed into country as fallback.
Returns
the actual buffer size needed for the displayable country code. If it's greater than countryCapacity, the returned displayable country code will be truncated.

uloc_getDisplayKeyword

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getDisplayKeyword(
  const char *keyword,
  const char *displayLocale,
  UChar *dest,
  int32_t destCapacity,
  UErrorCode *status
)

Gets the keyword name suitable for display for the specified locale.

E.g: for the locale string de_DE@collation=PHONEBOOK, this API gets the display string for the keyword collation.

Usage:

    UErrorCode status = U_ZERO_ERROR;
    const char* keyword = NULL;
    int32_t keywordLen = 0;
    int32_t keywordCount = 0;
    UChar displayKeyword[256];
    int32_t displayKeywordLen = 0;
    UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
    for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
        if(U_FAILURE(status)){
            ...something went wrong so handle the error...
            break;
        }
        // the uenum_next returns NUL terminated string
        keyword = uenum_next(keywordEnum, &keywordLen, &status);
        displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status);
        ... do something interesting .....
    }
    uenum_close(keywordEnum);

See also: uloc_openKeywords

Details
Parameters
keyword
The keyword whose display string needs to be returned.
displayLocale
Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
dest
the buffer to which the displayable keyword should be written.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
status
error information if retrieving the displayable string failed. Should not be NULL and should not indicate failure on entry. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and the keyword is placed into dest as fallback.
Returns
the actual buffer size needed for the displayable keyword.

uloc_getDisplayKeywordValue

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getDisplayKeywordValue(
  const char *locale,
  const char *keyword,
  const char *displayLocale,
  UChar *dest,
  int32_t destCapacity,
  UErrorCode *status
)

Gets the value of the keyword suitable for display for the specified locale.

E.g: for the locale string de_DE@collation=PHONEBOOK, this API gets the display string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.

Details
Parameters
locale
The locale to get the displayable keyword value with. NULL may be used to specify the default.
keyword
The keyword whose value should be used.
displayLocale
Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
dest
the buffer to which the displayable keyword should be written.
destCapacity
The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
status
error information if retrieving the displayable string failed. Should not be NULL and must not indicate failure on entry. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and the value of the keyword is placed into dest as fallback.
Returns
the actual buffer size needed for the displayable keyword value.

uloc_getDisplayLanguage

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getDisplayLanguage(
  const char *locale,
  const char *displayLocale,
  UChar *language,
  int32_t languageCapacity,
  UErrorCode *status
)

Gets the language name suitable for display for the specified locale.

Details
Parameters
locale
the locale to get the displayable language code with
displayLocale
Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch".
language
the displayable language code for localeID
languageCapacity
the size of the language buffer to store the displayable language code with.
status
error information if retrieving the displayable language code failed. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and a case canonicalized language code is placed into language as fallback.
Returns
the actual buffer size needed for the displayable language code. If it's greater than languageCapacity, the returned language code will be truncated.

uloc_getDisplayName

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getDisplayName(
  const char *localeID,
  const char *inLocaleID,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *err
)

Gets the full name suitable for display for the specified locale.

Details
Parameters
localeID
the locale to get the displayable name with. NULL may be used to specify the default.
inLocaleID
Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
result
the displayable name for localeID
maxResultSize
the size of the name buffer to store the displayable full name with
err
error information if retrieving the displayable name failed
Returns
the actual buffer size needed for the displayable name. If it's greater than maxResultSize, the returned displayable name will be truncated.

uloc_getDisplayScript

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getDisplayScript(
  const char *locale,
  const char *displayLocale,
  UChar *script,
  int32_t scriptCapacity,
  UErrorCode *status
)

Gets the script name suitable for display for the specified locale.

Details
Parameters
locale
the locale to get the displayable script code with. NULL may be used to specify the default.
displayLocale
Specifies the locale to be used to display the name. In other words, the script name is returned in the language of displayLocale: passing a French display locale yields the French name of the script, while passing a German display locale yields the German name. NULL may be used to specify the default.
script
the displayable script for the localeID.
scriptCapacity
the size of the script buffer to store the displayable script code with.
status
error information if retrieving the displayable script code failed. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and a case canonicalized script code is placed into script as fallback.
Returns
the actual buffer size needed for the displayable script code. If it's greater than scriptCapacity, the returned displayable script code will be truncated.

uloc_getDisplayVariant

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getDisplayVariant(
  const char *locale,
  const char *displayLocale,
  UChar *variant,
  int32_t variantCapacity,
  UErrorCode *status
)

Gets the variant name suitable for display for the specified locale.

Details
Parameters
locale
the locale to get the displayable variant code with. NULL may be used to specify the default.
displayLocale
Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
variant
the displayable variant code for localeID.
variantCapacity
the size of the variant buffer to store the displayable variant code with.
status
error information if retrieving the displayable variant code failed. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and a case canonicalized variant code is placed into variant as fallback.
Returns
the actual buffer size needed for the displayable variant code. If it's greater than variantCapacity, the returned displayable variant code will be truncated.

uloc_getISO3Country

Declared in unicode/uloc.h
U_CAPI const char *U_EXPORT2 uloc_getISO3Country(
  const char *localeID
)

Gets the ISO country code for the specified locale.

Details
Parameters
localeID
the locale to get the ISO country code with
Returns
the ISO country code for localeID

uloc_getISO3Language

Declared in unicode/uloc.h
U_CAPI const char *U_EXPORT2 uloc_getISO3Language(
  const char *localeID
)

Gets the ISO language code for the specified locale.

Details
Parameters
localeID
the locale to get the ISO language code with
Returns
the ISO language code for localeID

uloc_getISOCountries

Declared in unicode/uloc.h
U_CAPI const char *const *U_EXPORT2 uloc_getISOCountries(
  void
)

Gets a list of all available 2-letter country codes defined in ISO 3166.

This is a pointer to an array of pointers to arrays of char. All of these pointers are owned by ICU; do not delete them, and do not write through them. The array is terminated with a null pointer.

Details
Returns
a list of all available country codes

uloc_getISOLanguages

Declared in unicode/uloc.h
U_CAPI const char *const *U_EXPORT2 uloc_getISOLanguages(
  void
)

Gets a list of all available 2-letter language codes defined in ISO 639, plus additional 3-letter codes determined to be useful for locale generation as defined by Unicode CLDR.

This is a pointer to an array of pointers to arrays of char. All of these pointers are owned by ICU; do not delete them, and do not write through them. The array is terminated with a null pointer.

Details
Returns
a list of all available language codes

uloc_getKeywordValue

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getKeywordValue(
  const char *localeID,
  const char *keywordName,
  char *buffer,
  int32_t bufferCapacity,
  UErrorCode *status
)

Get the value for a keyword.

Locale name does not need to be normalized.

Details
Parameters
localeID
locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
keywordName
name of the keyword for which we want the value; must not be NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
buffer
receiving buffer
bufferCapacity
capacity of receiving buffer
status
containing error code: e.g. buffer not big enough or ill-formed localeID or keywordName parameters.
Returns
the length of keyword value

uloc_getLanguage

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getLanguage(
  const char *localeID,
  char *language,
  int32_t languageCapacity,
  UErrorCode *err
)

Gets the language code for the specified locale.

This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.

Details
Parameters
localeID
the locale to get the ISO language code with
language
the language code for localeID
languageCapacity
the size of the language buffer to store the language code with
err
error information if retrieving the language code failed
Returns
the actual buffer size needed for the language code. If it's greater than languageCapacity, the returned language code will be truncated.

uloc_getLineOrientation

Declared in unicode/uloc.h
U_CAPI ULayoutType U_EXPORT2 uloc_getLineOrientation(
  const char *localeId,
  UErrorCode *status
)

Get the layout line orientation for the specified locale.

Details
Parameters
localeId
locale name
status
Error status
Returns
an enum indicating the layout orientation for lines.

uloc_getName

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getName(
  const char *localeID,
  char *name,
  int32_t nameCapacity,
  UErrorCode *err
)

Gets the full name for the specified locale.

This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.

Note: This has the effect of 'canonicalizing' the ICU locale ID to a certain extent. Upper and lower case are set as needed. It does NOT map aliased names in any way. See the top of this header file. This API supports preflighting.

Details
Parameters
localeID
the locale to get the full name with
name
fill in buffer for the name without keywords.
nameCapacity
capacity of the fill in buffer.
err
error information if retrieving the full name failed
Returns
the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.

uloc_getScript

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getScript(
  const char *localeID,
  char *script,
  int32_t scriptCapacity,
  UErrorCode *err
)

Gets the script code for the specified locale.

This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.

Details
Parameters
localeID
the locale to get the script code with
script
the script code for localeID
scriptCapacity
the size of the script buffer to store the script code with
err
error information if retrieving the script code failed
Returns
the actual buffer size needed for the script code. If it's greater than scriptCapacity, the returned script code will be truncated.

uloc_getVariant

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_getVariant(
  const char *localeID,
  char *variant,
  int32_t variantCapacity,
  UErrorCode *err
)

Gets the variant code for the specified locale.

This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.

Details
Parameters
localeID
the locale to get the variant code with
variant
the variant code for localeID
variantCapacity
the size of the variant buffer to store the variant code with
err
error information if retrieving the variant code failed
Returns
the actual buffer size needed for the variant code. If it's greater than variantCapacity, the returned variant code will be truncated.

uloc_isRightToLeft

Declared in unicode/uloc.h
U_CAPI UBool U_EXPORT2 uloc_isRightToLeft(
  const char *locale
)

Returns whether the locale's script is written right-to-left.

If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). If no likely script is known, then false is returned.

A script is right-to-left according to the CLDR script metadata which corresponds to whether the script's letters have Bidi_Class=R or AL.

Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl".

Details
Parameters
locale
input locale ID
Returns
true if the locale's script is written right-to-left

uloc_minimizeSubtags

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_minimizeSubtags(
  const char *localeID,
  char *minimizedLocaleID,
  int32_t minimizedLocaleIDCapacity,
  UErrorCode *err
)

Minimize the subtags for a provided locale ID, per the algorithm described in the following CLDR technical report:

http://www.unicode.org/reports/tr35/#Likely_Subtags

If localeID is already in the minimal form, or there is no data available for minimization, it will be copied to the output buffer. Since the minimization algorithm relies on proper maximization, see the comments for uloc_addLikelySubtags for reasons why there might not be any data.

Examples:

"en_Latn_US" minimizes to "en"

"de_Latn_US" minimizes to "de"

"sr_Cyrl_RS" minimizes to "sr"

"zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the script, and minimizing to "zh" would imply "zh_Hans_CN".)

Details
Parameters
localeID
The locale to minimize
minimizedLocaleID
The minimized locale
minimizedLocaleIDCapacity
The capacity of the minimizedLocaleID buffer
err
Error information if minimizing the locale failed. If the length of the localeID and the null-terminator is greater than the maximum allowed size, or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
Returns
The actual buffer size needed for the minimized locale. If it's greater than minimizedLocaleIDCapacity, the returned ID will be truncated. On error, the return value is -1.

uloc_openKeywords

Declared in unicode/uloc.h
U_CAPI UEnumeration *U_EXPORT2 uloc_openKeywords(
  const char *localeID,
  UErrorCode *status
)

Gets an enumeration of keywords for the specified locale.

Enumeration must get disposed of by the client using uenum_close function.

Details
Parameters
localeID
the locale to get the keywords with
status
error information if retrieving the keywords failed
Returns
enumeration of keywords or NULL if there are no keywords.

uloc_setKeywordValue

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_setKeywordValue(
  const char *keywordName,
  const char *keywordValue,
  char *buffer,
  int32_t bufferCapacity,
  UErrorCode *status
)

Sets or removes the value of the specified keyword.

For removing all keywords, use uloc_getBaseName().

NOTE: Unlike almost every other ICU function which takes a buffer, this function will NOT truncate the output text, and will not update the buffer with unterminated text setting a status of U_STRING_NOT_TERMINATED_WARNING. If a U_BUFFER_OVERFLOW_ERROR is received, it means a terminated version of the updated locale ID would not fit in the buffer, and the original buffer is untouched. This is done to prevent incorrect or possibly even malformed locales from being generated and used.

See also: uloc_getKeywordValue

Details
Parameters
keywordName
name of the keyword to be set; must not be NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
keywordValue
value of the keyword to be set. If 0-length or NULL, will result in the keyword being removed; no error is given if that keyword does not exist. Otherwise, must consist only of [A-Za-z0-9] and [/_+-].
buffer
input buffer containing well-formed locale ID to be modified.
bufferCapacity
capacity of receiving buffer
status
containing error code: e.g. buffer not big enough or ill-formed keywordName or keywordValue parameters, or ill-formed locale ID in buffer on input.
Returns
the length needed for the buffer

uloc_toLanguageTag

Declared in unicode/uloc.h
U_CAPI int32_t U_EXPORT2 uloc_toLanguageTag(
  const char *localeID,
  char *langtag,
  int32_t langtagCapacity,
  UBool strict,
  UErrorCode *err
)

Returns a well-formed language tag for this locale ID.

Note: When strict is false, any locale fields which do not satisfy the BCP47 syntax requirement will be omitted from the result. When strict is true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the err if any locale fields do not satisfy the BCP47 syntax requirement.

Details
Parameters
localeID
the input locale ID
langtag
the output buffer receiving BCP47 language tag for the locale ID.
langtagCapacity
the size of the BCP47 language tag output buffer.
strict
boolean value indicating if the function returns an error for an ill-formed input locale ID.
err
error information if receiving the language tag failed.
Returns
The length of the BCP47 language tag.

uloc_toLegacyKey

Declared in unicode/uloc.h
U_CAPI const char *U_EXPORT2 uloc_toLegacyKey(
  const char *keyword
)

Converts the specified keyword (BCP 47 Unicode locale extension key, or legacy key) to the legacy key.

For example, legacy key "collation" is returned for the input BCP 47 Unicode locale extension key "co".

See also: uloc_toUnicodeLocaleKey

Details
Parameters
keyword
the input locale keyword (either BCP 47 Unicode locale extension key or legacy key).
Returns
the well-formed legacy key, or NULL if the specified keyword cannot be mapped to a well-formed legacy key.

uloc_toLegacyType

Declared in unicode/uloc.h
U_CAPI const char *U_EXPORT2 uloc_toLegacyType(
  const char *keyword,
  const char *value
)

Converts the specified keyword value (BCP 47 Unicode locale extension type, or legacy type or type alias) to the canonical legacy type.

For example, the legacy type "phonebook" is returned for the input BCP 47 Unicode locale extension type "phonebk" with the keyword "collation" (or "co").

When the specified keyword is not recognized, but the specified value satisfies the syntax of legacy key, or when the specified keyword allows 'variable' type and the specified value satisfies the syntax, then the pointer to the input type value itself will be returned. For example, uloc_toLegacyType("Foo", "Bar") returns "Bar", uloc_toLegacyType("vt", "00A4") returns "00A4".

See also: uloc_toUnicodeLocaleType

Details
Parameters
keyword
the locale keyword (either legacy keyword such as "collation" or BCP 47 Unicode locale extension key such as "co").
value
the locale keyword value (either BCP 47 Unicode locale extension type such as "phonebk" or legacy keyword value such as "phonebook").
Returns
the well-formed legacy type, or NULL if the specified keyword value cannot be mapped to a well-formed legacy type.

uloc_toUnicodeLocaleKey

Declared in unicode/uloc.h
U_CAPI const char *U_EXPORT2 uloc_toUnicodeLocaleKey(
  const char *keyword
)

Converts the specified keyword (legacy key, or BCP 47 Unicode locale extension key) to the equivalent BCP 47 Unicode locale extension key.

For example, BCP 47 Unicode locale extension key "co" is returned for the input keyword "collation".

When the specified keyword is unknown, but satisfies the BCP syntax, then the pointer to the input keyword itself will be returned. For example, uloc_toUnicodeLocaleKey("ZZ") returns "ZZ".

See also: uloc_toLegacyKey

Details
Parameters
keyword
the input locale keyword (either legacy key such as "collation" or BCP 47 Unicode locale extension key such as "co").
Returns
the well-formed BCP 47 Unicode locale extension key, or NULL if the specified locale keyword cannot be mapped to a well-formed BCP 47 Unicode locale extension key.

uloc_toUnicodeLocaleType

Declared in unicode/uloc.h
U_CAPI const char *U_EXPORT2 uloc_toUnicodeLocaleType(
  const char *keyword,
  const char *value
)

Converts the specified keyword value (legacy type, or BCP 47 Unicode locale extension type) to the well-formed BCP 47 Unicode locale extension type for the specified keyword (category).

For example, BCP 47 Unicode locale extension type "phonebk" is returned for the input keyword value "phonebook", with the keyword "collation" (or "co").

When the specified keyword is not recognized, but the specified value satisfies the syntax of the BCP 47 Unicode locale extension type, or when the specified keyword allows 'variable' type and the specified value satisfies the syntax, then the pointer to the input type value itself will be returned. For example, uloc_toUnicodeLocaleType("Foo", "Bar") returns "Bar", uloc_toUnicodeLocaleType("variableTop", "00A4") returns "00A4".

See also: uloc_toLegacyType

Details
Parameters
keyword
the locale keyword (either legacy key such as "collation" or BCP 47 Unicode locale extension key such as "co").
value
the locale keyword value (either legacy type such as "phonebook" or BCP 47 Unicode locale extension type such as "phonebk").
Returns
the well-formed BCP47 Unicode locale extension type, or NULL if the locale keyword value cannot be mapped to a well-formed BCP 47 Unicode locale extension type.

ulocdata_getCLDRVersion

Declared in unicode/ulocdata.h
U_CAPI void U_EXPORT2 ulocdata_getCLDRVersion(
  UVersionInfo versionArray,
  UErrorCode *status
)

Return the current CLDR version used by the library.

Details
Parameters
versionArray
fill-in that will receive the version number
status
error code - could be U_MISSING_RESOURCE_ERROR if the version was not found.

unorm2_append

Declared in unicode/unorm2.h
U_CAPI int32_t U_EXPORT2 unorm2_append(
  const UNormalizer2 *norm2,
  UChar *first,
  int32_t firstLength,
  int32_t firstCapacity,
  const UChar *second,
  int32_t secondLength,
  UErrorCode *pErrorCode
)

Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.

The result is normalized if both the strings were normalized. The first and second strings must be different buffers.

Details
Parameters
norm2
UNormalizer2 instance
first
string, should be normalized
firstLength
length of the first string, or -1 if NUL-terminated
firstCapacity
number of UChars that can be written to first
second
string, should be normalized
secondLength
length of the second string, or -1 if NUL-terminated
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the length of the first string

unorm2_close

Declared in unicode/unorm2.h
U_CAPI void U_EXPORT2 unorm2_close(
  UNormalizer2 *norm2
)

Closes a UNormalizer2 instance from unorm2_openFiltered().

Do not close instances from unorm2_getInstance()!

Details
Parameters
norm2
UNormalizer2 instance to be closed

unorm2_composePair

Declared in unicode/unorm2.h
U_CAPI UChar32 U_EXPORT2 unorm2_composePair(
  const UNormalizer2 *norm2,
  UChar32 a,
  UChar32 b
)

Performs pairwise composition of a & b and returns the composite if there is one.

Returns a composite code point c only if c has a two-way mapping to a+b. In standard Unicode normalization, this means that c has a canonical decomposition to a+b and c does not have the Full_Composition_Exclusion property.

This function is independent of the mode of the UNormalizer2.

Details
Parameters
norm2
UNormalizer2 instance
a
A (normalization starter) code point.
b
Another code point.
Returns
The non-negative composite code point if there is one; otherwise a negative value.

unorm2_getCombiningClass

Declared in unicode/unorm2.h
U_CAPI uint8_t U_EXPORT2 unorm2_getCombiningClass(
  const UNormalizer2 *norm2,
  UChar32 c
)

Gets the combining class of c.

The default implementation returns 0 but all standard implementations return the Unicode Canonical_Combining_Class value.

Details
Parameters
norm2
UNormalizer2 instance
c
code point
Returns
c's combining class

unorm2_getDecomposition

Declared in unicode/unorm2.h
U_CAPI int32_t U_EXPORT2 unorm2_getDecomposition(
  const UNormalizer2 *norm2,
  UChar32 c,
  UChar *decomposition,
  int32_t capacity,
  UErrorCode *pErrorCode
)

Gets the decomposition mapping of c.

Roughly equivalent to normalizing the String form of c on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function returns a negative value and does not write a string if c does not have a decomposition mapping in this instance's data. This function is independent of the mode of the UNormalizer2.

Details
Parameters
norm2
UNormalizer2 instance
c
code point
decomposition
String buffer which will be set to c's decomposition mapping, if there is one.
capacity
number of UChars that can be written to decomposition
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the non-negative length of c's decomposition, if there is one; otherwise a negative value

unorm2_getNFCInstance

Declared in unicode/unorm2.h
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFCInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFC normalization.

Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details
Parameters
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the requested Normalizer2, if successful

unorm2_getNFDInstance

Declared in unicode/unorm2.h
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFDInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFD normalization.

Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details
Parameters
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the requested Normalizer2, if successful

unorm2_getNFKCCasefoldInstance

Declared in unicode/unorm2.h
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKCCasefoldInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization which is equivalent to applying the NFKC_Casefold mappings and then NFC.

See https://www.unicode.org/reports/tr44/#NFKC_Casefold

Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details
Parameters
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the requested Normalizer2, if successful

unorm2_getNFKCInstance

Declared in unicode/unorm2.h
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKCInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFKC normalization.

Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details
Parameters
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the requested Normalizer2, if successful

unorm2_getNFKDInstance

Declared in unicode/unorm2.h
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKDInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFKD normalization.

Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details
Parameters
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the requested Normalizer2, if successful

unorm2_getRawDecomposition

Declared in unicode/unorm2.h
U_CAPI int32_t U_EXPORT2 unorm2_getRawDecomposition(
  const UNormalizer2 *norm2,
  UChar32 c,
  UChar *decomposition,
  int32_t capacity,
  UErrorCode *pErrorCode
)

Gets the raw decomposition mapping of c.

This is similar to the unorm2_getDecomposition() function but returns the raw decomposition mapping as specified in UnicodeData.txt or (for custom data) in the mapping files processed by the gennorm2 tool. By contrast, unorm2_getDecomposition() returns the processed, recursively-decomposed version of this mapping.

When used on a standard NFKC Normalizer2 instance, unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.

When used on a standard NFC Normalizer2 instance, it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); in this case, the result contains either one or two code points (=1..4 UChars).

This function is independent of the mode of the UNormalizer2.

Details
Parameters
norm2
UNormalizer2 instance
c
code point
decomposition
String buffer which will be set to c's raw decomposition mapping, if there is one.
capacity
number of UChars that can be written to decomposition
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the non-negative length of c's raw decomposition, if there is one; otherwise a negative value

unorm2_hasBoundaryAfter

Declared in unicode/unorm2.h
U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryAfter(
  const UNormalizer2 *norm2,
  UChar32 c
)

Tests if the character always has a normalization boundary after it, regardless of context.

For details see the Normalizer2 base class documentation.

Details
Parameters
norm2
UNormalizer2 instance
c
character to test
Returns
true if c has a normalization boundary after it

unorm2_hasBoundaryBefore

Declared in unicode/unorm2.h
U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryBefore(
  const UNormalizer2 *norm2,
  UChar32 c
)

Tests if the character always has a normalization boundary before it, regardless of context.

For details see the Normalizer2 base class documentation.

Details
Parameters
norm2
UNormalizer2 instance
c
character to test
Returns
true if c has a normalization boundary before it

unorm2_isInert

Declared in unicode/unorm2.h
U_CAPI UBool U_EXPORT2 unorm2_isInert(
  const UNormalizer2 *norm2,
  UChar32 c
)

Tests if the character is normalization-inert.

For details see the Normalizer2 base class documentation.

Details
Parameters
norm2
UNormalizer2 instance
c
character to test
Returns
true if c is normalization-inert

unorm2_isNormalized

Declared in unicode/unorm2.h
U_CAPI UBool U_EXPORT2 unorm2_isNormalized(
  const UNormalizer2 *norm2,
  const UChar *s,
  int32_t length,
  UErrorCode *pErrorCode
)

Tests if the string is normalized.

Internally, in cases where the quickCheck() method would return "maybe" (which is only possible for the two COMPOSE modes) this method resolves to "yes" or "no" to provide a definitive result, at the cost of doing more work in those cases.

Details
Parameters
norm2
UNormalizer2 instance
s
input string
length
length of the string, or -1 if NUL-terminated
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
true if s is normalized

unorm2_normalize

Declared in unicode/unorm2.h
U_CAPI int32_t U_EXPORT2 unorm2_normalize(
  const UNormalizer2 *norm2,
  const UChar *src,
  int32_t length,
  UChar *dest,
  int32_t capacity,
  UErrorCode *pErrorCode
)

Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.

The source and destination strings must be different buffers.

Details
Parameters
norm2
UNormalizer2 instance
src
source string
length
length of the source string, or -1 if NUL-terminated
dest
destination string; its contents is replaced with normalized src
capacity
number of UChars that can be written to dest
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the length of the destination string

unorm2_normalizeSecondAndAppend

Declared in unicode/unorm2.h
U_CAPI int32_t U_EXPORT2 unorm2_normalizeSecondAndAppend(
  const UNormalizer2 *norm2,
  UChar *first,
  int32_t firstLength,
  int32_t firstCapacity,
  const UChar *second,
  int32_t secondLength,
  UErrorCode *pErrorCode
)

Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.

The result is normalized if the first string was normalized. The first and second strings must be different buffers.

Details
Parameters
norm2
UNormalizer2 instance
first
string, should be normalized
firstLength
length of the first string, or -1 if NUL-terminated
firstCapacity
number of UChars that can be written to first
second
string, will be normalized
secondLength
length of the second string, or -1 if NUL-terminated
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the length of the first string

unorm2_quickCheck

Declared in unicode/unorm2.h
U_CAPI UNormalizationCheckResult U_EXPORT2 unorm2_quickCheck(
  const UNormalizer2 *norm2,
  const UChar *s,
  int32_t length,
  UErrorCode *pErrorCode
)

Tests if the string is normalized.

For the two COMPOSE modes, the result could be "maybe" in cases that would take a little more work to resolve definitively. Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster combination of quick check + normalization, to avoid re-checking the "yes" prefix.

Details
Parameters
norm2
UNormalizer2 instance
s
input string
length
length of the string, or -1 if NUL-terminated
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
UNormalizationCheckResult

unorm2_spanQuickCheckYes

Declared in unicode/unorm2.h
U_CAPI int32_t U_EXPORT2 unorm2_spanQuickCheckYes(
  const UNormalizer2 *norm2,
  const UChar *s,
  int32_t length,
  UErrorCode *pErrorCode
)

Returns the end of the normalized substring of the input string.

In other words, with end=spanQuickCheckYes(s, ec); the substring UnicodeString(s, 0, end) will pass the quick check with a "yes" result.

The returned end index is usually one or more characters before the "no" or "maybe" character: The end index is at a normalization boundary. (See the class documentation for more about normalization boundaries.)

When the goal is a normalized string and most input strings are expected to be normalized already, then call this method, and if it returns a prefix shorter than the input string, copy that prefix and use normalizeSecondAndAppend() for the remainder.

Details
Parameters
norm2
UNormalizer2 instance
s
input string
length
length of the string, or -1 if NUL-terminated
pErrorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
"yes" span end index

uscript_breaksBetweenLetters

Declared in unicode/uscript.h
U_CAPI UBool U_EXPORT2 uscript_breaksBetweenLetters(
  UScriptCode script
)

Returns true if the script allows line breaks between letters (excluding hyphenation).

Such a script typically requires dictionary-based line breaking. For example, Hani and Thai.

Details
Parameters
script
script code
Returns
true if the script allows line breaks between letters

uscript_getCode

Declared in unicode/uscript.h
U_CAPI int32_t U_EXPORT2 uscript_getCode(
  const char *nameOrAbbrOrLocale,
  UScriptCode *fillIn,
  int32_t capacity,
  UErrorCode *err
)

Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.

Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". Fills in USCRIPT_LATIN given "en" OR "en_US". If the required capacity is greater than the capacity of the destination buffer, then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.

Note: To search by short or long script alias only, use u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does a fast lookup with no access of the locale data.

Details
Parameters
nameOrAbbrOrLocale
name of the script, as given in PropertyValueAliases.txt, or ISO 15924 code or locale
fillIn
the UScriptCode buffer to fill in the script code
capacity
the capacity (size) of UScriptCode buffer passed in.
err
the error status code.
Returns
The number of script codes filled in the buffer passed in

uscript_getName

Declared in unicode/uscript.h
U_CAPI const char *U_EXPORT2 uscript_getName(
  UScriptCode scriptCode
)

Returns the long Unicode script name, if there is one.

Otherwise returns the 4-letter ISO 15924 script code. Returns "Malayalam" given USCRIPT_MALAYALAM.

Details
Parameters
scriptCode
UScriptCode enum
Returns
long script name as given in PropertyValueAliases.txt, or the 4-letter code, or NULL if scriptCode is invalid

uscript_getSampleString

Declared in unicode/uscript.h
U_CAPI int32_t U_EXPORT2 uscript_getSampleString(
  UScriptCode script,
  UChar *dest,
  int32_t capacity,
  UErrorCode *pErrorCode
)

Writes the script sample character string.

This string normally consists of one code point but might be longer. The string is empty if the script is not encoded.

Details
Parameters
script
script code
dest
output string array
capacity
number of UChars in the dest array
pErrorCode
standard ICU in/out error code, must pass U_SUCCESS() on input
Returns
the string length, even if U_BUFFER_OVERFLOW_ERROR

uscript_getScript

Declared in unicode/uscript.h
U_CAPI UScriptCode U_EXPORT2 uscript_getScript(
  UChar32 codepoint,
  UErrorCode *err
)

Gets the script code associated with the given codepoint.

Returns USCRIPT_MALAYALAM given 0x0D02

Details
Parameters
codepoint
UChar32 codepoint
err
the error status code.
Returns
The UScriptCode, or 0 if codepoint is invalid

uscript_getScriptExtensions

Declared in unicode/uscript.h
U_CAPI int32_t U_EXPORT2 uscript_getScriptExtensions(
  UChar32 c,
  UScriptCode *scripts,
  int32_t capacity,
  UErrorCode *errorCode
)

Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.

  • If c does have Script_Extensions, then the Script property value (normally Common or Inherited) is not included.
  • If c does not have Script_Extensions, then the one Script code is written to the output array.
  • If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. In other words, if the return value is 1, then the output array contains exactly c's single Script code. If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

If there are more than capacity script codes to be written, then U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. (Usual ICU buffer handling behavior.)

Details
Parameters
c
code point
scripts
output script code array
capacity
capacity of the scripts array
errorCode
Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
number of script codes in c's Script_Extensions, or 1 for the single Script value, written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity

uscript_getShortName

Declared in unicode/uscript.h
U_CAPI const char *U_EXPORT2 uscript_getShortName(
  UScriptCode scriptCode
)

Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.

Returns "Mlym" given USCRIPT_MALAYALAM.

Details
Parameters
scriptCode
UScriptCode enum
Returns
short script name (4-letter code), or NULL if scriptCode is invalid

uscript_getUsage

Declared in unicode/uscript.h
U_CAPI UScriptUsage U_EXPORT2 uscript_getUsage(
  UScriptCode script
)

Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.

Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.

See also: UScriptUsage

Details
Parameters
script
script code
Returns
script usage

uscript_hasScript

Declared in unicode/uscript.h
U_CAPI UBool U_EXPORT2 uscript_hasScript(
  UChar32 c,
  UScriptCode sc
)

Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

Details
Parameters
c
code point
sc
script code
Returns
true if sc is in Script_Extensions(c)

uscript_isCased

Declared in unicode/uscript.h
U_CAPI UBool U_EXPORT2 uscript_isCased(
  UScriptCode script
)

Returns true if, in modern (or most recent) usage of the script, case distinctions are customary.

For example, Latn and Cyrl.

Details
Parameters
script
script code
Returns
true if the script is cased

uscript_isRightToLeft

Declared in unicode/uscript.h
U_CAPI UBool U_EXPORT2 uscript_isRightToLeft(
  UScriptCode script
)

Returns true if the script is written right-to-left.

For example, Arab and Hebr.

Details
Parameters
script
script code
Returns
true if the script is right-to-left

utext_char32At

Declared in unicode/utext.h
U_CAPI UChar32 U_EXPORT2 utext_char32At(
  UText *ut,
  int64_t nativeIndex
)

Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.

If the specified index points to the interior of a multi-unit character - one of the trail bytes of a UTF-8 sequence, for example - the complete code point will be returned.

The iteration position will be set to the start of the returned code point.

This function is roughly equivalent to the sequence utext_setNativeIndex(index); utext_current32(); (There is a subtle difference if the index is out of bounds by being less than zero - utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() will return the char at zero. utext_char32At(negative index), on the other hand, will return the U_SENTINEL value of -1.)

Details
Parameters
ut
the text to be accessed
nativeIndex
the native index of the character to be accessed. If the index points to other than the first unit of a multi-unit character, it will be adjusted to the start of the character.
Returns
the code point at the specified index.

utext_clone

Declared in unicode/utext.h
U_CAPI UText *U_EXPORT2 utext_clone(
  UText *dest,
  const UText *src,
  UBool deep,
  UBool readOnly,
  UErrorCode *status
)

Clone a UText.

This is much like opening a UText where the source text is itself another UText.

A deep clone will copy both the UText data structures and the underlying text. The original and cloned UText will operate completely independently; modifications made to the text in one will not affect the other. Text providers are not required to support deep clones. The user of clone() must check the status return and be prepared to handle failures.

The standard UText implementations for UTF8, UChar *, UnicodeString and Replaceable all support deep cloning.

The UText returned from a deep clone will be writable, assuming that the text provider is able to support writing, even if the source UText had been made non-writable by means of utext_freeze().

A shallow clone replicates only the UText data structures; it does not make a copy of the underlying text. Shallow clones can be used as an efficient way to have multiple iterators active in a single text string that is not being modified.

A shallow clone operation will not fail, barring truly exceptional conditions such as memory allocation failures.

Shallow UText clones should be avoided if the UText functions that modify the text are expected to be used, either on the original or the cloned UText. Any such modifications can cause unpredictable behavior. Read Only shallow clones provide some protection against errors of this type by disabling text modification via the cloned UText.

A shallow clone made with the readOnly parameter == false will preserve the utext_isWritable() state of the source object. Note, however, that write operations must be avoided while more than one UText exists that refers to the same underlying text.

A UText and its clone may be safely concurrently accessed by separate threads. This is true for read access only with shallow clones, and for both read and write access with deep clones. It is the responsibility of the Text Provider to ensure that this thread safety constraint is met.

Details
Parameters
dest
A UText struct to be filled in with the result of the clone operation, or NULL if the clone function should heap-allocate a new UText struct. If non-NULL, must refer to an already existing UText, which will then be reset to become the clone.
src
The UText to be cloned.
deep
true to request a deep clone, false for a shallow clone.
readOnly
true to request that the cloned UText have read only access to the underlying text.
status
Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR will be returned if the text provider is unable to clone the original text.
Returns
The newly created clone, or NULL if the clone operation failed.

utext_close

Declared in unicode/utext.h
U_CAPI UText *U_EXPORT2 utext_close(
  UText *ut
)

Close function for UText instances.

Cleans up, releases any resources being held by an open UText.

If the UText was originally allocated by one of the utext_open functions, the storage associated with the utext will also be freed. If the UText storage originated with the application, as it would with a local or static instance, the storage will not be deleted.

An open UText can be reset to refer to a new string by using one of the utext_open() functions without first closing the UText.

Details
Parameters
ut
The UText to be closed.
Returns
NULL if the UText struct was deleted by the close. If the UText struct was originally provided by the caller to the open function, it is returned by this function, and may be safely used again in a subsequent utext_open.

utext_current32

Declared in unicode/utext.h
U_CAPI UChar32 U_EXPORT2 utext_current32(
  UText *ut
)

Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached the end of the input text.

Details
Parameters
ut
the text to be accessed.
Returns
the Unicode code point at the current iterator position.

utext_equals

Declared in unicode/utext.h
U_CAPI UBool U_EXPORT2 utext_equals(
  const UText *a,
  const UText *b
)

Compare two UText objects for equality.

UTexts are equal if they are iterating over the same text, and have the same iteration position within the text. If either or both of the parameters are NULL, the comparison is false.

Details
Parameters
a
The first of the two UTexts to compare.
b
The other UText to be compared.
Returns
true if the two UTexts are equal.

utext_extract

Declared in unicode/utext.h
U_CAPI int32_t U_EXPORT2 utext_extract(
  UText *ut,
  int64_t nativeStart,
  int64_t nativeLimit,
  UChar *dest,
  int32_t destCapacity,
  UErrorCode *status
)

Extract text from a UText into a UChar buffer.

The range of text to be extracted is specified in the native indices of the UText provider. These may not necessarily be UTF-16 indices.

The size (number of 16 bit UChars) of the data to be extracted is returned. The full number of UChars is returned, even when the extracted text is truncated because the specified buffer size is too small.

The extracted string will (if you are a user) / must (if you are a text provider) be NUL-terminated if there is sufficient space in the destination buffer. This terminating NUL is not included in the returned length.

The iteration index is left at the position following the last extracted character.

Details
Parameters
ut
the UText from which to extract data.
nativeStart
the native index of the first character to extract. If the specified index is out of range, it will be pinned to be within 0 <= index <= textLength.
nativeLimit
the native string index of the position following the last character to extract. If the specified index is out of range, it will be pinned to be within 0 <= index <= textLength. nativeLimit must be >= nativeStart.
dest
the UChar (UTF-16) buffer into which the extracted text is placed
destCapacity
The size, in UChars, of the destination buffer. May be zero for precomputing the required size.
status
receives any error status. U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the buffer was too small. Returns number of UChars for preflighting.
Returns
Number of UChars in the data to be extracted. Does not include a trailing NUL.

utext_getNativeIndex

Declared in unicode/utext.h
U_CAPI int64_t U_EXPORT2 utext_getNativeIndex(
  const UText *ut
)

Get the current iterator position, which can range from 0 to the length of the text.

The position is a native index into the input text, in whatever format it may have (possibly UTF-8 for example), and may not always be the same as the corresponding UChar (UTF-16) index. The returned position will always be aligned to a code point boundary.

Details
Parameters
ut
the text to be accessed.
Returns
the current index position, in the native units of the text provider.

utext_getPreviousNativeIndex

Declared in unicode/utext.h
U_CAPI int64_t U_EXPORT2 utext_getPreviousNativeIndex(
  UText *ut
)

Get the native index of the character preceding the current position.

If the iteration position is already at the start of the text, zero is returned. The value returned is the same as that obtained from the following sequence, but without the side effect of changing the iteration position.

UText  *ut = whatever;
  ...
utext_previous32(ut);
utext_getNativeIndex(ut);

This function is most useful during forwards iteration, where it will get the native index of the character most recently returned from utext_next32().

Details
Parameters
ut
the text to be accessed
Returns
the native index of the character preceding the current index position, or zero if the current position is at the start of the text.

utext_moveIndex32

Declared in unicode/utext.h
U_CAPI UBool U_EXPORT2 utext_moveIndex32(
  UText *ut,
  int32_t delta
)

Move the iterator position by delta code points.

The number of code points is a signed number; a negative delta will move the iterator backwards, towards the start of the text.

The index is moved by delta code points forward or backward, but no further backward than to 0 and no further forward than to utext_nativeLength(). The resulting index value will be in between 0 and length, inclusive.

Details
Parameters
ut
the text to be accessed.
delta
the signed number of code points to move the iteration position.
Returns
true if the position could be moved the requested number of positions while staying within the range [0 - text length].

utext_nativeLength

Declared in unicode/utext.h
U_CAPI int64_t U_EXPORT2 utext_nativeLength(
  UText *ut
)

Get the length of the text.

Depending on the characteristics of the underlying text representation, this may be expensive. See also: utext_isLengthExpensive()

Details
Parameters
ut
the text to be accessed.
Returns
the length of the text, expressed in native units.

utext_next32

Declared in unicode/utext.h
U_CAPI UChar32 U_EXPORT2 utext_next32(
  UText *ut
)

Get the code point at the current iteration position of the UText, and advance the position to the first index following the character.

If the position is at the end of the text (the index following the last character, which is also the length of the text), return U_SENTINEL (-1) and do not advance the index.

This is a post-increment operation.

An inline macro version of this function, UTEXT_NEXT32(), is available for performance critical use.

See also: UTEXT_NEXT32

Details
Parameters
ut
the text to be accessed.
Returns
the Unicode code point at the iteration position.

utext_next32From

Declared in unicode/utext.h
U_CAPI UChar32 U_EXPORT2 utext_next32From(
  UText *ut,
  int64_t nativeIndex
)

Set the iteration index and return the code point at that index.

Leave the iteration index at the start of the following code point.

This function is the most efficient and convenient way to begin a forward iteration. The results are identical to those from the sequence

utext_setNativeIndex();
utext_next32();

Details
Parameters
ut
the text to be accessed.
nativeIndex
Iteration index, in the native units of the text provider.
Returns
Code point which starts at or before index, or U_SENTINEL (-1) if it is out of bounds.

utext_openUChars

Declared in unicode/utext.h
U_CAPI UText *U_EXPORT2 utext_openUChars(
  UText *ut,
  const UChar *s,
  int64_t length,
  UErrorCode *status
)

Open a read-only UText for a UChar * string.

Details
Parameters
ut
Pointer to a UText struct. If NULL, a new UText will be created. If non-NULL, must refer to an initialized UText struct, which will then be reset to reference the specified UChar string.
s
A UChar (UTF-16) string
length
The number of UChars in the input string, or -1 if the string is zero terminated.
status
Errors are returned here.
Returns
A pointer to the UText. If a pre-allocated UText was provided, it will always be used and returned.

utext_openUTF8

Declared in unicode/utext.h
U_CAPI UText *U_EXPORT2 utext_openUTF8(
  UText *ut,
  const char *s,
  int64_t length,
  UErrorCode *status
)

Open a read-only UText implementation for UTF-8 strings.

Details
Parameters
ut
Pointer to a UText struct. If NULL, a new UText will be created. If non-NULL, must refer to an initialized UText struct, which will then be reset to reference the specified UTF-8 string.
s
A UTF-8 string. Must not be NULL.
length
The length of the UTF-8 string in bytes, or -1 if the string is zero terminated.
status
Errors are returned here.
Returns
A pointer to the UText. If a pre-allocated UText was provided, it will always be used and returned.

utext_previous32

Declared in unicode/utext.h
U_CAPI UChar32 U_EXPORT2 utext_previous32(
  UText *ut
)

Move the iterator position to the character (code point) whose index precedes the current position, and return that character.

This is a pre-decrement operation.

If the initial position is at the start of the text (index of 0) return U_SENTINEL (-1), and leave the position unchanged.

An inline macro version of this function, UTEXT_PREVIOUS32(), is available for performance critical use.

See also: UTEXT_PREVIOUS32

Details
Parameters
ut
the text to be accessed.
Returns
the previous UChar32 code point, or U_SENTINEL (-1) if the iteration has reached the start of the text.

utext_previous32From

Declared in unicode/utext.h
U_CAPI UChar32 U_EXPORT2 utext_previous32From(
  UText *ut,
  int64_t nativeIndex
)

Set the iteration index, and return the code point preceding the one specified by the initial index.

Leave the iteration position at the start of the returned code point.

This function is the most efficient and convenient way to begin a backwards iteration.

Details
Parameters
ut
the text to be accessed.
nativeIndex
Iteration index in the native units of the text provider.
Returns
Code point preceding the one at the initial index, or U_SENTINEL (-1) if it is out of bounds.

utext_setNativeIndex

Declared in unicode/utext.h
U_CAPI void U_EXPORT2 utext_setNativeIndex(
  UText *ut,
  int64_t nativeIndex
)

Set the current iteration position to the nearest code point boundary at or preceding the specified index.

The index is in the native units of the original input text. If the index is out of range, it will be pinned to be within the range of the input text.

It will usually be more efficient to begin an iteration using the functions utext_next32From() or utext_previous32From() rather than utext_setNativeIndex().

Moving the index position to an adjacent character is best done with utext_next32(), utext_previous32() or utext_moveIndex32(). Attempting to do direct arithmetic on the index position is complicated by the fact that the size (in native units) of a character depends on the underlying representation of the character (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not easily knowable.

Details
Parameters
ut
the text to be accessed.
nativeIndex
the native unit index of the new iteration position.

utrans_clone

Declared in unicode/utrans.h
U_CAPI UTransliterator *U_EXPORT2 utrans_clone(
  const UTransliterator *trans,
  UErrorCode *status
)

Create a copy of a transliterator.

Any non-NULL result from this function should later be closed with utrans_close().

Details
Parameters
trans
the transliterator to be copied.
status
a pointer to the UErrorCode
Returns
a transliterator pointer that may be passed to other utrans_xxx() functions, or NULL if the clone call fails.

utrans_close

Declared in unicode/utrans.h
U_CAPI void U_EXPORT2 utrans_close(
  UTransliterator *trans
)

Close a transliterator.

Any non-NULL pointer returned by utrans_openXxx() or utrans_clone() should eventually be closed.

Details
Parameters
trans
the transliterator to be closed.

utrans_openIDs

Declared in unicode/utrans.h
U_CAPI UEnumeration *U_EXPORT2 utrans_openIDs(
  UErrorCode *pErrorCode
)

Return a UEnumeration for the available transliterators.

Details
Parameters
pErrorCode
Pointer to the UErrorCode in/out parameter.
Returns
UEnumeration for the available transliterators. Close with uenum_close().

utrans_openInverse

Declared in unicode/utrans.h
U_CAPI UTransliterator *U_EXPORT2 utrans_openInverse(
  const UTransliterator *trans,
  UErrorCode *status
)

Open an inverse of an existing transliterator.

For this to work, the inverse must be registered with the system. For example, if the Transliterator "A-B" is opened, and then its inverse is opened, the result is the Transliterator "B-A", if such a transliterator is registered with the system. Otherwise the result is NULL and a failing UErrorCode is set. Any non-NULL result from this function should later be closed with utrans_close().

Details
Parameters
trans
the transliterator to open the inverse of.
status
a pointer to the UErrorCode
Returns
a pointer to a newly-opened transliterator that is the inverse of trans, or NULL if the open call fails.

utrans_openU

Declared in unicode/utrans.h
U_CAPI UTransliterator *U_EXPORT2 utrans_openU(
  const UChar *id,
  int32_t idLength,
  UTransDirection dir,
  const UChar *rules,
  int32_t rulesLength,
  UParseError *parseError,
  UErrorCode *pErrorCode
)

Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.

Any non-NULL result from this function should later be closed with utrans_close().

Details
Parameters
id
a valid transliterator ID
idLength
the length of the ID string, or -1 if NUL-terminated
dir
the desired direction
rules
the transliterator rules. See the C++ header rbt.h for rules syntax. If NULL then a system transliterator matching the ID is returned.
rulesLength
the length of the rules, or -1 if the rules are NUL-terminated.
parseError
a pointer to a UParseError struct to receive the details of any parsing errors. This parameter may be NULL if no parsing error details are desired.
pErrorCode
a pointer to the UErrorCode
Returns
a transliterator pointer that may be passed to other utrans_xxx() functions, or NULL if the open call fails.

utrans_setFilter

Declared in unicode/utrans.h
U_CAPI void U_EXPORT2 utrans_setFilter(
  UTransliterator *trans,
  const UChar *filterPattern,
  int32_t filterPatternLen,
  UErrorCode *status
)

Set the filter used by a transliterator.

A filter can be used to make the transliterator pass certain characters through untouched. The filter is expressed using a UnicodeSet pattern. If the filterPattern is NULL or the empty string, then the transliterator will be reset to use no filter.

See also: UnicodeSet

Details
Parameters
trans
the transliterator
filterPattern
a pattern string, in the form accepted by UnicodeSet, specifying which characters to apply the transliteration to. May be NULL or the empty string to indicate no filter.
filterPatternLen
the length of filterPattern, or -1 if filterPattern is zero-terminated
status
a pointer to the UErrorCode

utrans_toRules

Declared in unicode/utrans.h
U_CAPI int32_t U_EXPORT2 utrans_toRules(
  const UTransliterator *trans,
  UBool escapeUnprintable,
  UChar *result,
  int32_t resultLength,
  UErrorCode *status
)

Create a rule string that can be passed to utrans_openU to recreate this transliterator.

Details
Parameters
trans
The transliterator
escapeUnprintable
if true then convert unprintable characters to their hex escape representations, \uxxxx or \Uxxxxxxxx. Unprintable characters are those other than U+000A, U+0020..U+007E.
result
A pointer to a buffer to receive the rules.
resultLength
The maximum size of result.
status
A pointer to the UErrorCode. In case of error status, the contents of result are undefined.
Returns
int32_t The length of the rule string (may be greater than resultLength, in which case an error is returned).

utrans_trans

Declared in unicode/utrans.h
U_CAPI void U_EXPORT2 utrans_trans(
  const UTransliterator *trans,
  UReplaceable *rep,
  const UReplaceableCallbacks *repFunc,
  int32_t start,
  int32_t *limit,
  UErrorCode *status
)

Transliterate a segment of a UReplaceable string.

The string is passed in as a UReplaceable pointer rep and a UReplaceableCallbacks function pointer struct repFunc. Functions in the repFunc struct will be called in order to modify the rep string.

Details
Parameters
trans
the transliterator
rep
a pointer to the string. This will be passed to the repFunc functions.
repFunc
a set of function pointers that will be used to modify the string pointed to by rep.
start
the beginning index, inclusive; 0 <= start <= limit.
limit
pointer to the ending index, exclusive; start <= limit <= repFunc->length(rep). Upon return, *limit will contain the new limit index. The text previously occupying [start, limit) has been transliterated, possibly to a string of a different length, at [start, new-limit), where new-limit is the value stored in *limit on return.
status
a pointer to the UErrorCode

utrans_transIncremental

Declared in unicode/utrans.h
U_CAPI void U_EXPORT2 utrans_transIncremental(
  const UTransliterator *trans,
  UReplaceable *rep,
  const UReplaceableCallbacks *repFunc,
  UTransPosition *pos,
  UErrorCode *status
)

Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.

This method is typically called after new text has been inserted, e.g. as a result of a keyboard event. The transliterator will try to transliterate characters of rep between index.cursor and index.limit. Characters before index.cursor will not be changed.

Upon return, values in index will be updated. index.start will be advanced to the first character that future calls to this method will read. index.cursor and index.limit will be adjusted to delimit the range of text that future calls to this method may change.

Typical usage of this method begins with an initial call with index.start and index.limit set to indicate the portion of text to be transliterated, and index.cursor == index.start. Thereafter, index can be used without modification in future calls, provided that all changes to text are made via this method.

This method assumes that future calls may be made that will insert new text into the buffer. As a result, it only performs unambiguous transliterations. After the last call to this method, there may be untransliterated text that is waiting for more input to resolve an ambiguity. In order to perform these pending transliterations, clients should call utrans_trans() with a start of index.start and a limit of index.end after the last call to this method has been made.

Details
Parameters
trans
the transliterator
rep
a pointer to the string. This will be passed to the repFunc functions.
repFunc
a set of function pointers that will be used to modify the string pointed to by rep.
pos
a struct containing the start and limit indices of the text to be read and the text to be transliterated
status
a pointer to the UErrorCode

utrans_transIncrementalUChars

Declared in unicode/utrans.h
U_CAPI void U_EXPORT2 utrans_transIncrementalUChars(
  const UTransliterator *trans,
  UChar *text,
  int32_t *textLength,
  int32_t textCapacity,
  UTransPosition *pos,
  UErrorCode *status
)

Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.

See utrans_transIncremental(). The string is passed in in a UChar* buffer. The string is modified in place. If the result is longer than textCapacity, it is truncated. The actual length of the result is returned in *textLength, if textLength is non-NULL. *textLength may be greater than textCapacity, but only textCapacity UChars will be written to *text, including the zero terminator. See utrans_transIncremental() for usage details.

See also: utrans_transIncremental

Details
Parameters
trans
the transliterator
text
a pointer to a buffer containing the text to be transliterated on input and the result text on output.
textLength
a pointer to the length of the string in text. If the length is -1 then the string is assumed to be zero-terminated. Upon return, the new length is stored in *textLength. If textLength is NULL then the string is assumed to be zero-terminated.
textCapacity
the length of the text buffer
pos
a struct containing the start and limit indices of the text to be read and the text to be transliterated
status
a pointer to the UErrorCode

utrans_transUChars

Declared in unicode/utrans.h
U_CAPI void U_EXPORT2 utrans_transUChars(
  const UTransliterator *trans,
  UChar *text,
  int32_t *textLength,
  int32_t textCapacity,
  int32_t start,
  int32_t *limit,
  UErrorCode *status
)

Transliterate a segment of a UChar* string.

The string is passed in in a UChar* buffer. The string is modified in place. If the result is longer than textCapacity, it is truncated. The actual length of the result is returned in *textLength, if textLength is non-NULL. *textLength may be greater than textCapacity, but only textCapacity UChars will be written to *text, including the zero terminator.

Details
Parameters
trans
the transliterator
text
a pointer to a buffer containing the text to be transliterated on input and the result text on output.
textLength
a pointer to the length of the string in text. If the length is -1 then the string is assumed to be zero-terminated. Upon return, the new length is stored in *textLength. If textLength is NULL then the string is assumed to be zero-terminated.
textCapacity
the length of the text buffer
start
the beginning index, inclusive; 0 <= start <= limit.
limit
pointer to the ending index, exclusive; start <= limit <= *textLength. Upon return, *limit will contain the new limit index. The text previously occupying [start, limit) has been transliterated, possibly to a string of a different length, at [start, new-limit), where new-limit is the value stored in *limit on return.
status
a pointer to the UErrorCode