ICU4C

#include <parseerr.h>
#include <ptypes.h>
#include <putil.h>
#include <stringoptions.h>
#include <ubrk.h>
#include <uchar.h>
#include <ucol.h>
#include <ucpmap.h>
#include <udisplaycontext.h>
#include <uenum.h>
#include <uldnames.h>
#include <uloc.h>
#include <ulocdata.h>
#include <umachine.h>
#include <unorm2.h>
#include <urep.h>
#include <uscript.h>
#include <ustring.h>
#include <utext.h>
#include <utf.h>
#include <utf16.h>
#include <utf8.h>
#include <utrans.h>
#include <utypes.h>
#include <uversion.h>

Summary

Enumerations
`Anonymous Enum 124{ U_PARSE_CONTEXT_LEN = 16 }`	enum The capacity of the context strings in UParseError.
`UAcceptResult{ ULOC_ACCEPT_FAILED = 0, ULOC_ACCEPT_VALID = 1, ULOC_ACCEPT_FALLBACK = 2 }`	enum Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.
`UBidiPairedBracketType{ U_BPT_NONE, U_BPT_OPEN, U_BPT_CLOSE, U_BPT_COUNT }`	enum Bidi Paired Bracket Type constants.
UBlockCode{ UBLOCK_NO_BLOCK = 0, UBLOCK_BASIC_LATIN = 1, UBLOCK_LATIN_1_SUPPLEMENT =2, UBLOCK_LATIN_EXTENDED_A =3, UBLOCK_LATIN_EXTENDED_B =4, UBLOCK_IPA_EXTENSIONS =5, UBLOCK_SPACING_MODIFIER_LETTERS =6, UBLOCK_COMBINING_DIACRITICAL_MARKS =7, UBLOCK_GREEK =8, UBLOCK_CYRILLIC =9, UBLOCK_ARMENIAN =10, UBLOCK_HEBREW =11, UBLOCK_ARABIC =12, UBLOCK_SYRIAC =13, UBLOCK_THAANA =14, UBLOCK_DEVANAGARI =15, UBLOCK_BENGALI =16, UBLOCK_GURMUKHI =17, UBLOCK_GUJARATI =18, UBLOCK_ORIYA =19, UBLOCK_TAMIL =20, UBLOCK_TELUGU =21, UBLOCK_KANNADA =22, UBLOCK_MALAYALAM =23, UBLOCK_SINHALA =24, UBLOCK_THAI =25, UBLOCK_LAO =26, UBLOCK_TIBETAN =27, UBLOCK_MYANMAR =28, UBLOCK_GEORGIAN =29, UBLOCK_HANGUL_JAMO =30, UBLOCK_ETHIOPIC =31, UBLOCK_CHEROKEE =32, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, UBLOCK_OGHAM =34, UBLOCK_RUNIC =35, UBLOCK_KHMER =36, UBLOCK_MONGOLIAN =37, UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, UBLOCK_GREEK_EXTENDED =39, UBLOCK_GENERAL_PUNCTUATION =40, UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, UBLOCK_CURRENCY_SYMBOLS =42, UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, UBLOCK_LETTERLIKE_SYMBOLS =44, UBLOCK_NUMBER_FORMS =45, UBLOCK_ARROWS =46, UBLOCK_MATHEMATICAL_OPERATORS =47, UBLOCK_MISCELLANEOUS_TECHNICAL =48, UBLOCK_CONTROL_PICTURES =49, UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, UBLOCK_ENCLOSED_ALPHANUMERICS =51, UBLOCK_BOX_DRAWING =52, UBLOCK_BLOCK_ELEMENTS =53, UBLOCK_GEOMETRIC_SHAPES =54, UBLOCK_MISCELLANEOUS_SYMBOLS =55, UBLOCK_DINGBATS =56, UBLOCK_BRAILLE_PATTERNS =57, UBLOCK_CJK_RADICALS_SUPPLEMENT =58, UBLOCK_KANGXI_RADICALS =59, UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, UBLOCK_HIRAGANA =62, UBLOCK_KATAKANA =63, UBLOCK_BOPOMOFO =64, UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, UBLOCK_KANBUN =66, UBLOCK_BOPOMOFO_EXTENDED =67, UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, UBLOCK_CJK_COMPATIBILITY =69, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, UBLOCK_YI_SYLLABLES =72, UBLOCK_YI_RADICALS =73, 
UBLOCK_HANGUL_SYLLABLES =74, UBLOCK_HIGH_SURROGATES =75, UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, UBLOCK_LOW_SURROGATES =77, UBLOCK_PRIVATE_USE_AREA =78, UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, UBLOCK_COMBINING_HALF_MARKS =82, UBLOCK_CJK_COMPATIBILITY_FORMS =83, UBLOCK_SMALL_FORM_VARIANTS =84, UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, UBLOCK_SPECIALS =86, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, UBLOCK_OLD_ITALIC = 88, UBLOCK_GOTHIC = 89, UBLOCK_DESERET = 90, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, UBLOCK_MUSICAL_SYMBOLS = 92, UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, UBLOCK_TAGS = 96, UBLOCK_CYRILLIC_SUPPLEMENT = 97, UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, UBLOCK_TAGALOG = 98, UBLOCK_HANUNOO = 99, UBLOCK_BUHID = 100, UBLOCK_TAGBANWA = 101, UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, UBLOCK_VARIATION_SELECTORS = 108, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, UBLOCK_LIMBU = 111, UBLOCK_TAI_LE = 112, UBLOCK_KHMER_SYMBOLS = 113, UBLOCK_PHONETIC_EXTENSIONS = 114, UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, UBLOCK_LINEAR_B_SYLLABARY = 117, UBLOCK_LINEAR_B_IDEOGRAMS = 118, UBLOCK_AEGEAN_NUMBERS = 119, UBLOCK_UGARITIC = 120, UBLOCK_SHAVIAN = 121, UBLOCK_OSMANYA = 122, UBLOCK_CYPRIOT_SYLLABARY = 123, UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, UBLOCK_ANCIENT_GREEK_NUMBERS = 127, UBLOCK_ARABIC_SUPPLEMENT = 128, UBLOCK_BUGINESE = 129, 
UBLOCK_CJK_STROKES = 130, UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, UBLOCK_COPTIC = 132, UBLOCK_ETHIOPIC_EXTENDED = 133, UBLOCK_ETHIOPIC_SUPPLEMENT = 134, UBLOCK_GEORGIAN_SUPPLEMENT = 135, UBLOCK_GLAGOLITIC = 136, UBLOCK_KHAROSHTHI = 137, UBLOCK_MODIFIER_TONE_LETTERS = 138, UBLOCK_NEW_TAI_LUE = 139, UBLOCK_OLD_PERSIAN = 140, UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, UBLOCK_SYLOTI_NAGRI = 143, UBLOCK_TIFINAGH = 144, UBLOCK_VERTICAL_FORMS = 145, UBLOCK_NKO = 146, UBLOCK_BALINESE = 147, UBLOCK_LATIN_EXTENDED_C = 148, UBLOCK_LATIN_EXTENDED_D = 149, UBLOCK_PHAGS_PA = 150, UBLOCK_PHOENICIAN = 151, UBLOCK_CUNEIFORM = 152, UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, UBLOCK_COUNTING_ROD_NUMERALS = 154, UBLOCK_SUNDANESE = 155, UBLOCK_LEPCHA = 156, UBLOCK_OL_CHIKI = 157, UBLOCK_CYRILLIC_EXTENDED_A = 158, UBLOCK_VAI = 159, UBLOCK_CYRILLIC_EXTENDED_B = 160, UBLOCK_SAURASHTRA = 161, UBLOCK_KAYAH_LI = 162, UBLOCK_REJANG = 163, UBLOCK_CHAM = 164, UBLOCK_ANCIENT_SYMBOLS = 165, UBLOCK_PHAISTOS_DISC = 166, UBLOCK_LYCIAN = 167, UBLOCK_CARIAN = 168, UBLOCK_LYDIAN = 169, UBLOCK_MAHJONG_TILES = 170, UBLOCK_DOMINO_TILES = 171, UBLOCK_SAMARITAN = 172, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, UBLOCK_TAI_THAM = 174, UBLOCK_VEDIC_EXTENSIONS = 175, UBLOCK_LISU = 176, UBLOCK_BAMUM = 177, UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, UBLOCK_DEVANAGARI_EXTENDED = 179, UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, UBLOCK_JAVANESE = 181, UBLOCK_MYANMAR_EXTENDED_A = 182, UBLOCK_TAI_VIET = 183, UBLOCK_MEETEI_MAYEK = 184, UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, UBLOCK_IMPERIAL_ARAMAIC = 186, UBLOCK_OLD_SOUTH_ARABIAN = 187, UBLOCK_AVESTAN = 188, UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, UBLOCK_OLD_TURKIC = 191, UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, UBLOCK_KAITHI = 193, UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, 
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, UBLOCK_MANDAIC = 198, UBLOCK_BATAK = 199, UBLOCK_ETHIOPIC_EXTENDED_A = 200, UBLOCK_BRAHMI = 201, UBLOCK_BAMUM_SUPPLEMENT = 202, UBLOCK_KANA_SUPPLEMENT = 203, UBLOCK_PLAYING_CARDS = 204, UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, UBLOCK_EMOTICONS = 206, UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, UBLOCK_ALCHEMICAL_SYMBOLS = 208, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, UBLOCK_ARABIC_EXTENDED_A = 210, UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, UBLOCK_CHAKMA = 212, UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, UBLOCK_MEROITIC_CURSIVE = 214, UBLOCK_MEROITIC_HIEROGLYPHS = 215, UBLOCK_MIAO = 216, UBLOCK_SHARADA = 217, UBLOCK_SORA_SOMPENG = 218, UBLOCK_SUNDANESE_SUPPLEMENT = 219, UBLOCK_TAKRI = 220, UBLOCK_BASSA_VAH = 221, UBLOCK_CAUCASIAN_ALBANIAN = 222, UBLOCK_COPTIC_EPACT_NUMBERS = 223, UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, UBLOCK_DUPLOYAN = 225, UBLOCK_ELBASAN = 226, UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, UBLOCK_GRANTHA = 228, UBLOCK_KHOJKI = 229, UBLOCK_KHUDAWADI = 230, UBLOCK_LATIN_EXTENDED_E = 231, UBLOCK_LINEAR_A = 232, UBLOCK_MAHAJANI = 233, UBLOCK_MANICHAEAN = 234, UBLOCK_MENDE_KIKAKUI = 235, UBLOCK_MODI = 236, UBLOCK_MRO = 237, UBLOCK_MYANMAR_EXTENDED_B = 238, UBLOCK_NABATAEAN = 239, UBLOCK_OLD_NORTH_ARABIAN = 240, UBLOCK_OLD_PERMIC = 241, UBLOCK_ORNAMENTAL_DINGBATS = 242, UBLOCK_PAHAWH_HMONG = 243, UBLOCK_PALMYRENE = 244, UBLOCK_PAU_CIN_HAU = 245, UBLOCK_PSALTER_PAHLAVI = 246, UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, UBLOCK_SIDDHAM = 248, UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, UBLOCK_TIRHUTA = 251, UBLOCK_WARANG_CITI = 252, UBLOCK_AHOM = 253, UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, UBLOCK_CHEROKEE_SUPPLEMENT = 255, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, UBLOCK_HATRAN = 258, UBLOCK_MULTANI = 259, UBLOCK_OLD_HUNGARIAN = 260, UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, UBLOCK_SUTTON_SIGNWRITING = 262, 
UBLOCK_ADLAM = 263, UBLOCK_BHAIKSUKI = 264, UBLOCK_CYRILLIC_EXTENDED_C = 265, UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, UBLOCK_MARCHEN = 268, UBLOCK_MONGOLIAN_SUPPLEMENT = 269, UBLOCK_NEWA = 270, UBLOCK_OSAGE = 271, UBLOCK_TANGUT = 272, UBLOCK_TANGUT_COMPONENTS = 273, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, UBLOCK_KANA_EXTENDED_A = 275, UBLOCK_MASARAM_GONDI = 276, UBLOCK_NUSHU = 277, UBLOCK_SOYOMBO = 278, UBLOCK_SYRIAC_SUPPLEMENT = 279, UBLOCK_ZANABAZAR_SQUARE = 280, UBLOCK_CHESS_SYMBOLS = 281, UBLOCK_DOGRA = 282, UBLOCK_GEORGIAN_EXTENDED = 283, UBLOCK_GUNJALA_GONDI = 284, UBLOCK_HANIFI_ROHINGYA = 285, UBLOCK_INDIC_SIYAQ_NUMBERS = 286, UBLOCK_MAKASAR = 287, UBLOCK_MAYAN_NUMERALS = 288, UBLOCK_MEDEFAIDRIN = 289, UBLOCK_OLD_SOGDIAN = 290, UBLOCK_SOGDIAN = 291, UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292, UBLOCK_ELYMAIC = 293, UBLOCK_NANDINAGARI = 294, UBLOCK_NYIAKENG_PUACHUE_HMONG = 295, UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296, UBLOCK_SMALL_KANA_EXTENSION = 297, UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298, UBLOCK_TAMIL_SUPPLEMENT = 299, UBLOCK_WANCHO = 300, UBLOCK_CHORASMIAN = 301, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 302, UBLOCK_DIVES_AKURU = 303, UBLOCK_KHITAN_SMALL_SCRIPT = 304, UBLOCK_LISU_SUPPLEMENT = 305, UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING = 306, UBLOCK_TANGUT_SUPPLEMENT = 307, UBLOCK_YEZIDI = 308, UBLOCK_ARABIC_EXTENDED_B = 309, UBLOCK_CYPRO_MINOAN = 310, UBLOCK_ETHIOPIC_EXTENDED_B = 311, UBLOCK_KANA_EXTENDED_B = 312, UBLOCK_LATIN_EXTENDED_F = 313, UBLOCK_LATIN_EXTENDED_G = 314, UBLOCK_OLD_UYGHUR = 315, UBLOCK_TANGSA = 316, UBLOCK_TOTO = 317, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 318, UBLOCK_VITHKUQI = 319, UBLOCK_ZNAMENNY_MUSICAL_NOTATION = 320, UBLOCK_ARABIC_EXTENDED_C = 321, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 322, UBLOCK_CYRILLIC_EXTENDED_D = 323, UBLOCK_DEVANAGARI_EXTENDED_A = 324, UBLOCK_KAKTOVIK_NUMERALS = 325, UBLOCK_KAWI = 326, UBLOCK_NAG_MUNDARI = 327, 
UBLOCK_COUNT = 328, UBLOCK_INVALID_CODE =-1 }	enum Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
`UBreakIteratorType{ UBRK_CHARACTER = 0, UBRK_WORD = 1, UBRK_LINE = 2, UBRK_SENTENCE = 3, UBRK_TITLE = 4, UBRK_COUNT = 5 }`	enum The possible types of text boundaries.
`UCPMapRangeOption{ UCPMAP_RANGE_NORMAL, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, UCPMAP_RANGE_FIXED_ALL_SURROGATES }`	enum Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
UCharCategory{ U_UNASSIGNED = 0, U_GENERAL_OTHER_TYPES = 0, U_UPPERCASE_LETTER = 1, U_LOWERCASE_LETTER = 2, U_TITLECASE_LETTER = 3, U_MODIFIER_LETTER = 4, U_OTHER_LETTER = 5, U_NON_SPACING_MARK = 6, U_ENCLOSING_MARK = 7, U_COMBINING_SPACING_MARK = 8, U_DECIMAL_DIGIT_NUMBER = 9, U_LETTER_NUMBER = 10, U_OTHER_NUMBER = 11, U_SPACE_SEPARATOR = 12, U_LINE_SEPARATOR = 13, U_PARAGRAPH_SEPARATOR = 14, U_CONTROL_CHAR = 15, U_FORMAT_CHAR = 16, U_PRIVATE_USE_CHAR = 17, U_SURROGATE = 18, U_DASH_PUNCTUATION = 19, U_START_PUNCTUATION = 20, U_END_PUNCTUATION = 21, U_CONNECTOR_PUNCTUATION = 22, U_OTHER_PUNCTUATION = 23, U_MATH_SYMBOL = 24, U_CURRENCY_SYMBOL = 25, U_MODIFIER_SYMBOL = 26, U_OTHER_SYMBOL = 27, U_INITIAL_PUNCTUATION = 28, U_FINAL_PUNCTUATION = 29, U_CHAR_CATEGORY_COUNT }	enum Data for enumerated Unicode general category types.
UCharDirection{ U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3, U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7, U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11, U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15, U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_FIRST_STRONG_ISOLATE = 19, U_LEFT_TO_RIGHT_ISOLATE = 20, U_RIGHT_TO_LEFT_ISOLATE = 21, U_POP_DIRECTIONAL_ISOLATE = 22, U_CHAR_DIRECTION_COUNT }	enum This specifies the language directional property of a character set.
`UCharNameChoice{ U_UNICODE_CHAR_NAME, U_UNICODE_10_CHAR_NAME, U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2, U_CHAR_NAME_ALIAS, U_CHAR_NAME_CHOICE_COUNT }`	enum Selector constants for u_charName().
`UColAttribute{ UCOL_FRENCH_COLLATION, UCOL_ALTERNATE_HANDLING, UCOL_CASE_FIRST, UCOL_CASE_LEVEL, UCOL_NORMALIZATION_MODE, UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, UCOL_STRENGTH, UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, UCOL_ATTRIBUTE_COUNT }`	enum Attributes that collation service understands.
`UColAttributeValue{ UCOL_DEFAULT = -1, UCOL_PRIMARY = 0, UCOL_SECONDARY = 1, UCOL_TERTIARY = 2, UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, UCOL_CE_STRENGTH_LIMIT, UCOL_QUATERNARY =3, UCOL_IDENTICAL =15, UCOL_STRENGTH_LIMIT, UCOL_OFF = 16, UCOL_ON = 17, UCOL_SHIFTED = 20, UCOL_NON_IGNORABLE = 21, UCOL_LOWER_FIRST = 24, UCOL_UPPER_FIRST = 25, UCOL_ATTRIBUTE_VALUE_COUNT }`	enum Enum containing attribute values for controlling collation behavior.
`UColBoundMode{ UCOL_BOUND_LOWER = 0, UCOL_BOUND_UPPER = 1, UCOL_BOUND_UPPER_LONG = 2, UCOL_BOUND_VALUE_COUNT }`	enum Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.
`UColReorderCode{ UCOL_REORDER_CODE_DEFAULT = -1, UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, UCOL_REORDER_CODE_SPACE = 0x1000, UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION = 0x1001, UCOL_REORDER_CODE_SYMBOL = 0x1002, UCOL_REORDER_CODE_CURRENCY = 0x1003, UCOL_REORDER_CODE_DIGIT = 0x1004, UCOL_REORDER_CODE_LIMIT = 0x1005 }`	enum Enum containing the codes for reordering segments of the collation table that are not script codes.
`UCollationResult{ UCOL_EQUAL = 0, UCOL_GREATER = 1, UCOL_LESS = -1 }`	enum UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.
`UDecompositionType{ U_DT_NONE, U_DT_CANONICAL, U_DT_COMPAT, U_DT_CIRCLE, U_DT_FINAL, U_DT_FONT, U_DT_FRACTION, U_DT_INITIAL, U_DT_ISOLATED, U_DT_MEDIAL, U_DT_NARROW, U_DT_NOBREAK, U_DT_SMALL, U_DT_SQUARE, U_DT_SUB, U_DT_SUPER, U_DT_VERTICAL, U_DT_WIDE, U_DT_COUNT }`	enum Decomposition Type constants.
`UDialectHandling{ ULDN_STANDARD_NAMES = 0, ULDN_DIALECT_NAMES }`	enum Enum used in LocaleDisplayNames::createInstance.
UDisplayContext{ UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0, UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1, UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2, UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3, UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4, UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0, UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1, UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0, UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1 }	enum Display context settings.
`UDisplayContextType{ UDISPCTX_TYPE_DIALECT_HANDLING = 0, UDISPCTX_TYPE_CAPITALIZATION = 1, UDISPCTX_TYPE_DISPLAY_LENGTH = 2, UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3 }`	enum Display context types, for getting values of a particular setting.
`UEastAsianWidth{ U_EA_NEUTRAL, U_EA_AMBIGUOUS, U_EA_HALFWIDTH, U_EA_FULLWIDTH, U_EA_NARROW, U_EA_WIDE, U_EA_COUNT }`	enum East Asian Width constants.
UErrorCode{ U_USING_FALLBACK_WARNING = -128, U_ERROR_WARNING_START = -128, U_USING_DEFAULT_WARNING = -127, U_SAFECLONE_ALLOCATED_WARNING = -126, U_STATE_OLD_WARNING = -125, U_STRING_NOT_TERMINATED_WARNING = -124, U_SORT_KEY_TOO_SHORT_WARNING = -123, U_AMBIGUOUS_ALIAS_WARNING = -122, U_DIFFERENT_UCA_VERSION = -121, U_PLUGIN_CHANGED_LEVEL_WARNING = -120, U_ERROR_WARNING_LIMIT, U_ZERO_ERROR = 0, U_ILLEGAL_ARGUMENT_ERROR = 1, U_MISSING_RESOURCE_ERROR = 2, U_INVALID_FORMAT_ERROR = 3, U_FILE_ACCESS_ERROR = 4, U_INTERNAL_PROGRAM_ERROR = 5, U_MESSAGE_PARSE_ERROR = 6, U_MEMORY_ALLOCATION_ERROR = 7, U_INDEX_OUTOFBOUNDS_ERROR = 8, U_PARSE_ERROR = 9, U_INVALID_CHAR_FOUND = 10, U_TRUNCATED_CHAR_FOUND = 11, U_ILLEGAL_CHAR_FOUND = 12, U_INVALID_TABLE_FORMAT = 13, U_INVALID_TABLE_FILE = 14, U_BUFFER_OVERFLOW_ERROR = 15, U_UNSUPPORTED_ERROR = 16, U_RESOURCE_TYPE_MISMATCH = 17, U_ILLEGAL_ESCAPE_SEQUENCE = 18, U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, U_NO_SPACE_AVAILABLE = 20, U_CE_NOT_FOUND_ERROR = 21, U_PRIMARY_TOO_LONG_ERROR = 22, U_STATE_TOO_OLD_ERROR = 23, U_TOO_MANY_ALIASES_ERROR = 24, U_ENUM_OUT_OF_SYNC_ERROR = 25, U_INVARIANT_CONVERSION_ERROR = 26, U_INVALID_STATE_ERROR = 27, U_COLLATOR_VERSION_MISMATCH = 28, U_USELESS_COLLATOR_ERROR = 29, U_NO_WRITE_PERMISSION = 30, U_INPUT_TOO_LONG_ERROR = 31, U_STANDARD_ERROR_LIMIT = 32, U_BAD_VARIABLE_DEFINITION =0x10000, U_PARSE_ERROR_START = 0x10000, U_MALFORMED_RULE, U_MALFORMED_SET, U_MALFORMED_SYMBOL_REFERENCE, U_MALFORMED_UNICODE_ESCAPE, U_MALFORMED_VARIABLE_DEFINITION, U_MALFORMED_VARIABLE_REFERENCE, U_MISMATCHED_SEGMENT_DELIMITERS, U_MISPLACED_ANCHOR_START, U_MISPLACED_CURSOR_OFFSET, U_MISPLACED_QUANTIFIER, U_MISSING_OPERATOR, U_MISSING_SEGMENT_CLOSE, U_MULTIPLE_ANTE_CONTEXTS, U_MULTIPLE_CURSORS, U_MULTIPLE_POST_CONTEXTS, U_TRAILING_BACKSLASH, U_UNDEFINED_SEGMENT_REFERENCE, U_UNDEFINED_VARIABLE, U_UNQUOTED_SPECIAL, U_UNTERMINATED_QUOTE, U_RULE_MASK_ERROR, U_MISPLACED_COMPOUND_FILTER, U_MULTIPLE_COMPOUND_FILTERS, U_INVALID_RBT_SYNTAX, 
U_INVALID_PROPERTY_PATTERN, U_MALFORMED_PRAGMA, U_UNCLOSED_SEGMENT, U_ILLEGAL_CHAR_IN_SEGMENT, U_VARIABLE_RANGE_EXHAUSTED, U_VARIABLE_RANGE_OVERLAP, U_ILLEGAL_CHARACTER, U_INTERNAL_TRANSLITERATOR_ERROR, U_INVALID_ID, U_INVALID_FUNCTION, U_PARSE_ERROR_LIMIT, U_UNEXPECTED_TOKEN =0x10100, U_FMT_PARSE_ERROR_START =0x10100, U_MULTIPLE_DECIMAL_SEPARATORS, U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, U_MULTIPLE_EXPONENTIAL_SYMBOLS, U_MALFORMED_EXPONENTIAL_PATTERN, U_MULTIPLE_PERCENT_SYMBOLS, U_MULTIPLE_PERMILL_SYMBOLS, U_MULTIPLE_PAD_SPECIFIERS, U_PATTERN_SYNTAX_ERROR, U_ILLEGAL_PAD_POSITION, U_UNMATCHED_BRACES, U_UNSUPPORTED_PROPERTY, U_UNSUPPORTED_ATTRIBUTE, U_ARGUMENT_TYPE_MISMATCH, U_DUPLICATE_KEYWORD, U_UNDEFINED_KEYWORD, U_DEFAULT_KEYWORD_MISSING, U_DECIMAL_NUMBER_SYNTAX_ERROR, U_FORMAT_INEXACT_ERROR, U_NUMBER_ARG_OUTOFBOUNDS_ERROR, U_NUMBER_SKELETON_SYNTAX_ERROR, U_FMT_PARSE_ERROR_LIMIT = 0x10114, U_BRK_INTERNAL_ERROR =0x10200, U_BRK_ERROR_START =0x10200, U_BRK_HEX_DIGITS_EXPECTED, U_BRK_SEMICOLON_EXPECTED, U_BRK_RULE_SYNTAX, U_BRK_UNCLOSED_SET, U_BRK_ASSIGN_ERROR, U_BRK_VARIABLE_REDFINITION, U_BRK_MISMATCHED_PAREN, U_BRK_NEW_LINE_IN_QUOTED_STRING, U_BRK_UNDEFINED_VARIABLE, U_BRK_INIT_ERROR, U_BRK_RULE_EMPTY_SET, U_BRK_UNRECOGNIZED_OPTION, U_BRK_MALFORMED_RULE_TAG, U_BRK_ERROR_LIMIT, U_REGEX_INTERNAL_ERROR =0x10300, U_REGEX_ERROR_START =0x10300, U_REGEX_RULE_SYNTAX, U_REGEX_INVALID_STATE, U_REGEX_BAD_ESCAPE_SEQUENCE, U_REGEX_PROPERTY_SYNTAX, U_REGEX_UNIMPLEMENTED, U_REGEX_MISMATCHED_PAREN, U_REGEX_NUMBER_TOO_BIG, U_REGEX_BAD_INTERVAL, U_REGEX_MAX_LT_MIN, U_REGEX_INVALID_BACK_REF, U_REGEX_INVALID_FLAG, U_REGEX_LOOK_BEHIND_LIMIT, U_REGEX_SET_CONTAINS_STRING, U_REGEX_OCTAL_TOO_BIG, U_REGEX_MISSING_CLOSE_BRACKET =U_REGEX_SET_CONTAINS_STRING+2, U_REGEX_INVALID_RANGE, U_REGEX_STACK_OVERFLOW, U_REGEX_TIME_OUT, U_REGEX_STOPPED_BY_CALLER, U_REGEX_PATTERN_TOO_BIG, U_REGEX_INVALID_CAPTURE_GROUP_NAME, U_REGEX_ERROR_LIMIT =U_REGEX_STOPPED_BY_CALLER+3, 
U_IDNA_PROHIBITED_ERROR =0x10400, U_IDNA_ERROR_START =0x10400, U_IDNA_UNASSIGNED_ERROR, U_IDNA_CHECK_BIDI_ERROR, U_IDNA_STD3_ASCII_RULES_ERROR, U_IDNA_ACE_PREFIX_ERROR, U_IDNA_VERIFICATION_ERROR, U_IDNA_LABEL_TOO_LONG_ERROR, U_IDNA_ZERO_LENGTH_LABEL_ERROR, U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR, U_IDNA_ERROR_LIMIT, U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR, U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR, U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR, U_PLUGIN_ERROR_START =0x10500, U_PLUGIN_TOO_HIGH =0x10500, U_PLUGIN_DIDNT_SET_LEVEL, U_PLUGIN_ERROR_LIMIT, U_ERROR_LIMIT =U_PLUGIN_ERROR_LIMIT }	enum Standard ICU4C error code type, a substitute for exceptions.
`UGraphemeClusterBreak{ U_GCB_OTHER = 0, U_GCB_CONTROL = 1, U_GCB_CR = 2, U_GCB_EXTEND = 3, U_GCB_L = 4, U_GCB_LF = 5, U_GCB_LV = 6, U_GCB_LVT = 7, U_GCB_T = 8, U_GCB_V = 9, U_GCB_SPACING_MARK = 10, U_GCB_PREPEND = 11, U_GCB_REGIONAL_INDICATOR = 12, U_GCB_E_BASE = 13, U_GCB_E_BASE_GAZ = 14, U_GCB_E_MODIFIER = 15, U_GCB_GLUE_AFTER_ZWJ = 16, U_GCB_ZWJ = 17, U_GCB_COUNT = 18 }`	enum Grapheme Cluster Break constants.
`UHangulSyllableType{ U_HST_NOT_APPLICABLE, U_HST_LEADING_JAMO, U_HST_VOWEL_JAMO, U_HST_TRAILING_JAMO, U_HST_LV_SYLLABLE, U_HST_LVT_SYLLABLE, U_HST_COUNT }`	enum Hangul Syllable Type constants.
`UIndicPositionalCategory{ U_INPC_NA, U_INPC_BOTTOM, U_INPC_BOTTOM_AND_LEFT, U_INPC_BOTTOM_AND_RIGHT, U_INPC_LEFT, U_INPC_LEFT_AND_RIGHT, U_INPC_OVERSTRUCK, U_INPC_RIGHT, U_INPC_TOP, U_INPC_TOP_AND_BOTTOM, U_INPC_TOP_AND_BOTTOM_AND_RIGHT, U_INPC_TOP_AND_LEFT, U_INPC_TOP_AND_LEFT_AND_RIGHT, U_INPC_TOP_AND_RIGHT, U_INPC_VISUAL_ORDER_LEFT, U_INPC_TOP_AND_BOTTOM_AND_LEFT }`	enum Indic Positional Category constants.
UIndicSyllabicCategory{ U_INSC_OTHER, U_INSC_AVAGRAHA, U_INSC_BINDU, U_INSC_BRAHMI_JOINING_NUMBER, U_INSC_CANTILLATION_MARK, U_INSC_CONSONANT, U_INSC_CONSONANT_DEAD, U_INSC_CONSONANT_FINAL, U_INSC_CONSONANT_HEAD_LETTER, U_INSC_CONSONANT_INITIAL_POSTFIXED, U_INSC_CONSONANT_KILLER, U_INSC_CONSONANT_MEDIAL, U_INSC_CONSONANT_PLACEHOLDER, U_INSC_CONSONANT_PRECEDING_REPHA, U_INSC_CONSONANT_PREFIXED, U_INSC_CONSONANT_SUBJOINED, U_INSC_CONSONANT_SUCCEEDING_REPHA, U_INSC_CONSONANT_WITH_STACKER, U_INSC_GEMINATION_MARK, U_INSC_INVISIBLE_STACKER, U_INSC_JOINER, U_INSC_MODIFYING_LETTER, U_INSC_NON_JOINER, U_INSC_NUKTA, U_INSC_NUMBER, U_INSC_NUMBER_JOINER, U_INSC_PURE_KILLER, U_INSC_REGISTER_SHIFTER, U_INSC_SYLLABLE_MODIFIER, U_INSC_TONE_LETTER, U_INSC_TONE_MARK, U_INSC_VIRAMA, U_INSC_VISARGA, U_INSC_VOWEL, U_INSC_VOWEL_DEPENDENT, U_INSC_VOWEL_INDEPENDENT }	enum Indic Syllabic Category constants.
UJoiningGroup{ U_JG_NO_JOINING_GROUP, U_JG_AIN, U_JG_ALAPH, U_JG_ALEF, U_JG_BEH, U_JG_BETH, U_JG_DAL, U_JG_DALATH_RISH, U_JG_E, U_JG_FEH, U_JG_FINAL_SEMKATH, U_JG_GAF, U_JG_GAMAL, U_JG_HAH, U_JG_TEH_MARBUTA_GOAL, U_JG_HAMZA_ON_HEH_GOAL =U_JG_TEH_MARBUTA_GOAL, U_JG_HE, U_JG_HEH, U_JG_HEH_GOAL, U_JG_HETH, U_JG_KAF, U_JG_KAPH, U_JG_KNOTTED_HEH, U_JG_LAM, U_JG_LAMADH, U_JG_MEEM, U_JG_MIM, U_JG_NOON, U_JG_NUN, U_JG_PE, U_JG_QAF, U_JG_QAPH, U_JG_REH, U_JG_REVERSED_PE, U_JG_SAD, U_JG_SADHE, U_JG_SEEN, U_JG_SEMKATH, U_JG_SHIN, U_JG_SWASH_KAF, U_JG_SYRIAC_WAW, U_JG_TAH, U_JG_TAW, U_JG_TEH_MARBUTA, U_JG_TETH, U_JG_WAW, U_JG_YEH, U_JG_YEH_BARREE, U_JG_YEH_WITH_TAIL, U_JG_YUDH, U_JG_YUDH_HE, U_JG_ZAIN, U_JG_FE, U_JG_KHAPH, U_JG_ZHAIN, U_JG_BURUSHASKI_YEH_BARREE, U_JG_FARSI_YEH, U_JG_NYA, U_JG_ROHINGYA_YEH, U_JG_MANICHAEAN_ALEPH, U_JG_MANICHAEAN_AYIN, U_JG_MANICHAEAN_BETH, U_JG_MANICHAEAN_DALETH, U_JG_MANICHAEAN_DHAMEDH, U_JG_MANICHAEAN_FIVE, U_JG_MANICHAEAN_GIMEL, U_JG_MANICHAEAN_HETH, U_JG_MANICHAEAN_HUNDRED, U_JG_MANICHAEAN_KAPH, U_JG_MANICHAEAN_LAMEDH, U_JG_MANICHAEAN_MEM, U_JG_MANICHAEAN_NUN, U_JG_MANICHAEAN_ONE, U_JG_MANICHAEAN_PE, U_JG_MANICHAEAN_QOPH, U_JG_MANICHAEAN_RESH, U_JG_MANICHAEAN_SADHE, U_JG_MANICHAEAN_SAMEKH, U_JG_MANICHAEAN_TAW, U_JG_MANICHAEAN_TEN, U_JG_MANICHAEAN_TETH, U_JG_MANICHAEAN_THAMEDH, U_JG_MANICHAEAN_TWENTY, U_JG_MANICHAEAN_WAW, U_JG_MANICHAEAN_YODH, U_JG_MANICHAEAN_ZAYIN, U_JG_STRAIGHT_WAW, U_JG_AFRICAN_FEH, U_JG_AFRICAN_NOON, U_JG_AFRICAN_QAF, U_JG_MALAYALAM_BHA, U_JG_MALAYALAM_JA, U_JG_MALAYALAM_LLA, U_JG_MALAYALAM_LLLA, U_JG_MALAYALAM_NGA, U_JG_MALAYALAM_NNA, U_JG_MALAYALAM_NNNA, U_JG_MALAYALAM_NYA, U_JG_MALAYALAM_RA, U_JG_MALAYALAM_SSA, U_JG_MALAYALAM_TTA, U_JG_HANIFI_ROHINGYA_KINNA_YA, U_JG_HANIFI_ROHINGYA_PA, U_JG_THIN_YEH, U_JG_VERTICAL_TAIL, U_JG_COUNT }	enum Joining Group constants.
`UJoiningType{ U_JT_NON_JOINING, U_JT_JOIN_CAUSING, U_JT_DUAL_JOINING, U_JT_LEFT_JOINING, U_JT_RIGHT_JOINING, U_JT_TRANSPARENT, U_JT_COUNT }`	enum Joining Type constants.
`ULayoutType{ ULOC_LAYOUT_LTR = 0, ULOC_LAYOUT_RTL = 1, ULOC_LAYOUT_TTB = 2, ULOC_LAYOUT_BTT = 3, ULOC_LAYOUT_UNKNOWN }`	enum Enums for the return value of the character and line orientation functions.
ULineBreak{ U_LB_UNKNOWN = 0, U_LB_AMBIGUOUS = 1, U_LB_ALPHABETIC = 2, U_LB_BREAK_BOTH = 3, U_LB_BREAK_AFTER = 4, U_LB_BREAK_BEFORE = 5, U_LB_MANDATORY_BREAK = 6, U_LB_CONTINGENT_BREAK = 7, U_LB_CLOSE_PUNCTUATION = 8, U_LB_COMBINING_MARK = 9, U_LB_CARRIAGE_RETURN = 10, U_LB_EXCLAMATION = 11, U_LB_GLUE = 12, U_LB_HYPHEN = 13, U_LB_IDEOGRAPHIC = 14, U_LB_INSEPARABLE = 15, U_LB_INSEPERABLE = U_LB_INSEPARABLE, U_LB_INFIX_NUMERIC = 16, U_LB_LINE_FEED = 17, U_LB_NONSTARTER = 18, U_LB_NUMERIC = 19, U_LB_OPEN_PUNCTUATION = 20, U_LB_POSTFIX_NUMERIC = 21, U_LB_PREFIX_NUMERIC = 22, U_LB_QUOTATION = 23, U_LB_COMPLEX_CONTEXT = 24, U_LB_SURROGATE = 25, U_LB_SPACE = 26, U_LB_BREAK_SYMBOLS = 27, U_LB_ZWSPACE = 28, U_LB_NEXT_LINE = 29, U_LB_WORD_JOINER = 30, U_LB_H2 = 31, U_LB_H3 = 32, U_LB_JL = 33, U_LB_JT = 34, U_LB_JV = 35, U_LB_CLOSE_PARENTHESIS = 36, U_LB_CONDITIONAL_JAPANESE_STARTER = 37, U_LB_HEBREW_LETTER = 38, U_LB_REGIONAL_INDICATOR = 39, U_LB_E_BASE = 40, U_LB_E_MODIFIER = 41, U_LB_ZWJ = 42, U_LB_COUNT = 43 }	enum Line Break constants.
`ULineBreakTag{ UBRK_LINE_SOFT = 0, UBRK_LINE_SOFT_LIMIT = 100, UBRK_LINE_HARD = 100, UBRK_LINE_HARD_LIMIT = 200 }`	enum Enum constants for the line break tags returned by getRuleStatus().
`ULocAvailableType{ ULOC_AVAILABLE_DEFAULT, ULOC_AVAILABLE_ONLY_LEGACY_ALIASES, ULOC_AVAILABLE_WITH_LEGACY_ALIASES, ULOC_AVAILABLE_COUNT }`	enum Types for uloc_getAvailableByType and uloc_countAvailableByType.
`ULocDataLocaleType{ ULOC_ACTUAL_LOCALE = 0, ULOC_VALID_LOCALE = 1, ULOC_REQUESTED_LOCALE = 2, ULOC_DATA_LOCALE_TYPE_LIMIT = 3 }`	enum Constants for *_getLocale(). Allows the user to select whether they want information on the requested, valid, or actual locale.
`UNormalization2Mode{ UNORM2_COMPOSE, UNORM2_DECOMPOSE, UNORM2_FCD, UNORM2_COMPOSE_CONTIGUOUS }`	enum Constants for normalization modes.
`UNormalizationCheckResult{ UNORM_NO, UNORM_YES, UNORM_MAYBE }`	enum Result values for normalization quick check functions.
`UNumericType{ U_NT_NONE, U_NT_DECIMAL, U_NT_DIGIT, U_NT_NUMERIC, U_NT_COUNT }`	enum Numeric Type constants.
UProperty{ UCHAR_ALPHABETIC =0, UCHAR_BINARY_START =UCHAR_ALPHABETIC, UCHAR_ASCII_HEX_DIGIT =1, UCHAR_BIDI_CONTROL =2, UCHAR_BIDI_MIRRORED =3, UCHAR_DASH =4, UCHAR_DEFAULT_IGNORABLE_CODE_POINT =5, UCHAR_DEPRECATED =6, UCHAR_DIACRITIC =7, UCHAR_EXTENDER =8, UCHAR_FULL_COMPOSITION_EXCLUSION =9, UCHAR_GRAPHEME_BASE =10, UCHAR_GRAPHEME_EXTEND =11, UCHAR_GRAPHEME_LINK =12, UCHAR_HEX_DIGIT =13, UCHAR_HYPHEN =14, UCHAR_ID_CONTINUE =15, UCHAR_ID_START =16, UCHAR_IDEOGRAPHIC =17, UCHAR_IDS_BINARY_OPERATOR =18, UCHAR_IDS_TRINARY_OPERATOR =19, UCHAR_JOIN_CONTROL =20, UCHAR_LOGICAL_ORDER_EXCEPTION =21, UCHAR_LOWERCASE =22, UCHAR_MATH =23, UCHAR_NONCHARACTER_CODE_POINT =24, UCHAR_QUOTATION_MARK =25, UCHAR_RADICAL =26, UCHAR_SOFT_DOTTED =27, UCHAR_TERMINAL_PUNCTUATION =28, UCHAR_UNIFIED_IDEOGRAPH =29, UCHAR_UPPERCASE =30, UCHAR_WHITE_SPACE =31, UCHAR_XID_CONTINUE =32, UCHAR_XID_START =33, UCHAR_CASE_SENSITIVE =34, UCHAR_S_TERM =35, UCHAR_VARIATION_SELECTOR =36, UCHAR_NFD_INERT =37, UCHAR_NFKD_INERT =38, UCHAR_NFC_INERT =39, UCHAR_NFKC_INERT =40, UCHAR_SEGMENT_STARTER =41, UCHAR_PATTERN_SYNTAX =42, UCHAR_PATTERN_WHITE_SPACE =43, UCHAR_POSIX_ALNUM =44, UCHAR_POSIX_BLANK =45, UCHAR_POSIX_GRAPH =46, UCHAR_POSIX_PRINT =47, UCHAR_POSIX_XDIGIT =48, UCHAR_CASED =49, UCHAR_CASE_IGNORABLE =50, UCHAR_CHANGES_WHEN_LOWERCASED =51, UCHAR_CHANGES_WHEN_UPPERCASED =52, UCHAR_CHANGES_WHEN_TITLECASED =53, UCHAR_CHANGES_WHEN_CASEFOLDED =54, UCHAR_CHANGES_WHEN_CASEMAPPED =55, UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED =56, UCHAR_EMOJI =57, UCHAR_EMOJI_PRESENTATION =58, UCHAR_EMOJI_MODIFIER =59, UCHAR_EMOJI_MODIFIER_BASE =60, UCHAR_EMOJI_COMPONENT =61, UCHAR_REGIONAL_INDICATOR =62, UCHAR_PREPENDED_CONCATENATION_MARK =63, UCHAR_EXTENDED_PICTOGRAPHIC =64, UCHAR_BASIC_EMOJI =65, UCHAR_EMOJI_KEYCAP_SEQUENCE =66, UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE =67, UCHAR_RGI_EMOJI_FLAG_SEQUENCE =68, UCHAR_RGI_EMOJI_TAG_SEQUENCE =69, UCHAR_RGI_EMOJI_ZWJ_SEQUENCE =70, UCHAR_RGI_EMOJI =71, UCHAR_BINARY_LIMIT =72, UCHAR_BIDI_CLASS 
=0x1000, UCHAR_INT_START =UCHAR_BIDI_CLASS, UCHAR_BLOCK =0x1001, UCHAR_CANONICAL_COMBINING_CLASS =0x1002, UCHAR_DECOMPOSITION_TYPE =0x1003, UCHAR_EAST_ASIAN_WIDTH =0x1004, UCHAR_GENERAL_CATEGORY =0x1005, UCHAR_JOINING_GROUP =0x1006, UCHAR_JOINING_TYPE =0x1007, UCHAR_LINE_BREAK =0x1008, UCHAR_NUMERIC_TYPE =0x1009, UCHAR_SCRIPT =0x100A, UCHAR_HANGUL_SYLLABLE_TYPE =0x100B, UCHAR_NFD_QUICK_CHECK =0x100C, UCHAR_NFKD_QUICK_CHECK =0x100D, UCHAR_NFC_QUICK_CHECK =0x100E, UCHAR_NFKC_QUICK_CHECK =0x100F, UCHAR_LEAD_CANONICAL_COMBINING_CLASS =0x1010, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS =0x1011, UCHAR_GRAPHEME_CLUSTER_BREAK =0x1012, UCHAR_SENTENCE_BREAK =0x1013, UCHAR_WORD_BREAK =0x1014, UCHAR_BIDI_PAIRED_BRACKET_TYPE =0x1015, UCHAR_INDIC_POSITIONAL_CATEGORY =0x1016, UCHAR_INDIC_SYLLABIC_CATEGORY =0x1017, UCHAR_VERTICAL_ORIENTATION =0x1018, UCHAR_INT_LIMIT =0x1019, UCHAR_GENERAL_CATEGORY_MASK =0x2000, UCHAR_MASK_START =UCHAR_GENERAL_CATEGORY_MASK, UCHAR_MASK_LIMIT =0x2001, UCHAR_NUMERIC_VALUE =0x3000, UCHAR_DOUBLE_START =UCHAR_NUMERIC_VALUE, UCHAR_DOUBLE_LIMIT =0x3001, UCHAR_AGE =0x4000, UCHAR_STRING_START =UCHAR_AGE, UCHAR_BIDI_MIRRORING_GLYPH =0x4001, UCHAR_CASE_FOLDING =0x4002, UCHAR_ISO_COMMENT =0x4003, UCHAR_LOWERCASE_MAPPING =0x4004, UCHAR_NAME =0x4005, UCHAR_SIMPLE_CASE_FOLDING =0x4006, UCHAR_SIMPLE_LOWERCASE_MAPPING =0x4007, UCHAR_SIMPLE_TITLECASE_MAPPING =0x4008, UCHAR_SIMPLE_UPPERCASE_MAPPING =0x4009, UCHAR_TITLECASE_MAPPING =0x400A, UCHAR_UNICODE_1_NAME =0x400B, UCHAR_UPPERCASE_MAPPING =0x400C, UCHAR_BIDI_PAIRED_BRACKET =0x400D, UCHAR_STRING_LIMIT =0x400E, UCHAR_SCRIPT_EXTENSIONS =0x7000, UCHAR_OTHER_PROPERTY_START =UCHAR_SCRIPT_EXTENSIONS, UCHAR_OTHER_PROPERTY_LIMIT =0x7001, UCHAR_INVALID_CODE = -1 }	enum Selection constants for Unicode properties.
`UPropertyNameChoice{ U_SHORT_PROPERTY_NAME, U_LONG_PROPERTY_NAME, U_PROPERTY_NAME_CHOICE_COUNT }`	enum Selector constants for u_getPropertyName() and u_getPropertyValueName().
UScriptCode{ USCRIPT_INVALID_CODE = -1, USCRIPT_COMMON = 0, USCRIPT_INHERITED = 1, USCRIPT_ARABIC = 2, USCRIPT_ARMENIAN = 3, USCRIPT_BENGALI = 4, USCRIPT_BOPOMOFO = 5, USCRIPT_CHEROKEE = 6, USCRIPT_COPTIC = 7, USCRIPT_CYRILLIC = 8, USCRIPT_DESERET = 9, USCRIPT_DEVANAGARI = 10, USCRIPT_ETHIOPIC = 11, USCRIPT_GEORGIAN = 12, USCRIPT_GOTHIC = 13, USCRIPT_GREEK = 14, USCRIPT_GUJARATI = 15, USCRIPT_GURMUKHI = 16, USCRIPT_HAN = 17, USCRIPT_HANGUL = 18, USCRIPT_HEBREW = 19, USCRIPT_HIRAGANA = 20, USCRIPT_KANNADA = 21, USCRIPT_KATAKANA = 22, USCRIPT_KHMER = 23, USCRIPT_LAO = 24, USCRIPT_LATIN = 25, USCRIPT_MALAYALAM = 26, USCRIPT_MONGOLIAN = 27, USCRIPT_MYANMAR = 28, USCRIPT_OGHAM = 29, USCRIPT_OLD_ITALIC = 30, USCRIPT_ORIYA = 31, USCRIPT_RUNIC = 32, USCRIPT_SINHALA = 33, USCRIPT_SYRIAC = 34, USCRIPT_TAMIL = 35, USCRIPT_TELUGU = 36, USCRIPT_THAANA = 37, USCRIPT_THAI = 38, USCRIPT_TIBETAN = 39, USCRIPT_CANADIAN_ABORIGINAL = 40, USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, USCRIPT_YI = 41, USCRIPT_TAGALOG = 42, USCRIPT_HANUNOO = 43, USCRIPT_BUHID = 44, USCRIPT_TAGBANWA = 45, USCRIPT_BRAILLE = 46, USCRIPT_CYPRIOT = 47, USCRIPT_LIMBU = 48, USCRIPT_LINEAR_B = 49, USCRIPT_OSMANYA = 50, USCRIPT_SHAVIAN = 51, USCRIPT_TAI_LE = 52, USCRIPT_UGARITIC = 53, USCRIPT_KATAKANA_OR_HIRAGANA = 54, USCRIPT_BUGINESE = 55, USCRIPT_GLAGOLITIC = 56, USCRIPT_KHAROSHTHI = 57, USCRIPT_SYLOTI_NAGRI = 58, USCRIPT_NEW_TAI_LUE = 59, USCRIPT_TIFINAGH = 60, USCRIPT_OLD_PERSIAN = 61, USCRIPT_BALINESE = 62, USCRIPT_BATAK = 63, USCRIPT_BLISSYMBOLS = 64, USCRIPT_BRAHMI = 65, USCRIPT_CHAM = 66, USCRIPT_CIRTH = 67, USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, USCRIPT_DEMOTIC_EGYPTIAN = 69, USCRIPT_HIERATIC_EGYPTIAN = 70, USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, USCRIPT_KHUTSURI = 72, USCRIPT_SIMPLIFIED_HAN = 73, USCRIPT_TRADITIONAL_HAN = 74, USCRIPT_PAHAWH_HMONG = 75, USCRIPT_OLD_HUNGARIAN = 76, USCRIPT_HARAPPAN_INDUS = 77, USCRIPT_JAVANESE = 78, USCRIPT_KAYAH_LI = 79, USCRIPT_LATIN_FRAKTUR = 80, USCRIPT_LATIN_GAELIC = 
81, USCRIPT_LEPCHA = 82, USCRIPT_LINEAR_A = 83, USCRIPT_MANDAIC = 84, USCRIPT_MANDAEAN = USCRIPT_MANDAIC, USCRIPT_MAYAN_HIEROGLYPHS = 85, USCRIPT_MEROITIC_HIEROGLYPHS = 86, USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, USCRIPT_NKO = 87, USCRIPT_ORKHON = 88, USCRIPT_OLD_PERMIC = 89, USCRIPT_PHAGS_PA = 90, USCRIPT_PHOENICIAN = 91, USCRIPT_MIAO = 92, USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, USCRIPT_RONGORONGO = 93, USCRIPT_SARATI = 94, USCRIPT_ESTRANGELO_SYRIAC = 95, USCRIPT_WESTERN_SYRIAC = 96, USCRIPT_EASTERN_SYRIAC = 97, USCRIPT_TENGWAR = 98, USCRIPT_VAI = 99, USCRIPT_VISIBLE_SPEECH = 100, USCRIPT_CUNEIFORM = 101, USCRIPT_UNWRITTEN_LANGUAGES = 102, USCRIPT_UNKNOWN = 103, USCRIPT_CARIAN = 104, USCRIPT_JAPANESE = 105, USCRIPT_LANNA = 106, USCRIPT_LYCIAN = 107, USCRIPT_LYDIAN = 108, USCRIPT_OL_CHIKI = 109, USCRIPT_REJANG = 110, USCRIPT_SAURASHTRA = 111, USCRIPT_SIGN_WRITING = 112, USCRIPT_SUNDANESE = 113, USCRIPT_MOON = 114, USCRIPT_MEITEI_MAYEK = 115, USCRIPT_IMPERIAL_ARAMAIC = 116, USCRIPT_AVESTAN = 117, USCRIPT_CHAKMA = 118, USCRIPT_KOREAN = 119, USCRIPT_KAITHI = 120, USCRIPT_MANICHAEAN = 121, USCRIPT_INSCRIPTIONAL_PAHLAVI = 122, USCRIPT_PSALTER_PAHLAVI = 123, USCRIPT_BOOK_PAHLAVI = 124, USCRIPT_INSCRIPTIONAL_PARTHIAN = 125, USCRIPT_SAMARITAN = 126, USCRIPT_TAI_VIET = 127, USCRIPT_MATHEMATICAL_NOTATION = 128, USCRIPT_SYMBOLS = 129, USCRIPT_BAMUM = 130, USCRIPT_LISU = 131, USCRIPT_NAKHI_GEBA = 132, USCRIPT_OLD_SOUTH_ARABIAN = 133, USCRIPT_BASSA_VAH = 134, USCRIPT_DUPLOYAN = 135, USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN, USCRIPT_ELBASAN = 136, USCRIPT_GRANTHA = 137, USCRIPT_KPELLE = 138, USCRIPT_LOMA = 139, USCRIPT_MENDE = 140, USCRIPT_MEROITIC_CURSIVE = 141, USCRIPT_OLD_NORTH_ARABIAN = 142, USCRIPT_NABATAEAN = 143, USCRIPT_PALMYRENE = 144, USCRIPT_KHUDAWADI = 145, USCRIPT_SINDHI = USCRIPT_KHUDAWADI, USCRIPT_WARANG_CITI = 146, USCRIPT_AFAKA = 147, USCRIPT_JURCHEN = 148, USCRIPT_MRO = 149, USCRIPT_NUSHU = 150, USCRIPT_SHARADA = 151, USCRIPT_SORA_SOMPENG = 152, 
USCRIPT_TAKRI = 153, USCRIPT_TANGUT = 154, USCRIPT_WOLEAI = 155, USCRIPT_ANATOLIAN_HIEROGLYPHS = 156, USCRIPT_KHOJKI = 157, USCRIPT_TIRHUTA = 158, USCRIPT_CAUCASIAN_ALBANIAN = 159, USCRIPT_MAHAJANI = 160, USCRIPT_AHOM = 161, USCRIPT_HATRAN = 162, USCRIPT_MODI = 163, USCRIPT_MULTANI = 164, USCRIPT_PAU_CIN_HAU = 165, USCRIPT_SIDDHAM = 166, USCRIPT_ADLAM = 167, USCRIPT_BHAIKSUKI = 168, USCRIPT_MARCHEN = 169, USCRIPT_NEWA = 170, USCRIPT_OSAGE = 171, USCRIPT_HAN_WITH_BOPOMOFO = 172, USCRIPT_JAMO = 173, USCRIPT_SYMBOLS_EMOJI = 174, USCRIPT_MASARAM_GONDI = 175, USCRIPT_SOYOMBO = 176, USCRIPT_ZANABAZAR_SQUARE = 177, USCRIPT_DOGRA = 178, USCRIPT_GUNJALA_GONDI = 179, USCRIPT_MAKASAR = 180, USCRIPT_MEDEFAIDRIN = 181, USCRIPT_HANIFI_ROHINGYA = 182, USCRIPT_SOGDIAN = 183, USCRIPT_OLD_SOGDIAN = 184, USCRIPT_ELYMAIC = 185, USCRIPT_NYIAKENG_PUACHUE_HMONG = 186, USCRIPT_NANDINAGARI = 187, USCRIPT_WANCHO = 188, USCRIPT_CHORASMIAN = 189, USCRIPT_DIVES_AKURU = 190, USCRIPT_KHITAN_SMALL_SCRIPT = 191, USCRIPT_YEZIDI = 192, USCRIPT_CYPRO_MINOAN = 193, USCRIPT_OLD_UYGHUR = 194, USCRIPT_TANGSA = 195, USCRIPT_TOTO = 196, USCRIPT_VITHKUQI = 197, USCRIPT_KAWI = 198, USCRIPT_NAG_MUNDARI = 199, USCRIPT_CODE_LIMIT = 200 }	enum Constants for ISO 15924 script codes.
`UScriptUsage{ USCRIPT_USAGE_NOT_ENCODED, USCRIPT_USAGE_UNKNOWN, USCRIPT_USAGE_EXCLUDED, USCRIPT_USAGE_LIMITED_USE, USCRIPT_USAGE_ASPIRATIONAL, USCRIPT_USAGE_RECOMMENDED }`	enum Script usage constants.
`USentenceBreak{ U_SB_OTHER = 0, U_SB_ATERM = 1, U_SB_CLOSE = 2, U_SB_FORMAT = 3, U_SB_LOWER = 4, U_SB_NUMERIC = 5, U_SB_OLETTER = 6, U_SB_SEP = 7, U_SB_SP = 8, U_SB_STERM = 9, U_SB_UPPER = 10, U_SB_CR = 11, U_SB_EXTEND = 12, U_SB_LF = 13, U_SB_SCONTINUE = 14, U_SB_COUNT = 15 }`	enum Sentence Break constants.
`USentenceBreakTag{ UBRK_SENTENCE_TERM = 0, UBRK_SENTENCE_TERM_LIMIT = 100, UBRK_SENTENCE_SEP = 100, UBRK_SENTENCE_SEP_LIMIT = 200 }`	enum Enum constants for the sentence break tags returned by getRuleStatus().
`UTransDirection{ UTRANS_FORWARD, UTRANS_REVERSE }`	enum Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
`UVerticalOrientation{ U_VO_ROTATED, U_VO_TRANSFORMED_ROTATED, U_VO_TRANSFORMED_UPRIGHT, U_VO_UPRIGHT }`	enum Vertical Orientation constants.
`UWordBreak{ UBRK_WORD_NONE = 0, UBRK_WORD_NONE_LIMIT = 100, UBRK_WORD_NUMBER = 100, UBRK_WORD_NUMBER_LIMIT = 200, UBRK_WORD_LETTER = 200, UBRK_WORD_LETTER_LIMIT = 300, UBRK_WORD_KANA = 300, UBRK_WORD_KANA_LIMIT = 400, UBRK_WORD_IDEO = 400, UBRK_WORD_IDEO_LIMIT = 500 }`	enum Enum constants for the word break tags returned by getRuleStatus().
UWordBreakValues{ U_WB_OTHER = 0, U_WB_ALETTER = 1, U_WB_FORMAT = 2, U_WB_KATAKANA = 3, U_WB_MIDLETTER = 4, U_WB_MIDNUM = 5, U_WB_NUMERIC = 6, U_WB_EXTENDNUMLET = 7, U_WB_CR = 8, U_WB_EXTEND = 9, U_WB_LF = 10, U_WB_MIDNUMLET =11, U_WB_NEWLINE =12, U_WB_REGIONAL_INDICATOR = 13, U_WB_HEBREW_LETTER = 14, U_WB_SINGLE_QUOTE = 15, U_WB_DOUBLE_QUOTE = 16, U_WB_E_BASE = 17, U_WB_E_BASE_GAZ = 18, U_WB_E_MODIFIER = 19, U_WB_GLUE_AFTER_ZWJ = 20, U_WB_ZWJ = 21, U_WB_WSEGSPACE = 22, U_WB_COUNT = 23 }	enum Word Break constants.

Typedefs
`OldUChar`	OldUChar `uint16_t` Default ICU 58 definition of UChar.
`UBidiPairedBracketType`	typedef `enum UBidiPairedBracketType` Bidi Paired Bracket Type constants.
`UBlockCode`	typedef `enum UBlockCode`
`UBool`	typedef `int8_t` The ICU boolean type, a signed-byte integer.
`UBreakIteratorType`	typedef `enum UBreakIteratorType` The possible types of text boundaries.
`UCPMap`	typedef `struct UCPMap` Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
`UCPMapValueFilter(const void *context, uint32_t value)`	typedef `uint32_t U_CALLCONV` Callback function type: Modifies a map value.
`UChar`	UChar `uint16_t` The base type for UTF-16 code units and pointers.
`UChar32`	typedef `int32_t` Define UChar32 as a type for single Unicode code points.
`UCharCategory`	typedef `enum UCharCategory` Data for enumerated Unicode general category types.
`UCharDirection`	typedef `enum UCharDirection` This specifies the language directional property of a character set.
`UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)`	typedef `UBool U_CALLCONV` Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type").
`UCharNameChoice`	typedef `enum UCharNameChoice` Selector constants for u_charName().
`UColAttribute`	typedef `enum UColAttribute` Attributes that collation service understands.
`UColAttributeValue`	typedef `enum UColAttributeValue` Enum containing attribute values for controlling collation behavior.
`UColBoundMode`	typedef `enum UColBoundMode` Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.
`UColReorderCode`	typedef `enum UColReorderCode` Enum containing the codes for reordering segments of the collation table that are not script codes.
`UCollationResult`	typedef `enum UCollationResult` UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.
`UCollationStrength`	typedef `UColAttributeValue` Base letter represents a primary difference.
`UCollator`	typedef `struct UCollator` structure representing a collator object instance
`UDate`	typedef `double` Date and Time data type.
`UDecompositionType`	typedef `enum UDecompositionType` Decomposition Type constants.
`UDisplayContext`	typedef `enum UDisplayContext`
`UDisplayContextType`	typedef `enum UDisplayContextType`
`UEastAsianWidth`	typedef `enum UEastAsianWidth` East Asian Width constants.
`UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)`	typedef `UBool U_CALLCONV` Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.
`UEnumeration`	typedef `struct UEnumeration` structure representing an enumeration object instance
`UErrorCode`	typedef `enum UErrorCode` Standard ICU4C error code type, a substitute for exceptions.
`UGraphemeClusterBreak`	typedef `enum UGraphemeClusterBreak` Grapheme Cluster Break constants.
`UHangulSyllableType`	typedef `enum UHangulSyllableType` Hangul Syllable Type constants.
`UIndicPositionalCategory`	typedef `enum UIndicPositionalCategory` Indic Positional Category constants.
`UIndicSyllabicCategory`	typedef `enum UIndicSyllabicCategory` Indic Syllabic Category constants.
`UJoiningGroup`	typedef `enum UJoiningGroup` Joining Group constants.
`UJoiningType`	typedef `enum UJoiningType` Joining Type constants.
`ULineBreak`	typedef `enum ULineBreak` Line Break constants.
`ULineBreakTag`	typedef `enum ULineBreakTag` Enum constants for the line break tags returned by getRuleStatus().
`ULocAvailableType`	typedef `enum ULocAvailableType` Types for uloc_getAvailableByType and uloc_countAvailableByType.
`ULocaleData`	typedef `struct ULocaleData` A locale data object.
`ULocaleDisplayNames`	typedef `struct ULocaleDisplayNames` C typedef for struct ULocaleDisplayNames.
`UNormalizationCheckResult`	typedef `enum UNormalizationCheckResult` Result values for normalization quick check functions.
`UNormalizer2`	typedef `struct UNormalizer2` C typedef for struct UNormalizer2.
`UNumericType`	typedef `enum UNumericType` Numeric Type constants.
`UParseError`	typedef `struct UParseError` A UParseError struct is used to return detailed information about parsing errors.
`UProperty`	typedef `enum UProperty` Selection constants for Unicode properties.
`UPropertyNameChoice`	typedef `enum UPropertyNameChoice` Selector constants for u_getPropertyName() and u_getPropertyValueName().
`UReplaceable`	typedef `void *` An opaque replaceable text object.
`UReplaceableCallbacks`	typedef `struct UReplaceableCallbacks` A set of function pointers that transliterators use to manipulate a UReplaceable.
`UScriptCode`	typedef `enum UScriptCode` Constants for ISO 15924 script codes.
`UScriptUsage`	typedef `enum UScriptUsage` Script usage constants.
`USentenceBreak`	typedef `enum USentenceBreak` Sentence Break constants.
`USentenceBreakTag`	typedef `enum USentenceBreakTag` Enum constants for the sentence break tags returned by getRuleStatus().
`UText`	typedef `struct UText` C typedef for struct UText.
`UTransDirection`	typedef `enum UTransDirection` Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
`UTransPosition`	typedef `struct UTransPosition` Position structure for utrans_transIncremental() incremental transliteration.
`UTransliterator`	typedef `void *` An opaque transliterator for use in C.
`UVersionInfo[U_MAX_VERSION_LENGTH]`	typedef `uint8_t` The binary form of a version on ICU APIs is an array of 4 uint8_t.
`UVerticalOrientation`	typedef `enum UVerticalOrientation` Vertical Orientation constants.
`UWordBreak`	typedef `enum UWordBreak` Enum constants for the word break tags returned by getRuleStatus().
`UWordBreakValues`	typedef `enum UWordBreakValues` Word Break constants.

Variables
`context`	`U_CDECL_BEGIN typedef void *`

Functions
`UChar(U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context)`	`U_CDECL_BEGIN typedef` Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.
`u_charAge(UChar32 c, UVersionInfo versionArray)`	`U_CAPI void U_EXPORT2` Get the "age" of the code point.
`u_charDigitValue(UChar32 c)`	`U_CAPI int32_t U_EXPORT2` Returns the decimal digit value of a decimal digit character.
`u_charDirection(UChar32 c)`	`U_CAPI UCharDirection U_EXPORT2` Returns the bidirectional category value for the code point, which is used in the Unicode bidirectional algorithm (UAX #9 http://www.unicode.org/reports/tr9/).
`u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)`	`U_CAPI UChar32 U_EXPORT2` Find a Unicode character by its name and return its code point value.
`u_charMirror(UChar32 c)`	`U_CAPI UChar32 U_EXPORT2` Maps the specified character to a "mirror-image" character.
`u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Retrieve the name of a Unicode character.
`u_charType(UChar32 c)`	`U_CAPI int8_t U_EXPORT2` Returns the general category value for the code point.
`u_countChar32(const UChar *s, int32_t length)`	`U_CAPI int32_t U_EXPORT2` Count Unicode code points in the length UChar code units of the string.
`u_digit(UChar32 ch, int8_t radix)`	`U_CAPI int32_t U_EXPORT2` Returns the decimal digit value of the code point in the specified radix.
`u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)`	`U_CAPI void U_EXPORT2` Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.
`u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)`	`U_CAPI void U_EXPORT2` Enumerate efficiently all code points with their Unicode general categories.
`u_errorName(UErrorCode code)`	`U_CAPI const char *U_EXPORT2` Return a string for a UErrorCode value.
`u_foldCase(UChar32 c, uint32_t options)`	`U_CAPI UChar32 U_EXPORT2` The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
`u_forDigit(int32_t digit, int8_t radix)`	`U_CAPI UChar32 U_EXPORT2` Determines the character representation for a specific digit in the specified radix.
`u_getBidiPairedBracket(UChar32 c)`	`U_CAPI UChar32 U_EXPORT2` Maps the specified character to its paired bracket character.
`u_getCombiningClass(UChar32 c)`	`U_CAPI uint8_t U_EXPORT2` Returns the combining class of the code point as specified in UnicodeData.txt.
`u_getIntPropertyMaxValue(UProperty which)`	`U_CAPI int32_t U_EXPORT2` Get the maximum value for an enumerated/integer/binary Unicode property.
`u_getIntPropertyMinValue(UProperty which)`	`U_CAPI int32_t U_EXPORT2` Get the minimum value for an enumerated/integer/binary Unicode property.
`u_getIntPropertyValue(UChar32 c, UProperty which)`	`U_CAPI int32_t U_EXPORT2` Get the property value for an enumerated or integer Unicode property for a code point.
`u_getNumericValue(UChar32 c)`	`U_CAPI double U_EXPORT2` Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
`u_getPropertyEnum(const char *alias)`	`U_CAPI UProperty U_EXPORT2` Return the UProperty enum for a given property name, as specified in the Unicode database file PropertyAliases.txt.
`u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)`	`U_CAPI const char *U_EXPORT2` Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases.txt.
`u_getPropertyValueEnum(UProperty property, const char *alias)`	`U_CAPI int32_t U_EXPORT2` Return the property value integer for a given value name, as specified in the Unicode database file PropertyValueAliases.txt.
`u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)`	`U_CAPI const char *U_EXPORT2` Return the Unicode name for a given property value, as given in the Unicode database file PropertyValueAliases.txt.
`u_getUnicodeVersion(UVersionInfo versionArray)`	`U_CAPI void U_EXPORT2` Gets the Unicode version information.
`u_getVersion(UVersionInfo versionArray)`	`U_CAPI void U_EXPORT2` Gets the ICU release version.
`u_hasBinaryProperty(UChar32 c, UProperty which)`	`U_CAPI UBool U_EXPORT2` Check a binary Unicode property for a code point.
`u_isIDIgnorable(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.
`u_isIDPart(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines if the specified character is permissible in an identifier according to Java.
`u_isIDStart(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines if the specified character is permissible as the first character in an identifier according to Unicode (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
`u_isISOControl(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is an ISO control code.
`u_isJavaIDPart(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines if the specified character is permissible in a Java identifier.
`u_isJavaIDStart(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines if the specified character is permissible as the first character in a Java identifier.
`u_isJavaSpaceChar(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determine if the specified code point is a space character according to Java.
`u_isMirrored(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the code point has the Bidi_Mirrored property.
`u_isUAlphabetic(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Check if a code point has the Alphabetic Unicode property.
`u_isULowercase(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Check if a code point has the Lowercase Unicode property.
`u_isUUppercase(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Check if a code point has the Uppercase Unicode property.
`u_isUWhiteSpace(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Check if a code point has the White_Space Unicode property.
`u_isWhitespace(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines if the specified code point is a whitespace character according to Java/ICU.
`u_isalnum(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.
`u_isalpha(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a letter character.
`u_isbase(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Non-standard: Determines whether the specified code point is a base character.
`u_isblank(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.
`u_iscntrl(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a control character (as defined by this function).
`u_isdefined(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is "defined", which usually means that it is assigned a character.
`u_isdigit(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a digit character according to Java.
`u_isgraph(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
`u_islower(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point has the general category "Ll" (lowercase letter).
`u_isprint(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a printable character.
`u_ispunct(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a punctuation character.
`u_isspace(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines if the specified character is a space character or not.
`u_istitle(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a titlecase letter.
`u_isupper(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point has the general category "Lu" (uppercase letter).
`u_isxdigit(UChar32 c)`	`U_CAPI UBool U_EXPORT2` Determines whether the specified code point is a hexadecimal digit.
`u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)`	`U_CAPI int32_t U_EXPORT2` Compare two strings case-insensitively using full case folding.
`u_memchr(const UChar *s, UChar c, int32_t count)`	`U_CAPI UChar *U_EXPORT2` Find the first occurrence of a BMP code point in a string.
`u_memchr32(const UChar *s, UChar32 c, int32_t count)`	`U_CAPI UChar *U_EXPORT2` Find the first occurrence of a code point in a string.
`u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)`	`U_CAPI int32_t U_EXPORT2` Compare the first `count` UChars of each buffer.
`u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)`	`U_CAPI int32_t U_EXPORT2` Compare two Unicode strings in code point order.
`u_memcpy(UChar *dest, const UChar *src, int32_t count)`	`U_CAPI UChar *U_EXPORT2` Synonym for memcpy(), but with UChars only.
`u_memmove(UChar *dest, const UChar *src, int32_t count)`	`U_CAPI UChar *U_EXPORT2` Synonym for memmove(), but with UChars only.
`u_memrchr(const UChar *s, UChar c, int32_t count)`	`U_CAPI UChar *U_EXPORT2` Find the last occurrence of a BMP code point in a string.
`u_memrchr32(const UChar *s, UChar32 c, int32_t count)`	`U_CAPI UChar *U_EXPORT2` Find the last occurrence of a code point in a string.
`u_memset(UChar *dest, UChar c, int32_t count)`	`U_CAPI UChar *U_EXPORT2` Initialize `count` characters of `dest` to `c`.
`u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Compare two strings case-insensitively using full case folding.
`u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)`	`U_CAPI int32_t U_EXPORT2` Compare two Unicode strings (binary order).
`u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)`	`U_CAPI UChar *U_EXPORT2` Find the first occurrence of a substring in a string.
`u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)`	`U_CAPI UChar *U_EXPORT2` Find the last occurrence of a substring in a string.
`u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Case-folds the characters in a string.
`u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)`	`U_CAPI UChar *U_EXPORT2` Convert a UTF-32 string to UTF-16.
`u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)`	`U_CAPI UChar *U_EXPORT2` Convert a UTF-32 string to UTF-16.
`u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)`	`U_CAPI UChar *U_EXPORT2` Convert a UTF-8 string to UTF-16.
`u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)`	`U_CAPI UChar *U_EXPORT2` Convert a UTF-8 string to UTF-16.
`u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)`	`U_CAPI UChar *U_EXPORT2` Convert a UTF-8 string to UTF-16.
`u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)`	`U_CAPI UBool U_EXPORT2` Check if the string contains more Unicode code points than a certain number.
`u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Lowercase the characters in a string.
`u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Titlecase a string.
`u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)`	`U_CAPI UChar32 *U_EXPORT2` Convert a UTF-16 string to UTF-32.
`u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)`	`U_CAPI UChar32 *U_EXPORT2` Convert a UTF-16 string to UTF-32.
`u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)`	`U_CAPI char *U_EXPORT2` Convert a UTF-16 string to UTF-8.
`u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)`	`U_CAPI char *U_EXPORT2` Convert a UTF-16 string to UTF-8.
`u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)`	`U_CDECL_END U_CAPI int32_t U_EXPORT2` Uppercase the characters in a string.
`u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)`	`U_CAPI int32_t U_EXPORT2` Compare two strings case-insensitively using full case folding.
`u_strcat(UChar *dst, const UChar *src)`	`U_CAPI UChar *U_EXPORT2` Concatenate two ustrings.
`u_strchr(const UChar *s, UChar c)`	`U_CAPI UChar *U_EXPORT2` Find the first occurrence of a BMP code point in a string.
`u_strchr32(const UChar *s, UChar32 c)`	`U_CAPI UChar *U_EXPORT2` Find the first occurrence of a code point in a string.
`u_strcmp(const UChar *s1, const UChar *s2)`	`U_CAPI int32_t U_EXPORT2` Compare two Unicode strings for bitwise equality (code unit order).
`u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)`	`U_CAPI int32_t U_EXPORT2` Compare two Unicode strings in code point order.
`u_strcpy(UChar *dst, const UChar *src)`	`U_CAPI UChar *U_EXPORT2` Copy a ustring.
`u_strcspn(const UChar *string, const UChar *matchSet)`	`U_CAPI int32_t U_EXPORT2` Returns the number of consecutive characters in `string`, beginning with the first, that do not occur somewhere in `matchSet`.
`u_strlen(const UChar *s)`	`U_CAPI int32_t U_EXPORT2` Determine the length of an array of UChar.
`u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)`	`U_CAPI int32_t U_EXPORT2` Compare two strings case-insensitively using full case folding.
`u_strncat(UChar *dst, const UChar *src, int32_t n)`	`U_CAPI UChar *U_EXPORT2` Concatenate two ustrings.
`u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)`	`U_CAPI int32_t U_EXPORT2` Compare two ustrings for bitwise equality.
`u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)`	`U_CAPI int32_t U_EXPORT2` Compare two Unicode strings in code point order.
`u_strncpy(UChar *dst, const UChar *src, int32_t n)`	`U_CAPI UChar *U_EXPORT2` Copy a ustring.
`u_strpbrk(const UChar *string, const UChar *matchSet)`	`U_CAPI UChar *U_EXPORT2` Locates the first occurrence in the string `string` of any of the characters in the string `matchSet`.
`u_strrchr(const UChar *s, UChar c)`	`U_CAPI UChar *U_EXPORT2` Find the last occurrence of a BMP code point in a string.
`u_strrchr32(const UChar *s, UChar32 c)`	`U_CAPI UChar *U_EXPORT2` Find the last occurrence of a code point in a string.
`u_strrstr(const UChar *s, const UChar *substring)`	`U_CAPI UChar *U_EXPORT2` Find the last occurrence of a substring in a string.
`u_strspn(const UChar *string, const UChar *matchSet)`	`U_CAPI int32_t U_EXPORT2` Returns the number of consecutive characters in `string`, beginning with the first, that occur somewhere in `matchSet`.
`u_strstr(const UChar *s, const UChar *substring)`	`U_CAPI UChar *U_EXPORT2` Find the first occurrence of a substring in a string.
`u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)`	`U_CAPI UChar *U_EXPORT2` The string tokenizer API allows an application to break a string into tokens.
`u_tolower(UChar32 c)`	`U_CAPI UChar32 U_EXPORT2` The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
`u_totitle(UChar32 c)`	`U_CAPI UChar32 U_EXPORT2` The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
`u_toupper(UChar32 c)`	`U_CAPI UChar32 U_EXPORT2` The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
`u_versionToString(const UVersionInfo versionArray, char *versionString)`	`U_CAPI void U_EXPORT2` Write a string with dotted-decimal version information according to the input UVersionInfo.
`ubrk_clone(const UBreakIterator *bi, UErrorCode *status)`	`U_CAPI UBreakIterator *U_EXPORT2` Thread safe cloning operation.
`ubrk_close(UBreakIterator *bi)`	`U_CAPI void U_EXPORT2` Close a UBreakIterator.
`ubrk_countAvailable(void)`	`U_CAPI int32_t U_EXPORT2` Determine how many locales have text breaking information available.
`ubrk_current(const UBreakIterator *bi)`	`U_CAPI int32_t U_EXPORT2` Determine the most recently-returned text boundary.
`ubrk_first(UBreakIterator *bi)`	`U_CAPI int32_t U_EXPORT2` Set the iterator position to zero, the start of the text being scanned.
`ubrk_following(UBreakIterator *bi, int32_t offset)`	`U_CAPI int32_t U_EXPORT2` Advance the iterator to the first boundary following the specified offset.
`ubrk_getAvailable(int32_t index)`	`U_CAPI const char *U_EXPORT2` Get a locale for which text breaking information is available.
`ubrk_getRuleStatus(UBreakIterator *bi)`	`U_CAPI int32_t U_EXPORT2` Return the status from the break rule that determined the most recently returned break position.
`ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Get the statuses from the break rules that determined the most recently returned break position.
`ubrk_isBoundary(UBreakIterator *bi, int32_t offset)`	`U_CAPI UBool U_EXPORT2` Returns true if the specified position is a boundary position.
`ubrk_last(UBreakIterator *bi)`	`U_CAPI int32_t U_EXPORT2` Set the iterator position to the index immediately beyond the last character in the text being scanned.
`ubrk_next(UBreakIterator *bi)`	`U_CAPI int32_t U_EXPORT2` Advance the iterator to the boundary following the current boundary.
`ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)`	`U_CAPI UBreakIterator *U_EXPORT2` Open a new UBreakIterator for locating text boundaries for a specified locale.
`ubrk_preceding(UBreakIterator *bi, int32_t offset)`	`U_CAPI int32_t U_EXPORT2` Set the iterator position to the first boundary preceding the specified offset.
`ubrk_previous(UBreakIterator *bi)`	`U_CAPI int32_t U_EXPORT2` Set the iterator position to the boundary preceding the current boundary.
`ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Sets an existing iterator to point to a new piece of text.
`ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Sets an existing iterator to point to a new piece of text.
`ucol_clone(const UCollator *coll, UErrorCode *status)`	`U_CAPI UCollator *U_EXPORT2` Thread safe cloning operation.
`ucol_close(UCollator *coll)`	`U_CAPI void U_EXPORT2` Close a UCollator.
`ucol_countAvailable(void)`	`U_CAPI int32_t U_EXPORT2` Determine how many locales have collation rules available.
`ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)`	`U_CAPI UColAttributeValue U_EXPORT2` Universal attribute getter.
`ucol_getAvailable(int32_t localeIndex)`	`U_CAPI const char *U_EXPORT2` Get a locale for which collation rules are available.
`ucol_getDisplayName(const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Get the display name for a UCollator.
`ucol_getEquivalentReorderCodes(int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Retrieves the reorder codes that are grouped with the given reorder code.
`ucol_getFunctionalEquivalent(char *result, int32_t resultCapacity, const char *keyword, const char *locale, UBool *isAvailable, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Return the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.
`ucol_getKeywordValues(const char *keyword, UErrorCode *status)`	`U_CAPI UEnumeration *U_EXPORT2` Given a keyword, create a string enumeration of all values for that keyword that are currently in use.
`ucol_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode *status)`	`U_CAPI UEnumeration *U_EXPORT2` Given a key and a locale, returns an array of string values in a preferred order that would make a difference.
`ucol_getKeywords(UErrorCode *status)`	`U_CAPI UEnumeration *U_EXPORT2` Create a string enumerator of all possible keywords that are relevant to collation.
`ucol_getMaxVariable(const UCollator *coll)`	`U_CAPI UColReorderCode U_EXPORT2` Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
`ucol_getReorderCodes(const UCollator *coll, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Retrieves the reordering codes for this collator.
`ucol_getSortKey(const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength)`	`U_CAPI int32_t U_EXPORT2` Get a sort key for a string from a UCollator.
`ucol_getStrength(const UCollator *coll)`	`U_CAPI UCollationStrength U_EXPORT2` Get the collation strength used in a UCollator.
`ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity)`	`U_CAPI int32_t U_EXPORT2` Merges two sort keys.
`ucol_open(const char *loc, UErrorCode *status)`	`U_CAPI UCollator *U_EXPORT2` Open a UCollator for comparing strings.
`ucol_openAvailableLocales(UErrorCode *status)`	`U_CAPI UEnumeration *U_EXPORT2` Create a string enumerator of all locales for which a valid collator may be opened.
`ucol_openRules(const UChar *rules, int32_t rulesLength, UColAttributeValue normalizationMode, UCollationStrength strength, UParseError *parseError, UErrorCode *status)`	`U_CAPI UCollator *U_EXPORT2` Produce a UCollator instance according to the rules supplied.
`ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Universal attribute setter.
`ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode)`	`U_CAPI void U_EXPORT2` Sets the variable top to the top of the specified reordering group.
`ucol_setReorderCodes(UCollator *coll, const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode *pErrorCode)`	`U_CAPI void U_EXPORT2` Sets the reordering codes for this collator.
`ucol_setStrength(UCollator *coll, UCollationStrength strength)`	`U_CAPI void U_EXPORT2` Set the collation strength used in a UCollator.
`ucol_strcoll(const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)`	`U_CAPI UCollationResult U_EXPORT2` Compare two strings.
`ucol_strcollUTF8(const UCollator *coll, const char *source, int32_t sourceLength, const char *target, int32_t targetLength, UErrorCode *status)`	`U_CAPI UCollationResult U_EXPORT2` Compare two strings in UTF-8.
`uenum_close(UEnumeration *en)`	`U_CAPI void U_EXPORT2` Disposes of resources in use by the iterator.
`uenum_count(UEnumeration *en, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Returns the number of elements that the iterator traverses.
`uenum_next(UEnumeration *en, int32_t *resultLength, UErrorCode *status)`	`U_CAPI const char *U_EXPORT2` Returns the next element in the iterator's list.
`uenum_openCharStringsEnumeration(const char *const strings[], int32_t count, UErrorCode *ec)`	`U_CAPI UEnumeration *U_EXPORT2` Given an array of const char strings (invariant chars only), return a UEnumeration.
`uenum_openUCharStringsEnumeration(const UChar *const strings[], int32_t count, UErrorCode *ec)`	`U_CAPI UEnumeration *U_EXPORT2` Given an array of const UChar strings, return a UEnumeration.
`uenum_reset(UEnumeration *en, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Resets the iterator to the current list of service IDs.
`uenum_unext(UEnumeration *en, int32_t *resultLength, UErrorCode *status)`	`U_CAPI const UChar *U_EXPORT2` Returns the next element in the iterator's list.
`uldn_close(ULocaleDisplayNames *ldn)`	`U_CAPI void U_EXPORT2` Closes a ULocaleDisplayNames instance obtained from uldn_open().
`uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type, UErrorCode *pErrorCode)`	`U_CAPI UDisplayContext U_EXPORT2` Returns the UDisplayContext value for the specified UDisplayContextType.
`uldn_getDialectHandling(const ULocaleDisplayNames *ldn)`	`U_CAPI UDialectHandling U_EXPORT2` Returns the dialect handling used in the display names.
`uldn_getLocale(const ULocaleDisplayNames *ldn)`	`U_CAPI const char *U_EXPORT2` Returns the locale used to determine the display names.
`uldn_keyDisplayName(const ULocaleDisplayNames *ldn, const char *key, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the display name of the provided locale key.
`uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, const char *key, const char *value, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the display name of the provided value (used with the provided key).
`uldn_languageDisplayName(const ULocaleDisplayNames *ldn, const char *lang, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the display name of the provided language code.
`uldn_localeDisplayName(const ULocaleDisplayNames *ldn, const char *locale, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the display name of the provided locale.
`uldn_open(const char *locale, UDialectHandling dialectHandling, UErrorCode *pErrorCode)`	`U_CAPI ULocaleDisplayNames *U_EXPORT2` Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided dialectHandling.
`uldn_openForContext(const char *locale, UDisplayContext *contexts, int32_t length, UErrorCode *pErrorCode)`	`U_CAPI ULocaleDisplayNames *U_EXPORT2` Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided UDisplayContext settings.
`uldn_regionDisplayName(const ULocaleDisplayNames *ldn, const char *region, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the display name of the provided region code.
`uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, UScriptCode scriptCode, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the display name of the provided script code.
`uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, const char *script, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the display name of the provided script.
`uldn_variantDisplayName(const ULocaleDisplayNames *ldn, const char *variant, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the display name of the provided variant.
`uloc_acceptLanguage(char *result, int32_t resultAvailable, UAcceptResult *outResult, const char **acceptList, int32_t acceptListCount, UEnumeration *availableLocales, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Based on a list of available locales, determine an acceptable locale for the user.
`uloc_addLikelySubtags(const char *localeID, char *maximizedLocaleID, int32_t maximizedLocaleIDCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Add the likely subtags for a provided locale ID, per the Likely Subtags algorithm described in Unicode Technical Standard #35 (the CLDR/LDML specification).
`uloc_canonicalize(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the full name for the specified locale.
`uloc_countAvailable(void)`	`U_CAPI int32_t U_EXPORT2` Gets the size of the all available locale list.
`uloc_forLanguageTag(const char *langtag, char *localeID, int32_t localeIDCapacity, int32_t *parsedLength, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Returns a locale ID for the specified BCP47 language tag string.
`uloc_getAvailable(int32_t n)`	`U_CAPI const char *U_EXPORT2` Gets the specified locale from a list of available locales.
`uloc_getBaseName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the full name for the specified locale, like uloc_getName(), but without keywords.
`uloc_getCharacterOrientation(const char *localeId, UErrorCode *status)`	`U_CAPI ULayoutType U_EXPORT2` Get the layout character orientation for the specified locale.
`uloc_getCountry(const char *localeID, char *country, int32_t countryCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the country code for the specified locale.
`uloc_getDefault(void)`	`U_CAPI const char *U_EXPORT2` Gets ICU's default locale.
`uloc_getDisplayCountry(const char *locale, const char *displayLocale, UChar *country, int32_t countryCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Gets the country name suitable for display for the specified locale.
`uloc_getDisplayKeyword(const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Gets the keyword name suitable for display for the specified locale.
`uloc_getDisplayKeywordValue(const char *locale, const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Gets the value of the keyword suitable for display for the specified locale.
`uloc_getDisplayLanguage(const char *locale, const char *displayLocale, UChar *language, int32_t languageCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Gets the language name suitable for display for the specified locale.
`uloc_getDisplayName(const char *localeID, const char *inLocaleID, UChar *result, int32_t maxResultSize, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the full name suitable for display for the specified locale.
`uloc_getDisplayScript(const char *locale, const char *displayLocale, UChar *script, int32_t scriptCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Gets the script name suitable for display for the specified locale.
`uloc_getDisplayVariant(const char *locale, const char *displayLocale, UChar *variant, int32_t variantCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Gets the variant name suitable for display for the specified locale.
`uloc_getISO3Country(const char *localeID)`	`U_CAPI const char *U_EXPORT2` Gets the ISO country code for the specified locale.
`uloc_getISO3Language(const char *localeID)`	`U_CAPI const char *U_EXPORT2` Gets the ISO language code for the specified locale.
`uloc_getISOCountries(void)`	`U_CAPI const char *const *U_EXPORT2` Gets a list of all available 2-letter country codes defined in ISO 3166.
`uloc_getISOLanguages(void)`	`U_CAPI const char *const *U_EXPORT2` Gets a list of all available 2-letter language codes defined in ISO 639, plus additional 3-letter codes determined to be useful for locale generation as defined by Unicode CLDR.
`uloc_getKeywordValue(const char *localeID, const char *keywordName, char *buffer, int32_t bufferCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Get the value for a keyword.
`uloc_getLanguage(const char *localeID, char *language, int32_t languageCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the language code for the specified locale.
`uloc_getLineOrientation(const char *localeId, UErrorCode *status)`	`U_CAPI ULayoutType U_EXPORT2` Get the layout line orientation for the specified locale.
`uloc_getName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the full name for the specified locale.
`uloc_getScript(const char *localeID, char *script, int32_t scriptCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the script code for the specified locale.
`uloc_getVariant(const char *localeID, char *variant, int32_t variantCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the variant code for the specified locale.
`uloc_isRightToLeft(const char *locale)`	`U_CAPI UBool U_EXPORT2` Returns whether the locale's script is written right-to-left.
`uloc_minimizeSubtags(const char *localeID, char *minimizedLocaleID, int32_t minimizedLocaleIDCapacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Minimize the subtags for a provided locale ID, per the Likely Subtags algorithm described in Unicode Technical Standard #35 (the CLDR/LDML specification).
`uloc_openKeywords(const char *localeID, UErrorCode *status)`	`U_CAPI UEnumeration *U_EXPORT2` Gets an enumeration of keywords for the specified locale.
`uloc_setKeywordValue(const char *keywordName, const char *keywordValue, char *buffer, int32_t bufferCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Sets or removes the value of the specified keyword.
`uloc_toLanguageTag(const char *localeID, char *langtag, int32_t langtagCapacity, UBool strict, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Returns a well-formed language tag for this locale ID.
`uloc_toLegacyKey(const char *keyword)`	`U_CAPI const char *U_EXPORT2` Converts the specified keyword (BCP 47 Unicode locale extension key, or legacy key) to the legacy key.
`uloc_toLegacyType(const char *keyword, const char *value)`	`U_CAPI const char *U_EXPORT2` Converts the specified keyword value (BCP 47 Unicode locale extension type, or legacy type or type alias) to the canonical legacy type.
`uloc_toUnicodeLocaleKey(const char *keyword)`	`U_CAPI const char *U_EXPORT2` Converts the specified keyword (legacy key, or BCP 47 Unicode locale extension key) to the equivalent BCP 47 Unicode locale extension key.
`uloc_toUnicodeLocaleType(const char *keyword, const char *value)`	`U_CAPI const char *U_EXPORT2` Converts the specified keyword value (legacy type, or BCP 47 Unicode locale extension type) to the well-formed BCP 47 Unicode locale extension type for the specified keyword (category).
`ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Return the current CLDR version used by the library.
`unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.
`unorm2_close(UNormalizer2 *norm2)`	`U_CAPI void U_EXPORT2` Closes a UNormalizer2 instance from unorm2_openFiltered().
`unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)`	`U_CAPI UChar32 U_EXPORT2` Performs pairwise composition of a & b and returns the composite if there is one.
`unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)`	`U_CAPI uint8_t U_EXPORT2` Gets the combining class of c.
`unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Gets the decomposition mapping of c.
`unorm2_getNFCInstance(UErrorCode *pErrorCode)`	`U_CAPI const UNormalizer2 *U_EXPORT2` Returns a UNormalizer2 instance for Unicode NFC normalization.
`unorm2_getNFDInstance(UErrorCode *pErrorCode)`	`U_CAPI const UNormalizer2 *U_EXPORT2` Returns a UNormalizer2 instance for Unicode NFD normalization.
`unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)`	`U_CAPI const UNormalizer2 *U_EXPORT2` Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
`unorm2_getNFKCInstance(UErrorCode *pErrorCode)`	`U_CAPI const UNormalizer2 *U_EXPORT2` Returns a UNormalizer2 instance for Unicode NFKC normalization.
`unorm2_getNFKDInstance(UErrorCode *pErrorCode)`	`U_CAPI const UNormalizer2 *U_EXPORT2` Returns a UNormalizer2 instance for Unicode NFKD normalization.
`unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Gets the raw decomposition mapping of c.
`unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)`	`U_CAPI UBool U_EXPORT2` Tests if the character always has a normalization boundary after it, regardless of context.
`unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)`	`U_CAPI UBool U_EXPORT2` Tests if the character always has a normalization boundary before it, regardless of context.
`unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)`	`U_CAPI UBool U_EXPORT2` Tests if the character is normalization-inert.
`unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)`	`U_CAPI UBool U_EXPORT2` Tests if the string is normalized.
`unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.
`unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.
`unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)`	`U_CAPI UNormalizationCheckResult U_EXPORT2` Tests if the string is normalized.
`unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Returns the end of the normalized substring of the input string.
`uscript_breaksBetweenLetters(UScriptCode script)`	`U_CAPI UBool U_EXPORT2` Returns true if the script allows line breaks between letters (excluding hyphenation).
`uscript_getCode(const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)`	`U_CAPI int32_t U_EXPORT2` Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
`uscript_getName(UScriptCode scriptCode)`	`U_CAPI const char *U_EXPORT2` Returns the long Unicode script name, if there is one.
`uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)`	`U_CAPI int32_t U_EXPORT2` Writes the script sample character string.
`uscript_getScript(UChar32 codepoint, UErrorCode *err)`	`U_CAPI UScriptCode U_EXPORT2` Gets the script code associated with the given codepoint.
`uscript_getScriptExtensions(UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode)`	`U_CAPI int32_t U_EXPORT2` Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.
`uscript_getShortName(UScriptCode scriptCode)`	`U_CAPI const char *U_EXPORT2` Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.
`uscript_getUsage(UScriptCode script)`	`U_CAPI UScriptUsage U_EXPORT2` Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
`uscript_hasScript(UChar32 c, UScriptCode sc)`	`U_CAPI UBool U_EXPORT2` Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.
`uscript_isCased(UScriptCode script)`	`U_CAPI UBool U_EXPORT2` Returns true if in modern (or most recent) usage of the script case distinctions are customary.
`uscript_isRightToLeft(UScriptCode script)`	`U_CAPI UBool U_EXPORT2` Returns true if the script is written right-to-left.
`utext_char32At(UText *ut, int64_t nativeIndex)`	`U_CAPI UChar32 U_EXPORT2` Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
`utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)`	`U_CAPI UText *U_EXPORT2` Clone a UText.
`utext_close(UText *ut)`	`U_CAPI UText *U_EXPORT2` Close function for UText instances.
`utext_current32(UText *ut)`	`U_CAPI UChar32 U_EXPORT2` Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached the end of the input text.
`utext_equals(const UText *a, const UText *b)`	`U_CAPI UBool U_EXPORT2` Compare two UText objects for equality.
`utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Extract text from a UText into a UChar buffer.
`utext_getNativeIndex(const UText *ut)`	`U_CAPI int64_t U_EXPORT2` Get the current iterator position, which can range from 0 to the length of the text.
`utext_getPreviousNativeIndex(UText *ut)`	`U_CAPI int64_t U_EXPORT2` Get the native index of the character preceding the current position.
`utext_moveIndex32(UText *ut, int32_t delta)`	`U_CAPI UBool U_EXPORT2` Move the iterator position by delta code points.
`utext_nativeLength(UText *ut)`	`U_CAPI int64_t U_EXPORT2` Get the length of the text.
`utext_next32(UText *ut)`	`U_CAPI UChar32 U_EXPORT2` Get the code point at the current iteration position of the UText, and advance the position to the first index following the character.
`utext_next32From(UText *ut, int64_t nativeIndex)`	`U_CAPI UChar32 U_EXPORT2` Set the iteration index and return the code point at that index.
`utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)`	`U_CAPI UText *U_EXPORT2` Open a read-only UText for UChar string.
`utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)`	`U_CAPI UText *U_EXPORT2` Open a read-only UText implementation for UTF-8 strings.
`utext_previous32(UText *ut)`	`U_CAPI UChar32 U_EXPORT2` Move the iterator position to the character (code point) whose index precedes the current position, and return that character.
`utext_previous32From(UText *ut, int64_t nativeIndex)`	`U_CAPI UChar32 U_EXPORT2` Set the iteration index, and return the code point preceding the one specified by the initial index.
`utext_setNativeIndex(UText *ut, int64_t nativeIndex)`	`U_CAPI void U_EXPORT2` Set the current iteration position to the nearest code point boundary at or preceding the specified index.
`utrans_clone(const UTransliterator *trans, UErrorCode *status)`	`U_CAPI UTransliterator *U_EXPORT2` Create a copy of a transliterator.
`utrans_close(UTransliterator *trans)`	`U_CAPI void U_EXPORT2` Close a transliterator.
`utrans_openIDs(UErrorCode *pErrorCode)`	`U_CAPI UEnumeration *U_EXPORT2` Return a UEnumeration for the available transliterators.
`utrans_openInverse(const UTransliterator *trans, UErrorCode *status)`	`U_CAPI UTransliterator *U_EXPORT2` Open an inverse of an existing transliterator.
`utrans_openU(const UChar *id, int32_t idLength, UTransDirection dir, const UChar *rules, int32_t rulesLength, UParseError *parseError, UErrorCode *pErrorCode)`	`U_CAPI UTransliterator *U_EXPORT2` Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.
`utrans_setFilter(UTransliterator *trans, const UChar *filterPattern, int32_t filterPatternLen, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Set the filter used by a transliterator.
`utrans_toRules(const UTransliterator *trans, UBool escapeUnprintable, UChar *result, int32_t resultLength, UErrorCode *status)`	`U_CAPI int32_t U_EXPORT2` Create a rule string that can be passed to utrans_openU to recreate this transliterator.
`utrans_trans(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, int32_t start, int32_t *limit, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Transliterate a segment of a UReplaceable string.
`utrans_transIncremental(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, UTransPosition *pos, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.
`utrans_transIncrementalUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, UTransPosition *pos, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.
`utrans_transUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, int32_t start, int32_t *limit, UErrorCode *status)`	`U_CAPI void U_EXPORT2` Transliterate a segment of a UChar* string.

Structs
UParseError	A UParseError struct is used to return detailed information about parsing errors.
UReplaceableCallbacks	A set of function pointers that transliterators use to manipulate a UReplaceable.
UTransPosition	Position structure for utrans_transIncremental() incremental transliteration.

Enumerations

Anonymous Enum 124

 Anonymous Enum 124

The capacity of the context strings in UParseError.

Properties
`U_PARSE_CONTEXT_LEN`

UAcceptResult

 UAcceptResult

Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.

See also: uloc_acceptLanguageFromHTTP, uloc_acceptLanguage

Properties
`ULOC_ACCEPT_FAILED`	No exact match was found.
`ULOC_ACCEPT_FALLBACK`	A fallback was found. For example, the Accept-Language list includes 'ja_JP' and is matched with available locale 'ja'.
`ULOC_ACCEPT_VALID`	An exact match was found.

ULOC_ACCEPT_FAILED

No exact match was found.

ULOC_ACCEPT_FALLBACK

A fallback was found.

For example, the Accept-Language list includes 'ja_JP' and is matched with available locale 'ja'.

ULOC_ACCEPT_VALID

An exact match was found.

UBidiPairedBracketType

 UBidiPairedBracketType

Bidi Paired Bracket Type constants.

See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE

Properties
`U_BPT_CLOSE`	Close paired bracket.
`U_BPT_COUNT`	One more than the highest normal UBidiPairedBracketType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE). Deprecated: ICU 58. The numeric value may change over time; see ICU ticket #12420.
`U_BPT_NONE`	Not a paired bracket.
`U_BPT_OPEN`	Open paired bracket.

UBlockCode

 UBlockCode

Constants for Unicode blocks, see the Unicode Data file Blocks.txt.

Properties
`UBLOCK_ADLAM`
`UBLOCK_AEGEAN_NUMBERS`
`UBLOCK_AHOM`
`UBLOCK_ALCHEMICAL_SYMBOLS`
`UBLOCK_ALPHABETIC_PRESENTATION_FORMS`
`UBLOCK_ANATOLIAN_HIEROGLYPHS`
`UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION`
`UBLOCK_ANCIENT_GREEK_NUMBERS`
`UBLOCK_ANCIENT_SYMBOLS`
`UBLOCK_ARABIC`
`UBLOCK_ARABIC_EXTENDED_A`
`UBLOCK_ARABIC_EXTENDED_B`
`UBLOCK_ARABIC_EXTENDED_C`
`UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS`
`UBLOCK_ARABIC_PRESENTATION_FORMS_A`
`UBLOCK_ARABIC_PRESENTATION_FORMS_B`
`UBLOCK_ARABIC_SUPPLEMENT`
`UBLOCK_ARMENIAN`
`UBLOCK_ARROWS`
`UBLOCK_AVESTAN`
`UBLOCK_BALINESE`
`UBLOCK_BAMUM`
`UBLOCK_BAMUM_SUPPLEMENT`
`UBLOCK_BASIC_LATIN`
`UBLOCK_BASSA_VAH`
`UBLOCK_BATAK`
`UBLOCK_BENGALI`
`UBLOCK_BHAIKSUKI`
`UBLOCK_BLOCK_ELEMENTS`
`UBLOCK_BOPOMOFO`
`UBLOCK_BOPOMOFO_EXTENDED`
`UBLOCK_BOX_DRAWING`
`UBLOCK_BRAHMI`
`UBLOCK_BRAILLE_PATTERNS`
`UBLOCK_BUGINESE`
`UBLOCK_BUHID`
`UBLOCK_BYZANTINE_MUSICAL_SYMBOLS`
`UBLOCK_CARIAN`
`UBLOCK_CAUCASIAN_ALBANIAN`
`UBLOCK_CHAKMA`
`UBLOCK_CHAM`
`UBLOCK_CHEROKEE`
`UBLOCK_CHEROKEE_SUPPLEMENT`
`UBLOCK_CHESS_SYMBOLS`
`UBLOCK_CHORASMIAN`
`UBLOCK_CJK_COMPATIBILITY`
`UBLOCK_CJK_COMPATIBILITY_FORMS`
`UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS`
`UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT`
`UBLOCK_CJK_RADICALS_SUPPLEMENT`
`UBLOCK_CJK_STROKES`
`UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G`
`UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H`
`UBLOCK_COMBINING_DIACRITICAL_MARKS`
`UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED`
`UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT`
`UBLOCK_COMBINING_HALF_MARKS`
`UBLOCK_COMBINING_MARKS_FOR_SYMBOLS`	Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
`UBLOCK_COMMON_INDIC_NUMBER_FORMS`
`UBLOCK_CONTROL_PICTURES`
`UBLOCK_COPTIC`
`UBLOCK_COPTIC_EPACT_NUMBERS`
`UBLOCK_COUNT`	One more than the highest normal UBlockCode value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK). Deprecated: ICU 58. The numeric value may change over time; see ICU ticket #12420.
`UBLOCK_COUNTING_ROD_NUMERALS`
`UBLOCK_CUNEIFORM`
`UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION`
`UBLOCK_CURRENCY_SYMBOLS`
`UBLOCK_CYPRIOT_SYLLABARY`
`UBLOCK_CYPRO_MINOAN`
`UBLOCK_CYRILLIC`
`UBLOCK_CYRILLIC_EXTENDED_A`
`UBLOCK_CYRILLIC_EXTENDED_B`
`UBLOCK_CYRILLIC_EXTENDED_C`
`UBLOCK_CYRILLIC_EXTENDED_D`
`UBLOCK_CYRILLIC_SUPPLEMENT`
`UBLOCK_CYRILLIC_SUPPLEMENTARY`	Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
`UBLOCK_DESERET`
`UBLOCK_DEVANAGARI`
`UBLOCK_DEVANAGARI_EXTENDED`
`UBLOCK_DEVANAGARI_EXTENDED_A`
`UBLOCK_DINGBATS`
`UBLOCK_DIVES_AKURU`
`UBLOCK_DOGRA`
`UBLOCK_DOMINO_TILES`
`UBLOCK_DUPLOYAN`
`UBLOCK_EARLY_DYNASTIC_CUNEIFORM`
`UBLOCK_EGYPTIAN_HIEROGLYPHS`
`UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS`
`UBLOCK_ELBASAN`
`UBLOCK_ELYMAIC`
`UBLOCK_EMOTICONS`
`UBLOCK_ENCLOSED_ALPHANUMERICS`
`UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT`
`UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS`
`UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT`
`UBLOCK_ETHIOPIC`
`UBLOCK_ETHIOPIC_EXTENDED`
`UBLOCK_ETHIOPIC_EXTENDED_A`
`UBLOCK_ETHIOPIC_EXTENDED_B`
`UBLOCK_ETHIOPIC_SUPPLEMENT`
`UBLOCK_GENERAL_PUNCTUATION`
`UBLOCK_GEOMETRIC_SHAPES`
`UBLOCK_GEOMETRIC_SHAPES_EXTENDED`
`UBLOCK_GEORGIAN`
`UBLOCK_GEORGIAN_EXTENDED`
`UBLOCK_GEORGIAN_SUPPLEMENT`
`UBLOCK_GLAGOLITIC`
`UBLOCK_GLAGOLITIC_SUPPLEMENT`
`UBLOCK_GOTHIC`
`UBLOCK_GRANTHA`
`UBLOCK_GREEK`	Unicode 3.2 renames this block to "Greek and Coptic".
`UBLOCK_GREEK_EXTENDED`
`UBLOCK_GUJARATI`
`UBLOCK_GUNJALA_GONDI`
`UBLOCK_GURMUKHI`
`UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS`
`UBLOCK_HANGUL_COMPATIBILITY_JAMO`
`UBLOCK_HANGUL_JAMO`
`UBLOCK_HANGUL_JAMO_EXTENDED_A`
`UBLOCK_HANGUL_JAMO_EXTENDED_B`
`UBLOCK_HANGUL_SYLLABLES`
`UBLOCK_HANIFI_ROHINGYA`
`UBLOCK_HANUNOO`
`UBLOCK_HATRAN`
`UBLOCK_HEBREW`
`UBLOCK_HIGH_PRIVATE_USE_SURROGATES`
`UBLOCK_HIGH_SURROGATES`
`UBLOCK_HIRAGANA`
`UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS`
`UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION`
`UBLOCK_IMPERIAL_ARAMAIC`
`UBLOCK_INDIC_SIYAQ_NUMBERS`
`UBLOCK_INSCRIPTIONAL_PAHLAVI`
`UBLOCK_INSCRIPTIONAL_PARTHIAN`
`UBLOCK_INVALID_CODE`
`UBLOCK_IPA_EXTENSIONS`
`UBLOCK_JAVANESE`
`UBLOCK_KAITHI`
`UBLOCK_KAKTOVIK_NUMERALS`
`UBLOCK_KANA_EXTENDED_A`
`UBLOCK_KANA_EXTENDED_B`
`UBLOCK_KANA_SUPPLEMENT`
`UBLOCK_KANBUN`
`UBLOCK_KANGXI_RADICALS`
`UBLOCK_KANNADA`
`UBLOCK_KATAKANA`
`UBLOCK_KATAKANA_PHONETIC_EXTENSIONS`
`UBLOCK_KAWI`
`UBLOCK_KAYAH_LI`
`UBLOCK_KHAROSHTHI`
`UBLOCK_KHITAN_SMALL_SCRIPT`
`UBLOCK_KHMER`
`UBLOCK_KHMER_SYMBOLS`
`UBLOCK_KHOJKI`
`UBLOCK_KHUDAWADI`
`UBLOCK_LAO`
`UBLOCK_LATIN_1_SUPPLEMENT`
`UBLOCK_LATIN_EXTENDED_A`
`UBLOCK_LATIN_EXTENDED_ADDITIONAL`
`UBLOCK_LATIN_EXTENDED_B`
`UBLOCK_LATIN_EXTENDED_C`
`UBLOCK_LATIN_EXTENDED_D`
`UBLOCK_LATIN_EXTENDED_E`
`UBLOCK_LATIN_EXTENDED_F`
`UBLOCK_LATIN_EXTENDED_G`
`UBLOCK_LEPCHA`
`UBLOCK_LETTERLIKE_SYMBOLS`
`UBLOCK_LIMBU`
`UBLOCK_LINEAR_A`
`UBLOCK_LINEAR_B_IDEOGRAMS`
`UBLOCK_LINEAR_B_SYLLABARY`
`UBLOCK_LISU`
`UBLOCK_LISU_SUPPLEMENT`
`UBLOCK_LOW_SURROGATES`
`UBLOCK_LYCIAN`
`UBLOCK_LYDIAN`
`UBLOCK_MAHAJANI`
`UBLOCK_MAHJONG_TILES`
`UBLOCK_MAKASAR`
`UBLOCK_MALAYALAM`
`UBLOCK_MANDAIC`
`UBLOCK_MANICHAEAN`
`UBLOCK_MARCHEN`
`UBLOCK_MASARAM_GONDI`
`UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS`
`UBLOCK_MATHEMATICAL_OPERATORS`
`UBLOCK_MAYAN_NUMERALS`
`UBLOCK_MEDEFAIDRIN`
`UBLOCK_MEETEI_MAYEK`
`UBLOCK_MEETEI_MAYEK_EXTENSIONS`
`UBLOCK_MENDE_KIKAKUI`
`UBLOCK_MEROITIC_CURSIVE`
`UBLOCK_MEROITIC_HIEROGLYPHS`
`UBLOCK_MIAO`
`UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A`
`UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B`
`UBLOCK_MISCELLANEOUS_SYMBOLS`
`UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS`
`UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS`
`UBLOCK_MISCELLANEOUS_TECHNICAL`
`UBLOCK_MODI`
`UBLOCK_MODIFIER_TONE_LETTERS`
`UBLOCK_MONGOLIAN`
`UBLOCK_MONGOLIAN_SUPPLEMENT`
`UBLOCK_MRO`
`UBLOCK_MULTANI`
`UBLOCK_MUSICAL_SYMBOLS`
`UBLOCK_MYANMAR`
`UBLOCK_MYANMAR_EXTENDED_A`
`UBLOCK_MYANMAR_EXTENDED_B`
`UBLOCK_NABATAEAN`
`UBLOCK_NAG_MUNDARI`
`UBLOCK_NANDINAGARI`
`UBLOCK_NEWA`
`UBLOCK_NEW_TAI_LUE`
`UBLOCK_NKO`
`UBLOCK_NO_BLOCK`	New No_Block value in Unicode 4.
`UBLOCK_NUMBER_FORMS`
`UBLOCK_NUSHU`
`UBLOCK_NYIAKENG_PUACHUE_HMONG`
`UBLOCK_OGHAM`
`UBLOCK_OLD_HUNGARIAN`
`UBLOCK_OLD_ITALIC`
`UBLOCK_OLD_NORTH_ARABIAN`
`UBLOCK_OLD_PERMIC`
`UBLOCK_OLD_PERSIAN`
`UBLOCK_OLD_SOGDIAN`
`UBLOCK_OLD_SOUTH_ARABIAN`
`UBLOCK_OLD_TURKIC`
`UBLOCK_OLD_UYGHUR`
`UBLOCK_OL_CHIKI`
`UBLOCK_OPTICAL_CHARACTER_RECOGNITION`
`UBLOCK_ORIYA`
`UBLOCK_ORNAMENTAL_DINGBATS`
`UBLOCK_OSAGE`
`UBLOCK_OSMANYA`
`UBLOCK_OTTOMAN_SIYAQ_NUMBERS`
`UBLOCK_PAHAWH_HMONG`
`UBLOCK_PALMYRENE`
`UBLOCK_PAU_CIN_HAU`
`UBLOCK_PHAGS_PA`
`UBLOCK_PHAISTOS_DISC`
`UBLOCK_PHOENICIAN`
`UBLOCK_PHONETIC_EXTENSIONS`
`UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT`
`UBLOCK_PLAYING_CARDS`
`UBLOCK_PRIVATE_USE`	Same as UBLOCK_PRIVATE_USE_AREA. Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs.
`UBLOCK_PRIVATE_USE_AREA`	Same as UBLOCK_PRIVATE_USE. Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs.
`UBLOCK_PSALTER_PAHLAVI`
`UBLOCK_REJANG`
`UBLOCK_RUMI_NUMERAL_SYMBOLS`
`UBLOCK_RUNIC`
`UBLOCK_SAMARITAN`
`UBLOCK_SAURASHTRA`
`UBLOCK_SHARADA`
`UBLOCK_SHAVIAN`
`UBLOCK_SHORTHAND_FORMAT_CONTROLS`
`UBLOCK_SIDDHAM`
`UBLOCK_SINHALA`
`UBLOCK_SINHALA_ARCHAIC_NUMBERS`
`UBLOCK_SMALL_FORM_VARIANTS`
`UBLOCK_SMALL_KANA_EXTENSION`
`UBLOCK_SOGDIAN`
`UBLOCK_SORA_SOMPENG`
`UBLOCK_SOYOMBO`
`UBLOCK_SPACING_MODIFIER_LETTERS`
`UBLOCK_SPECIALS`
`UBLOCK_SUNDANESE`
`UBLOCK_SUNDANESE_SUPPLEMENT`
`UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS`
`UBLOCK_SUPPLEMENTAL_ARROWS_A`
`UBLOCK_SUPPLEMENTAL_ARROWS_B`
`UBLOCK_SUPPLEMENTAL_ARROWS_C`
`UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS`
`UBLOCK_SUPPLEMENTAL_PUNCTUATION`
`UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS`
`UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A`
`UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B`
`UBLOCK_SUTTON_SIGNWRITING`
`UBLOCK_SYLOTI_NAGRI`
`UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A`
`UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING`
`UBLOCK_SYRIAC`
`UBLOCK_SYRIAC_SUPPLEMENT`
`UBLOCK_TAGALOG`
`UBLOCK_TAGBANWA`
`UBLOCK_TAGS`
`UBLOCK_TAI_LE`
`UBLOCK_TAI_THAM`
`UBLOCK_TAI_VIET`
`UBLOCK_TAI_XUAN_JING_SYMBOLS`
`UBLOCK_TAKRI`
`UBLOCK_TAMIL`
`UBLOCK_TAMIL_SUPPLEMENT`
`UBLOCK_TANGSA`
`UBLOCK_TANGUT`
`UBLOCK_TANGUT_COMPONENTS`
`UBLOCK_TANGUT_SUPPLEMENT`
`UBLOCK_TELUGU`
`UBLOCK_THAANA`
`UBLOCK_THAI`
`UBLOCK_TIBETAN`
`UBLOCK_TIFINAGH`
`UBLOCK_TIRHUTA`
`UBLOCK_TOTO`
`UBLOCK_TRANSPORT_AND_MAP_SYMBOLS`
`UBLOCK_UGARITIC`
`UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS`
`UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED`
`UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A`
`UBLOCK_VAI`
`UBLOCK_VARIATION_SELECTORS`
`UBLOCK_VARIATION_SELECTORS_SUPPLEMENT`
`UBLOCK_VEDIC_EXTENSIONS`
`UBLOCK_VERTICAL_FORMS`
`UBLOCK_VITHKUQI`
`UBLOCK_WANCHO`
`UBLOCK_WARANG_CITI`
`UBLOCK_YEZIDI`
`UBLOCK_YIJING_HEXAGRAM_SYMBOLS`
`UBLOCK_YI_RADICALS`
`UBLOCK_YI_SYLLABLES`
`UBLOCK_ZANABAZAR_SQUARE`
`UBLOCK_ZNAMENNY_MUSICAL_NOTATION`

UBreakIteratorType

 UBreakIteratorType

The possible types of text boundaries.

Properties
`UBRK_CHARACTER`	Character breaks.
`UBRK_COUNT`	One more than the highest normal UBreakIteratorType value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UBRK_LINE`	Line breaks.
`UBRK_SENTENCE`	Sentence breaks.
`UBRK_TITLE`	Title Case breaks. The iterator created using this type locates title boundaries as described for Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, please use Word Boundary iterator. Deprecated. ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
`UBRK_WORD`	Word breaks.

UCPMapRangeOption

 UCPMapRangeOption

Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.

Most users should use UCPMAP_RANGE_NORMAL.

See also: ucpmap_getRange See also: ucptrie_getRange See also: umutablecptrie_getRange

Properties

Properties
`UCPMAP_RANGE_FIXED_ALL_SURROGATES`	ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that all surrogates (U+D800..U+DFFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter. The surrogateValue is not transformed via filter(). See U_IS_SURROGATE(c). Most users should use UCPMAP_RANGE_NORMAL instead. This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points.
`UCPMAP_RANGE_FIXED_LEAD_SURROGATES`	ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that lead surrogates (U+D800..U+DBFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter. The surrogateValue is not transformed via filter(). See U_IS_LEAD(c). Most users should use UCPMAP_RANGE_NORMAL instead. This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points.
`UCPMAP_RANGE_NORMAL`	ucpmap_getRange() enumerates all same-value ranges as stored in the map. Most users should use this option.

UCPMAP_RANGE_FIXED_ALL_SURROGATES

ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that all surrogates (U+D800..U+DFFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter.

The surrogateValue is not transformed via filter(). See U_IS_SURROGATE(c).

Most users should use UCPMAP_RANGE_NORMAL instead.

This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points.

UCPMAP_RANGE_FIXED_LEAD_SURROGATES

ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that lead surrogates (U+D800..U+DBFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter.

The surrogateValue is not transformed via filter(). See U_IS_LEAD(c).

Most users should use UCPMAP_RANGE_NORMAL instead.

UCPMAP_RANGE_NORMAL

ucpmap_getRange() enumerates all same-value ranges as stored in the map.

Most users should use this option.

UCharCategory

 UCharCategory

Data for enumerated Unicode general category types.

See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .

Properties
`U_CHAR_CATEGORY_COUNT`	One higher than the last enum UCharCategory constant. This numeric value is stable (will not change), see http://www.unicode.org/policies/stability_policy.html#Property_Value
`U_COMBINING_SPACING_MARK`	Mc.
`U_CONNECTOR_PUNCTUATION`	Pc.
`U_CONTROL_CHAR`	Cc.
`U_CURRENCY_SYMBOL`	Sc.
`U_DASH_PUNCTUATION`	Pd.
`U_DECIMAL_DIGIT_NUMBER`	Nd.
`U_ENCLOSING_MARK`	Me.
`U_END_PUNCTUATION`	Pe.
`U_FINAL_PUNCTUATION`	Pf.
`U_FORMAT_CHAR`	Cf.
`U_GENERAL_OTHER_TYPES`	Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!)
`U_INITIAL_PUNCTUATION`	Pi.
`U_LETTER_NUMBER`	Nl.
`U_LINE_SEPARATOR`	Zl.
`U_LOWERCASE_LETTER`	Ll.
`U_MATH_SYMBOL`	Sm.
`U_MODIFIER_LETTER`	Lm.
`U_MODIFIER_SYMBOL`	Sk.
`U_NON_SPACING_MARK`	Mn.
`U_OTHER_LETTER`	Lo.
`U_OTHER_NUMBER`	No.
`U_OTHER_PUNCTUATION`	Po.
`U_OTHER_SYMBOL`	So.
`U_PARAGRAPH_SEPARATOR`	Zp.
`U_PRIVATE_USE_CHAR`	Co.
`U_SPACE_SEPARATOR`	Zs.
`U_START_PUNCTUATION`	Ps.
`U_SURROGATE`	Cs.
`U_TITLECASE_LETTER`	Lt.
`U_UNASSIGNED`	Non-category for unassigned and non-character code points.
`U_UPPERCASE_LETTER`	Lu.

UCharDirection

 UCharDirection

This specifies the language directional property of a character set.

Properties
`U_ARABIC_NUMBER`	AN.
`U_BLOCK_SEPARATOR`	B.
`U_BOUNDARY_NEUTRAL`	BN.
`U_CHAR_DIRECTION_COUNT`	One more than the highest UCharDirection value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_COMMON_NUMBER_SEPARATOR`	CS.
`U_DIR_NON_SPACING_MARK`	NSM.
`U_EUROPEAN_NUMBER`	EN.
`U_EUROPEAN_NUMBER_SEPARATOR`	ES.
`U_EUROPEAN_NUMBER_TERMINATOR`	ET.
`U_FIRST_STRONG_ISOLATE`	FSI.
`U_LEFT_TO_RIGHT`	L.
`U_LEFT_TO_RIGHT_EMBEDDING`	LRE.
`U_LEFT_TO_RIGHT_ISOLATE`	LRI.
`U_LEFT_TO_RIGHT_OVERRIDE`	LRO.
`U_OTHER_NEUTRAL`	ON.
`U_POP_DIRECTIONAL_FORMAT`	PDF.
`U_POP_DIRECTIONAL_ISOLATE`	PDI.
`U_RIGHT_TO_LEFT`	R.
`U_RIGHT_TO_LEFT_ARABIC`	AL.
`U_RIGHT_TO_LEFT_EMBEDDING`	RLE.
`U_RIGHT_TO_LEFT_ISOLATE`	RLI.
`U_RIGHT_TO_LEFT_OVERRIDE`	RLO.
`U_SEGMENT_SEPARATOR`	S.
`U_WHITE_SPACE_NEUTRAL`	WS.

UCharNameChoice

 UCharNameChoice

Selector constants for u_charName().

u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.

See also: u_charName

Properties
`U_CHAR_NAME_ALIAS`	Corrected name from NameAliases.txt.
`U_CHAR_NAME_CHOICE_COUNT`	One more than the highest normal UCharNameChoice value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_EXTENDED_CHAR_NAME`	Standard or synthetic character name.
`U_UNICODE_10_CHAR_NAME`	The Unicode_1_Name property value which is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this name choice. Deprecated. ICU 49
`U_UNICODE_CHAR_NAME`	Unicode character name (Name property).

UColAttribute

 UColAttribute

Attributes that collation service understands.

All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.

Properties
`UCOL_ALTERNATE_HANDLING`	Attribute for handling variable elements. Acceptable values are UCOL_NON_IGNORABLE (default) which treats all the codepoints with non-ignorable primary weights in the same way, and UCOL_SHIFTED which causes codepoints with primary weights that are equal or below the variable top value to be ignored on primary level and moved to the quaternary level.
`UCOL_ATTRIBUTE_COUNT`	One more than the highest normal UColAttribute value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCOL_CASE_FIRST`	Controls the ordering of upper and lower case letters. Acceptable values are UCOL_OFF (default), which orders upper and lower case letters in accordance to their tertiary weights, UCOL_UPPER_FIRST which forces upper case letters to sort before lower case letters, and UCOL_LOWER_FIRST which does the opposite.
`UCOL_CASE_LEVEL`	Controls whether an extra case level (positioned before the third level) is generated or not. Acceptable values are UCOL_OFF (default), when case level is not generated, and UCOL_ON which causes the case level to be generated. Contents of the case level are affected by the value of UCOL_CASE_FIRST attribute. A simple way to ignore accent differences in a string is to set the strength to UCOL_PRIMARY and enable case level.
`UCOL_DECOMPOSITION_MODE`	An alias for UCOL_NORMALIZATION_MODE attribute.
`UCOL_FRENCH_COLLATION`	Attribute for direction of secondary weights - used in Canadian French. Acceptable values are UCOL_ON, which results in secondary weights being considered backwards and UCOL_OFF which treats secondary weights in the order they appear.
`UCOL_HIRAGANA_QUATERNARY_MODE`	When turned on, this attribute positions Hiragana before all non-ignorables on quaternary level. This is a sneaky way to produce JIS sort order. This attribute was an implementation detail of the CLDR Japanese tailoring. Since ICU 50, this attribute is not settable any more via API functions. Since CLDR 25/ICU 53, explicit quaternary relations are used to achieve the same Japanese sort order. Deprecated. ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
`UCOL_NORMALIZATION_MODE`	Controls whether the normalization check and necessary normalizations are performed. When set to UCOL_OFF (default) no normalization check is performed. The correctness of the result is guaranteed only if the input data is in so-called FCD form (see users manual for more info). When set to UCOL_ON, an incremental check is performed to see whether the input data is in the FCD form. If the data is not in the FCD form, incremental NFD normalization is performed.
`UCOL_NUMERIC_COLLATION`	When turned on, this attribute makes substrings of digits sort according to their numeric values. This is a way to get '100' to sort AFTER '2'. Note that the longest digit substring that can be treated as a single unit is 254 digits (not counting leading zeros). If a digit substring is longer than that, the digits beyond the limit will be treated as a separate digit substring. A "digit" in this sense is a code point with General_Category=Nd, which does not include circled numbers, roman numerals, etc. Only a contiguous digit substring is considered, that is, non-negative integers without separators. There is no support for plus/minus signs, decimals, exponents, etc.
`UCOL_STRENGTH`	The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength for most locales (except Japanese) is tertiary. Quaternary strength is useful when combined with shifted setting for alternate handling attribute and for JIS X 4061 collation, when it is used to distinguish between Katakana and Hiragana. Otherwise, quaternary level is affected only by the number of non-ignorable code points in the string. Identical strength is rarely useful, as it amounts to codepoints of the NFD form of the string.

UColAttributeValue

 UColAttributeValue

Enum containing attribute values for controlling collation behavior.

Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.

Properties
`UCOL_ATTRIBUTE_VALUE_COUNT`	One more than the highest normal UColAttributeValue value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCOL_CE_STRENGTH_LIMIT`
`UCOL_DEFAULT`	accepted by most attributes
`UCOL_DEFAULT_STRENGTH`	Default collation strength.
`UCOL_IDENTICAL`	Identical collation strength.
`UCOL_LOWER_FIRST`	Valid for UCOL_CASE_FIRST - lower case sorts before upper case.
`UCOL_NON_IGNORABLE`	Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable
`UCOL_OFF`	Turn the feature off - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE.
`UCOL_ON`	Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE.
`UCOL_PRIMARY`	Primary collation strength.
`UCOL_QUATERNARY`	Quaternary collation strength.
`UCOL_SECONDARY`	Secondary collation strength.
`UCOL_SHIFTED`	Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted
`UCOL_STRENGTH_LIMIT`
`UCOL_TERTIARY`	Tertiary collation strength.
`UCOL_UPPER_FIRST`	upper case sorts before lower case

UColBoundMode

 UColBoundMode

Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.

Underlying code depends on them having these numbers.

Properties
`UCOL_BOUND_LOWER`	lower bound
`UCOL_BOUND_UPPER`	upper bound that will match strings of exact size
`UCOL_BOUND_UPPER_LONG`	upper bound that will match all the strings that have the same initial substring as the given string
`UCOL_BOUND_VALUE_COUNT`	One more than the highest normal UColBoundMode value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UColReorderCode

 UColReorderCode

Enum containing the codes for reordering segments of the collation table that are not script codes.

These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode

Properties
`UCOL_REORDER_CODE_CURRENCY`	Characters with the currency property. This is equivalent to the rule value "currency".
`UCOL_REORDER_CODE_DEFAULT`	A special reordering code that is used to specify the default reordering codes for a locale.
`UCOL_REORDER_CODE_DIGIT`	Characters with the digit property. This is equivalent to the rule value "digit".
`UCOL_REORDER_CODE_FIRST`	The first entry in the enumeration of reordering groups. This is intended for use in range checking and enumeration of the reorder codes.
`UCOL_REORDER_CODE_LIMIT`	One more than the highest normal UColReorderCode value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCOL_REORDER_CODE_NONE`	A special reordering code that is used to specify no reordering codes.
`UCOL_REORDER_CODE_OTHERS`	A special reordering code that is used to specify all other codes used for reordering except for the codes listed as UColReorderCode values and those listed explicitly in a reordering.
`UCOL_REORDER_CODE_PUNCTUATION`	Characters with the punctuation property. This is equivalent to the rule value "punct".
`UCOL_REORDER_CODE_SPACE`	Characters with the space property. This is equivalent to the rule value "space".
`UCOL_REORDER_CODE_SYMBOL`	Characters with the symbol property. This is equivalent to the rule value "symbol".

UCollationResult

 UCollationResult

UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.

UCOL_EQUAL is returned if source string is compared to be equal to target string in the ucol_strcoll() method. UCOL_GREATER is returned if source string is compared to be greater than target string in the ucol_strcoll() method. See also: ucol_strcoll(). Possible values for a comparison result.

Properties
`UCOL_EQUAL`	string a == string b
`UCOL_GREATER`	string a > string b
`UCOL_LESS`	string a < string b

UDecompositionType

 UDecompositionType

Decomposition Type constants.

See also: UCHAR_DECOMPOSITION_TYPE

Properties
`U_DT_CANONICAL`
`U_DT_CIRCLE`
`U_DT_COMPAT`
`U_DT_COUNT`	One more than the highest normal UDecompositionType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_DT_FINAL`
`U_DT_FONT`
`U_DT_FRACTION`
`U_DT_INITIAL`
`U_DT_ISOLATED`
`U_DT_MEDIAL`
`U_DT_NARROW`
`U_DT_NOBREAK`
`U_DT_NONE`
`U_DT_SMALL`
`U_DT_SQUARE`
`U_DT_SUB`
`U_DT_SUPER`
`U_DT_VERTICAL`
`U_DT_WIDE`

UDialectHandling

 UDialectHandling

Enum used in LocaleDisplayNames::createInstance.

Properties

Properties
`ULDN_DIALECT_NAMES`	Use dialect names, when generating a locale name, e.g. en_GB displays as 'British English'.
`ULDN_STANDARD_NAMES`	Use standard names when generating a locale name, e.g. en_GB displays as 'English (United Kingdom)'.

ULDN_DIALECT_NAMES

Use dialect names, when generating a locale name, e.g. en_GB displays as 'British English'.

ULDN_STANDARD_NAMES

Use standard names when generating a locale name, e.g. en_GB displays as 'English (United Kingdom)'.

UDisplayContext

 UDisplayContext

Display context settings.

Note, the specific numeric values are internal and may change.

Properties
`UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE`	The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for the beginning of a sentence.
`UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE`	The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for the middle of a sentence.
`UDISPCTX_CAPITALIZATION_FOR_STANDALONE`	The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for stand-alone usage such as an isolated name on a calendar page.
`UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU`	The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for a user-interface list or menu item.
`UDISPCTX_CAPITALIZATION_NONE`	CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value. The capitalization context to be used is unknown (this is the default value).
`UDISPCTX_DIALECT_NAMES`	A possible setting for DIALECT_HANDLING: use dialect names, when generating a locale name, e.g. en_GB displays as 'British English'.
`UDISPCTX_LENGTH_FULL`	DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or UDISPCTX_LENGTH_SHORT. Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH to get the value. A possible setting for DISPLAY_LENGTH: use full names when generating a locale name, e.g. "United States" for US.
`UDISPCTX_LENGTH_SHORT`	A possible setting for DISPLAY_LENGTH: use short names when generating a locale name, e.g. "U.S." for US.
`UDISPCTX_NO_SUBSTITUTE`	A possible setting for SUBSTITUTE_HANDLING: Returns a null value with error code set to U_ILLEGAL_ARGUMENT_ERROR when no data is available.
`UDISPCTX_STANDARD_NAMES`	DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES. Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING to get the value. A possible setting for DIALECT_HANDLING: use standard names when generating a locale name, e.g. en_GB displays as 'English (United Kingdom)'.
`UDISPCTX_SUBSTITUTE`	SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or UDISPCTX_NO_SUBSTITUTE. Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING to get the value. A possible setting for SUBSTITUTE_HANDLING: Returns a fallback value (e.g., the input code) when no data is available. This is the default value.

UDisplayContextType

 UDisplayContextType

Display context types, for getting values of a particular setting.

Note, the specific numeric values are internal and may change.

Properties
`UDISPCTX_TYPE_CAPITALIZATION`	Type to retrieve the capitalization context setting, e.g. UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc.
`UDISPCTX_TYPE_DIALECT_HANDLING`	Type to retrieve the dialect handling setting, e.g. UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.
`UDISPCTX_TYPE_DISPLAY_LENGTH`	Type to retrieve the display length setting, e.g. UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT.
`UDISPCTX_TYPE_SUBSTITUTE_HANDLING`	Type to retrieve the substitute handling setting, e.g. UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.

UEastAsianWidth

 UEastAsianWidth

East Asian Width constants.

See also: UCHAR_EAST_ASIAN_WIDTH See also: u_getIntPropertyValue

Properties
`U_EA_AMBIGUOUS`
`U_EA_COUNT`	One more than the highest normal UEastAsianWidth value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_EA_FULLWIDTH`
`U_EA_HALFWIDTH`
`U_EA_NARROW`
`U_EA_NEUTRAL`
`U_EA_WIDE`

UErrorCode

 UErrorCode

Standard ICU4C error code type, a substitute for exceptions.

Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():

UErrorCode errorCode = U_ZERO_ERROR;
// call ICU API that needs an error code parameter.
if (U_FAILURE(errorCode)) {
    // An error occurred. Handle it here.
}

C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.

For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes

Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:

if (U_FAILURE(errorCode)) { return immediately; }

so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.

Properties
`U_AMBIGUOUS_ALIAS_WARNING`	This converter alias can go to different converter implementations.
`U_ARGUMENT_TYPE_MISMATCH`	Argument name and argument index mismatch in MessageFormat functions.
`U_BAD_VARIABLE_DEFINITION`	Missing '$' or duplicate variable name.
`U_BRK_ASSIGN_ERROR`	Syntax error in RBBI rule assignment statement.
`U_BRK_ERROR_LIMIT`	One more than the highest normal BreakIterator error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_BRK_ERROR_START`	Start of codes indicating Break Iterator failures.
`U_BRK_HEX_DIGITS_EXPECTED`	Hex digits expected as part of an escaped char in a rule.
`U_BRK_INIT_ERROR`	Initialization failure. Probable missing ICU Data.
`U_BRK_INTERNAL_ERROR`	An internal error (bug) was detected.
`U_BRK_MALFORMED_RULE_TAG`	The {nnn} tag on a rule is malformed.
`U_BRK_MISMATCHED_PAREN`	Mis-matched parentheses in an RBBI rule.
`U_BRK_NEW_LINE_IN_QUOTED_STRING`	Missing closing quote in an RBBI rule.
`U_BRK_RULE_EMPTY_SET`	Rule contains an empty Unicode Set.
`U_BRK_RULE_SYNTAX`	Syntax error in RBBI rule.
`U_BRK_SEMICOLON_EXPECTED`	Missing ';' at the end of a RBBI rule.
`U_BRK_UNCLOSED_SET`	UnicodeSet within an RBBI rule is missing a closing ']'.
`U_BRK_UNDEFINED_VARIABLE`	Use of an undefined $Variable in an RBBI rule.
`U_BRK_UNRECOGNIZED_OPTION`	!!option in RBBI rules not recognized.
`U_BRK_VARIABLE_REDFINITION`	RBBI rule $Variable redefined.
`U_BUFFER_OVERFLOW_ERROR`	A result would not fit in the supplied buffer.
`U_CE_NOT_FOUND_ERROR`	Currently used only while setting variable top, but can be used generally.
`U_COLLATOR_VERSION_MISMATCH`	Collator version is not compatible with the base version.
`U_DECIMAL_NUMBER_SYNTAX_ERROR`	Decimal number syntax error.
`U_DEFAULT_KEYWORD_MISSING`	Missing DEFAULT rule in plural rules.
`U_DIFFERENT_UCA_VERSION`	ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact on further function.
`U_DUPLICATE_KEYWORD`	Duplicate keyword in PluralFormat.
`U_ENUM_OUT_OF_SYNC_ERROR`	UEnumeration out of sync with underlying collection.
`U_ERROR_LIMIT`	One more than the highest normal error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_ERROR_WARNING_LIMIT`	One more than the highest normal UErrorCode warning value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_ERROR_WARNING_START`	Start of information results (semantically successful)
`U_FILE_ACCESS_ERROR`	The requested file cannot be found.
`U_FMT_PARSE_ERROR_LIMIT`	One more than the highest normal formatting API error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_FMT_PARSE_ERROR_START`	Start of format library errors.
`U_FORMAT_INEXACT_ERROR`	Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY.
`U_IDNA_ACE_PREFIX_ERROR`
`U_IDNA_CHECK_BIDI_ERROR`
`U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR`
`U_IDNA_ERROR_LIMIT`	One more than the highest normal IDNA error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_IDNA_ERROR_START`
`U_IDNA_LABEL_TOO_LONG_ERROR`
`U_IDNA_PROHIBITED_ERROR`
`U_IDNA_STD3_ASCII_RULES_ERROR`
`U_IDNA_UNASSIGNED_ERROR`
`U_IDNA_VERIFICATION_ERROR`
`U_IDNA_ZERO_LENGTH_LABEL_ERROR`
`U_ILLEGAL_ARGUMENT_ERROR`	Start of codes indicating failure.
`U_ILLEGAL_CHARACTER`	A special character is outside its allowed context.
`U_ILLEGAL_CHAR_FOUND`	Character conversion: Illegal input sequence/combination of input units.
`U_ILLEGAL_CHAR_IN_SEGMENT`	UNUSED as of ICU 2.4.
`U_ILLEGAL_ESCAPE_SEQUENCE`	ISO-2022 illegal escape sequence.
`U_ILLEGAL_PAD_POSITION`	Pad symbol misplaced in number pattern.
`U_INDEX_OUTOFBOUNDS_ERROR`	Trying to access the index that is out of bounds.
`U_INPUT_TOO_LONG_ERROR`	The input is impractically long for an operation. It is rejected because it may lead to problems such as excessive processing time, stack depth, or heap memory requirements.
`U_INTERNAL_PROGRAM_ERROR`	Indicates a bug in the library code.
`U_INTERNAL_TRANSLITERATOR_ERROR`	Internal transliterator system error.
`U_INVALID_CHAR_FOUND`	Character conversion: Unmappable input sequence. In other APIs: Invalid character.
`U_INVALID_FORMAT_ERROR`	Data format is not what is expected.
`U_INVALID_FUNCTION`	A "&fn()" rule specifies an unknown transliterator.
`U_INVALID_ID`	A "::id" rule specifies an unknown transliterator.
`U_INVALID_PROPERTY_PATTERN`	UNUSED as of ICU 2.4.
`U_INVALID_RBT_SYNTAX`	A "::id" rule was passed to the RuleBasedTransliterator parser.
`U_INVALID_STATE_ERROR`	Requested operation can not be completed with ICU in its current state.
`U_INVALID_TABLE_FILE`	Conversion table file not found.
`U_INVALID_TABLE_FORMAT`	Conversion table file found, but corrupted.
`U_INVARIANT_CONVERSION_ERROR`	Unable to convert a UChar* string to char* with the invariant converter.
`U_MALFORMED_EXPONENTIAL_PATTERN`	Grouping symbol in exponent pattern.
`U_MALFORMED_PRAGMA`	A 'use' pragma is invalid.
`U_MALFORMED_RULE`	Elements of a rule are misplaced.
`U_MALFORMED_SET`	A UnicodeSet pattern is invalid.
`U_MALFORMED_SYMBOL_REFERENCE`	UNUSED as of ICU 2.4.
`U_MALFORMED_UNICODE_ESCAPE`	A Unicode escape pattern is invalid.
`U_MALFORMED_VARIABLE_DEFINITION`	A variable definition is invalid.
`U_MALFORMED_VARIABLE_REFERENCE`	A variable reference is invalid.
`U_MEMORY_ALLOCATION_ERROR`	Memory allocation error.
`U_MESSAGE_PARSE_ERROR`	Unable to parse a message (message format)
`U_MISMATCHED_SEGMENT_DELIMITERS`	UNUSED as of ICU 2.4.
`U_MISPLACED_ANCHOR_START`	A start anchor appears at an illegal position.
`U_MISPLACED_COMPOUND_FILTER`	A compound filter is in an invalid location.
`U_MISPLACED_CURSOR_OFFSET`	A cursor offset occurs at an illegal position.
`U_MISPLACED_QUANTIFIER`	A quantifier appears after a segment close delimiter.
`U_MISSING_OPERATOR`	A rule contains no operator.
`U_MISSING_RESOURCE_ERROR`	The requested resource cannot be found.
`U_MISSING_SEGMENT_CLOSE`	UNUSED as of ICU 2.4.
`U_MULTIPLE_ANTE_CONTEXTS`	More than one ante context.
`U_MULTIPLE_COMPOUND_FILTERS`	More than one compound filter.
`U_MULTIPLE_CURSORS`	More than one cursor.
`U_MULTIPLE_DECIMAL_SEPARATORS`	More than one decimal separator in number pattern.
`U_MULTIPLE_DECIMAL_SEPERATORS`	Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS
`U_MULTIPLE_EXPONENTIAL_SYMBOLS`	More than one exponent symbol in number pattern.
`U_MULTIPLE_PAD_SPECIFIERS`	More than one pad symbol in number pattern.
`U_MULTIPLE_PERCENT_SYMBOLS`	More than one percent symbol in number pattern.
`U_MULTIPLE_PERMILL_SYMBOLS`	More than one permill symbol in number pattern.
`U_MULTIPLE_POST_CONTEXTS`	More than one post context.
`U_NO_SPACE_AVAILABLE`	No space available for in-buffer expansion for Arabic shaping.
`U_NO_WRITE_PERMISSION`	Attempt to modify read-only or constant data.
`U_NUMBER_ARG_OUTOFBOUNDS_ERROR`	The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999.
`U_NUMBER_SKELETON_SYNTAX_ERROR`	The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error.
`U_PARSE_ERROR`	Equivalent to Java ParseException.
`U_PARSE_ERROR_LIMIT`	One more than the highest normal Transliterator error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_PARSE_ERROR_START`	Start of Transliterator errors.
`U_PATTERN_SYNTAX_ERROR`	Syntax error in format pattern.
`U_PLUGIN_CHANGED_LEVEL_WARNING`	A plugin caused a level change. May not be an error, but later plugins may not load.
`U_PLUGIN_DIDNT_SET_LEVEL`	The plugin didn't call uplug_setPlugLevel in response to a QUERY.
`U_PLUGIN_ERROR_LIMIT`	One more than the highest normal plug-in error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_PLUGIN_ERROR_START`	Start of codes indicating plugin failures.
`U_PLUGIN_TOO_HIGH`	The plugin's level is too high to be loaded right now.
`U_PRIMARY_TOO_LONG_ERROR`	User tried to set variable top to a primary that is longer than two bytes.
`U_REGEX_BAD_ESCAPE_SEQUENCE`	Unrecognized backslash escape sequence in pattern.
`U_REGEX_BAD_INTERVAL`	Error in {min,max} interval.
`U_REGEX_ERROR_LIMIT`	One more than the highest normal regular expression error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_REGEX_ERROR_START`	Start of codes indicating Regexp failures.
`U_REGEX_INTERNAL_ERROR`	An internal error (bug) was detected.
`U_REGEX_INVALID_BACK_REF`	Back-reference to a non-existent capture group.
`U_REGEX_INVALID_CAPTURE_GROUP_NAME`	Invalid capture group name.
`U_REGEX_INVALID_FLAG`	Invalid value for match mode flags.
`U_REGEX_INVALID_RANGE`	In a character range [x-y], x is greater than y.
`U_REGEX_INVALID_STATE`	RegexMatcher in invalid state for requested operation.
`U_REGEX_LOOK_BEHIND_LIMIT`	Look-Behind pattern matches must have a bounded maximum length.
`U_REGEX_MAX_LT_MIN`	In {min,max}, max is less than min.
`U_REGEX_MISMATCHED_PAREN`	Incorrectly nested parentheses in regexp pattern.
`U_REGEX_MISSING_CLOSE_BRACKET`	Missing closing bracket on a bracket expression.
`U_REGEX_NUMBER_TOO_BIG`	Decimal number is too large.
`U_REGEX_OCTAL_TOO_BIG`	Octal character constants must be <= 0377. Deprecated. ICU 54. This error cannot occur.
`U_REGEX_PATTERN_TOO_BIG`	Pattern exceeds limits on size or complexity.
`U_REGEX_PROPERTY_SYNTAX`	Incorrect Unicode property.
`U_REGEX_RULE_SYNTAX`	Syntax error in regexp pattern.
`U_REGEX_SET_CONTAINS_STRING`	Regexps cannot have UnicodeSets containing strings.
`U_REGEX_STACK_OVERFLOW`	Regular expression backtrack stack overflow.
`U_REGEX_STOPPED_BY_CALLER`	Matching operation aborted by user callback fn.
`U_REGEX_TIME_OUT`	Maximum allowed match time exceeded.
`U_REGEX_UNIMPLEMENTED`	Use of regexp feature that is not yet implemented.
`U_RESOURCE_TYPE_MISMATCH`	An operation is requested over a resource that does not support it.
`U_RULE_MASK_ERROR`	A rule is hidden by an earlier more general rule.
`U_SAFECLONE_ALLOCATED_WARNING`	A SafeClone operation required allocating memory (informational only)
`U_SORT_KEY_TOO_SHORT_WARNING`	Number of levels requested in getBound is higher than the number of levels in the sort key.
`U_STANDARD_ERROR_LIMIT`	One more than the highest standard error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_STATE_OLD_WARNING`	ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading.
`U_STATE_TOO_OLD_ERROR`	ICU cannot construct a service from this state, as it is no longer supported.
`U_STRINGPREP_CHECK_BIDI_ERROR`
`U_STRINGPREP_PROHIBITED_ERROR`
`U_STRINGPREP_UNASSIGNED_ERROR`
`U_STRING_NOT_TERMINATED_WARNING`	An output string could not be NUL-terminated because output length==destCapacity.
`U_TOO_MANY_ALIASES_ERROR`	There are too many aliases in the path to the requested resource. It is very possible that a circular alias definition has occurred.
`U_TRAILING_BACKSLASH`	A dangling backslash.
`U_TRUNCATED_CHAR_FOUND`	Character conversion: Incomplete input sequence.
`U_UNCLOSED_SEGMENT`	A closing ')' is missing.
`U_UNDEFINED_KEYWORD`	Undefined Plural keyword.
`U_UNDEFINED_SEGMENT_REFERENCE`	A segment reference does not correspond to a defined segment.
`U_UNDEFINED_VARIABLE`	A variable reference does not correspond to a defined variable.
`U_UNEXPECTED_TOKEN`	Syntax error in format pattern.
`U_UNMATCHED_BRACES`	Braces do not match in message pattern.
`U_UNQUOTED_SPECIAL`	A special character was not quoted or escaped.
`U_UNSUPPORTED_ATTRIBUTE`	UNUSED as of ICU 2.4.
`U_UNSUPPORTED_ERROR`	Requested operation not supported in current context.
`U_UNSUPPORTED_ESCAPE_SEQUENCE`	ISO-2022 unsupported escape sequence.
`U_UNSUPPORTED_PROPERTY`	UNUSED as of ICU 2.4.
`U_UNTERMINATED_QUOTE`	A closing single quote is missing.
`U_USELESS_COLLATOR_ERROR`	Collator is options only and no base is specified.
`U_USING_DEFAULT_WARNING`	A resource bundle lookup returned a result from the root locale (not an error)
`U_USING_FALLBACK_WARNING`	A resource bundle lookup returned a fallback result (not an error)
`U_VARIABLE_RANGE_EXHAUSTED`	Too many stand-ins generated for the given variable range.
`U_VARIABLE_RANGE_OVERLAP`	The variable range overlaps characters used in rules.
`U_ZERO_ERROR`	No error, no warning.

UGraphemeClusterBreak

 UGraphemeClusterBreak

Grapheme Cluster Break constants.

See also: UCHAR_GRAPHEME_CLUSTER_BREAK

Properties
`U_GCB_CONTROL`
`U_GCB_COUNT`	One more than the highest normal UGraphemeClusterBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_GCB_CR`
`U_GCB_EXTEND`
`U_GCB_E_BASE`
`U_GCB_E_BASE_GAZ`
`U_GCB_E_MODIFIER`
`U_GCB_GLUE_AFTER_ZWJ`
`U_GCB_L`
`U_GCB_LF`
`U_GCB_LV`
`U_GCB_LVT`
`U_GCB_OTHER`
`U_GCB_PREPEND`
`U_GCB_REGIONAL_INDICATOR`
`U_GCB_SPACING_MARK`
`U_GCB_T`
`U_GCB_V`
`U_GCB_ZWJ`

UHangulSyllableType

 UHangulSyllableType

Hangul Syllable Type constants.

See also: UCHAR_HANGUL_SYLLABLE_TYPE

Properties
`U_HST_COUNT`	One more than the highest normal UHangulSyllableType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_HST_LEADING_JAMO`
`U_HST_LVT_SYLLABLE`
`U_HST_LV_SYLLABLE`
`U_HST_NOT_APPLICABLE`
`U_HST_TRAILING_JAMO`
`U_HST_VOWEL_JAMO`

UIndicPositionalCategory

 UIndicPositionalCategory

Indic Positional Category constants.

See also: UCHAR_INDIC_POSITIONAL_CATEGORY

Properties
`U_INPC_BOTTOM`
`U_INPC_BOTTOM_AND_LEFT`
`U_INPC_BOTTOM_AND_RIGHT`
`U_INPC_LEFT`
`U_INPC_LEFT_AND_RIGHT`
`U_INPC_NA`
`U_INPC_OVERSTRUCK`
`U_INPC_RIGHT`
`U_INPC_TOP`
`U_INPC_TOP_AND_BOTTOM`
`U_INPC_TOP_AND_BOTTOM_AND_LEFT`
`U_INPC_TOP_AND_BOTTOM_AND_RIGHT`
`U_INPC_TOP_AND_LEFT`
`U_INPC_TOP_AND_LEFT_AND_RIGHT`
`U_INPC_TOP_AND_RIGHT`
`U_INPC_VISUAL_ORDER_LEFT`

UIndicSyllabicCategory

 UIndicSyllabicCategory

Indic Syllabic Category constants.

See also: UCHAR_INDIC_SYLLABIC_CATEGORY

Properties
`U_INSC_AVAGRAHA`
`U_INSC_BINDU`
`U_INSC_BRAHMI_JOINING_NUMBER`
`U_INSC_CANTILLATION_MARK`
`U_INSC_CONSONANT`
`U_INSC_CONSONANT_DEAD`
`U_INSC_CONSONANT_FINAL`
`U_INSC_CONSONANT_HEAD_LETTER`
`U_INSC_CONSONANT_INITIAL_POSTFIXED`
`U_INSC_CONSONANT_KILLER`
`U_INSC_CONSONANT_MEDIAL`
`U_INSC_CONSONANT_PLACEHOLDER`
`U_INSC_CONSONANT_PRECEDING_REPHA`
`U_INSC_CONSONANT_PREFIXED`
`U_INSC_CONSONANT_SUBJOINED`
`U_INSC_CONSONANT_SUCCEEDING_REPHA`
`U_INSC_CONSONANT_WITH_STACKER`
`U_INSC_GEMINATION_MARK`
`U_INSC_INVISIBLE_STACKER`
`U_INSC_JOINER`
`U_INSC_MODIFYING_LETTER`
`U_INSC_NON_JOINER`
`U_INSC_NUKTA`
`U_INSC_NUMBER`
`U_INSC_NUMBER_JOINER`
`U_INSC_OTHER`
`U_INSC_PURE_KILLER`
`U_INSC_REGISTER_SHIFTER`
`U_INSC_SYLLABLE_MODIFIER`
`U_INSC_TONE_LETTER`
`U_INSC_TONE_MARK`
`U_INSC_VIRAMA`
`U_INSC_VISARGA`
`U_INSC_VOWEL`
`U_INSC_VOWEL_DEPENDENT`
`U_INSC_VOWEL_INDEPENDENT`

UJoiningGroup

 UJoiningGroup

Joining Group constants.

See also: UCHAR_JOINING_GROUP

Properties
`U_JG_AFRICAN_FEH`
`U_JG_AFRICAN_NOON`
`U_JG_AFRICAN_QAF`
`U_JG_AIN`
`U_JG_ALAPH`
`U_JG_ALEF`
`U_JG_BEH`
`U_JG_BETH`
`U_JG_BURUSHASKI_YEH_BARREE`
`U_JG_COUNT`	One more than the highest normal UJoiningGroup value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_JG_DAL`
`U_JG_DALATH_RISH`
`U_JG_E`
`U_JG_FARSI_YEH`
`U_JG_FE`
`U_JG_FEH`
`U_JG_FINAL_SEMKATH`
`U_JG_GAF`
`U_JG_GAMAL`
`U_JG_HAH`
`U_JG_HAMZA_ON_HEH_GOAL`
`U_JG_HANIFI_ROHINGYA_KINNA_YA`
`U_JG_HANIFI_ROHINGYA_PA`
`U_JG_HE`
`U_JG_HEH`
`U_JG_HEH_GOAL`
`U_JG_HETH`
`U_JG_KAF`
`U_JG_KAPH`
`U_JG_KHAPH`
`U_JG_KNOTTED_HEH`
`U_JG_LAM`
`U_JG_LAMADH`
`U_JG_MALAYALAM_BHA`
`U_JG_MALAYALAM_JA`
`U_JG_MALAYALAM_LLA`
`U_JG_MALAYALAM_LLLA`
`U_JG_MALAYALAM_NGA`
`U_JG_MALAYALAM_NNA`
`U_JG_MALAYALAM_NNNA`
`U_JG_MALAYALAM_NYA`
`U_JG_MALAYALAM_RA`
`U_JG_MALAYALAM_SSA`
`U_JG_MALAYALAM_TTA`
`U_JG_MANICHAEAN_ALEPH`
`U_JG_MANICHAEAN_AYIN`
`U_JG_MANICHAEAN_BETH`
`U_JG_MANICHAEAN_DALETH`
`U_JG_MANICHAEAN_DHAMEDH`
`U_JG_MANICHAEAN_FIVE`
`U_JG_MANICHAEAN_GIMEL`
`U_JG_MANICHAEAN_HETH`
`U_JG_MANICHAEAN_HUNDRED`
`U_JG_MANICHAEAN_KAPH`
`U_JG_MANICHAEAN_LAMEDH`
`U_JG_MANICHAEAN_MEM`
`U_JG_MANICHAEAN_NUN`
`U_JG_MANICHAEAN_ONE`
`U_JG_MANICHAEAN_PE`
`U_JG_MANICHAEAN_QOPH`
`U_JG_MANICHAEAN_RESH`
`U_JG_MANICHAEAN_SADHE`
`U_JG_MANICHAEAN_SAMEKH`
`U_JG_MANICHAEAN_TAW`
`U_JG_MANICHAEAN_TEN`
`U_JG_MANICHAEAN_TETH`
`U_JG_MANICHAEAN_THAMEDH`
`U_JG_MANICHAEAN_TWENTY`
`U_JG_MANICHAEAN_WAW`
`U_JG_MANICHAEAN_YODH`
`U_JG_MANICHAEAN_ZAYIN`
`U_JG_MEEM`
`U_JG_MIM`
`U_JG_NOON`
`U_JG_NO_JOINING_GROUP`
`U_JG_NUN`
`U_JG_NYA`
`U_JG_PE`
`U_JG_QAF`
`U_JG_QAPH`
`U_JG_REH`
`U_JG_REVERSED_PE`
`U_JG_ROHINGYA_YEH`
`U_JG_SAD`
`U_JG_SADHE`
`U_JG_SEEN`
`U_JG_SEMKATH`
`U_JG_SHIN`
`U_JG_STRAIGHT_WAW`
`U_JG_SWASH_KAF`
`U_JG_SYRIAC_WAW`
`U_JG_TAH`
`U_JG_TAW`
`U_JG_TEH_MARBUTA`
`U_JG_TEH_MARBUTA_GOAL`
`U_JG_TETH`
`U_JG_THIN_YEH`
`U_JG_VERTICAL_TAIL`
`U_JG_WAW`
`U_JG_YEH`
`U_JG_YEH_BARREE`
`U_JG_YEH_WITH_TAIL`
`U_JG_YUDH`
`U_JG_YUDH_HE`
`U_JG_ZAIN`
`U_JG_ZHAIN`

UJoiningType

 UJoiningType

Joining Type constants.

See also: UCHAR_JOINING_TYPE

Properties
`U_JT_COUNT`	One more than the highest normal UJoiningType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_JT_DUAL_JOINING`
`U_JT_JOIN_CAUSING`
`U_JT_LEFT_JOINING`
`U_JT_NON_JOINING`
`U_JT_RIGHT_JOINING`
`U_JT_TRANSPARENT`

ULayoutType

 ULayoutType

Enums for the return values of the character and line orientation functions.

Properties
`ULOC_LAYOUT_BTT`
`ULOC_LAYOUT_LTR`
`ULOC_LAYOUT_RTL`
`ULOC_LAYOUT_TTB`
`ULOC_LAYOUT_UNKNOWN`

ULineBreak

 ULineBreak

Line Break constants.

See also: UCHAR_LINE_BREAK

Properties
`U_LB_ALPHABETIC`
`U_LB_AMBIGUOUS`
`U_LB_BREAK_AFTER`
`U_LB_BREAK_BEFORE`
`U_LB_BREAK_BOTH`
`U_LB_BREAK_SYMBOLS`
`U_LB_CARRIAGE_RETURN`
`U_LB_CLOSE_PARENTHESIS`
`U_LB_CLOSE_PUNCTUATION`
`U_LB_COMBINING_MARK`
`U_LB_COMPLEX_CONTEXT`
`U_LB_CONDITIONAL_JAPANESE_STARTER`
`U_LB_CONTINGENT_BREAK`
`U_LB_COUNT`	One more than the highest normal ULineBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_LB_EXCLAMATION`
`U_LB_E_BASE`
`U_LB_E_MODIFIER`
`U_LB_GLUE`
`U_LB_H2`
`U_LB_H3`
`U_LB_HEBREW_LETTER`
`U_LB_HYPHEN`
`U_LB_IDEOGRAPHIC`
`U_LB_INFIX_NUMERIC`
`U_LB_INSEPARABLE`	Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0.
`U_LB_INSEPERABLE`	Misspelled former name of U_LB_INSEPARABLE; prefer U_LB_INSEPARABLE.
`U_LB_JL`
`U_LB_JT`
`U_LB_JV`
`U_LB_LINE_FEED`
`U_LB_MANDATORY_BREAK`
`U_LB_NEXT_LINE`
`U_LB_NONSTARTER`
`U_LB_NUMERIC`
`U_LB_OPEN_PUNCTUATION`
`U_LB_POSTFIX_NUMERIC`
`U_LB_PREFIX_NUMERIC`
`U_LB_QUOTATION`
`U_LB_REGIONAL_INDICATOR`
`U_LB_SPACE`
`U_LB_SURROGATE`
`U_LB_UNKNOWN`
`U_LB_WORD_JOINER`
`U_LB_ZWJ`
`U_LB_ZWSPACE`

ULineBreakTag

 ULineBreakTag

Enum constants for the line break tags returned by getRuleStatus().

A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

Properties
`UBRK_LINE_HARD`	Tag value for a hard, or mandatory line break.
`UBRK_LINE_HARD_LIMIT`	Upper bound for hard line breaks.
`UBRK_LINE_SOFT`	Tag value for soft line breaks, positions at which a line break is acceptable but not required.
`UBRK_LINE_SOFT_LIMIT`	Upper bound for soft line breaks.

ULocAvailableType

 ULocAvailableType

Types for uloc_getAvailableByType and uloc_countAvailableByType.

Properties
`ULOC_AVAILABLE_COUNT`
`ULOC_AVAILABLE_DEFAULT`	Locales that return data when passed to ICU APIs, but not including legacy or alias locales.
`ULOC_AVAILABLE_ONLY_LEGACY_ALIASES`	Legacy or alias locales that return data when passed to ICU APIs. Examples of supported legacy or alias locales: iw (alias to he), mo (alias to ro), zh_CN (alias to zh_Hans_CN), sr_BA (alias to sr_Cyrl_BA), ars (alias to ar_SA). The locales in this set are disjoint from the ones in ULOC_AVAILABLE_DEFAULT. To get both sets at the same time, use ULOC_AVAILABLE_WITH_LEGACY_ALIASES.
`ULOC_AVAILABLE_WITH_LEGACY_ALIASES`	The union of the locales in ULOC_AVAILABLE_DEFAULT and ULOC_AVAILABLE_ONLY_LEGACY_ALIASES.

ULocDataLocaleType

 ULocDataLocaleType

Constants for *_getLocale(). They allow the user to select whether they want information on the requested, valid, or actual locale.

For example, a collator for "en_US_CALIFORNIA" was requested. In the current state of ICU (2.0), the requested locale is "en_US_CALIFORNIA", the valid locale is "en_US" (most specific locale supported by ICU) and the actual locale is "root" (the collation data comes unmodified from the UCA). The locale is considered supported by ICU if there is a core ICU bundle for that locale (although it may be empty).

Properties
`ULOC_ACTUAL_LOCALE`	This is the locale the data actually comes from.
`ULOC_DATA_LOCALE_TYPE_LIMIT`	One more than the highest normal ULocDataLocaleType value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`ULOC_REQUESTED_LOCALE`	This is the requested locale. Deprecated. ICU 2.8
`ULOC_VALID_LOCALE`	This is the most specific locale supported by ICU.

UNormalization2Mode

 UNormalization2Mode

Constants for normalization modes.

For details about standard Unicode normalization forms and about the algorithms which are also used with custom mapping tables see http://www.unicode.org/unicode/reports/tr15/

Properties
`UNORM2_COMPOSE`	Decomposition followed by composition. Same as standard NFC when using an "nfc" instance. Same as standard NFKC when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/
`UNORM2_COMPOSE_CONTIGUOUS`	Compose only contiguously. Also known as "FCC" or "Fast C Contiguous". The result will often but not always be in NFC. The result will conform to FCD which is useful for processing. Not a standard Unicode normalization form. For details see http://www.unicode.org/notes/tn5/#FCC
`UNORM2_DECOMPOSE`	Map, and reorder canonically. Same as standard NFD when using an "nfc" instance. Same as standard NFKD when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/
`UNORM2_FCD`	"Fast C or D" form. If a string is in this form, then further decomposition without reordering would yield the same form as DECOMPOSE. Text in "Fast C or D" form can be processed efficiently with data tables that are "canonically closed", that is, that provide equivalent data for equivalent text, without having to be fully normalized. Not a standard Unicode normalization form. Not a unique form: Different FCD strings can be canonically equivalent. For details see http://www.unicode.org/notes/tn5/#FCD

UNormalizationCheckResult

 UNormalizationCheckResult

Result values for normalization quick check functions.

For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms

Properties

Properties
`UNORM_MAYBE`	The input string may or may not be in the normalization form. This value is only returned for composition forms like NFC and FCC, when a backward-combining character is found for which the surrounding text would have to be analyzed further.
`UNORM_NO`	The input string is not in the normalization form.
`UNORM_YES`	The input string is in the normalization form.

UNORM_MAYBE

The input string may or may not be in the normalization form.

This value is only returned for composition forms like NFC and FCC, when a backward-combining character is found for which the surrounding text would have to be analyzed further.

UNORM_NO

The input string is not in the normalization form.

UNORM_YES

The input string is in the normalization form.

UNumericType

 UNumericType

Numeric Type constants.

See also: UCHAR_NUMERIC_TYPE

Properties
`U_NT_COUNT`	One more than the highest normal UNumericType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_NT_DECIMAL`
`U_NT_DIGIT`
`U_NT_NONE`
`U_NT_NUMERIC`

UProperty

 UProperty

Selection constants for Unicode properties.

These constants are used in functions like u_hasBinaryProperty to select one of the Unicode properties.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).

For details about the properties see UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).

Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, then properties marked with "new in Unicode 3.2" are not or not fully available. Check u_getUnicodeVersion to be sure.

See also: u_hasBinaryProperty See also: u_getIntPropertyValue See also: u_getUnicodeVersion

Properties
`UCHAR_AGE`	String property Age. Corresponds to u_charAge.
`UCHAR_ALPHABETIC`	Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic
`UCHAR_ASCII_HEX_DIGIT`	Binary property ASCII_Hex_Digit. 0-9 A-F a-f
`UCHAR_BASIC_EMOJI`	Binary property of strings Basic_Emoji. See https://www.unicode.org/reports/tr51/#Emoji_Sets
`UCHAR_BIDI_CLASS`	Enumerated property Bidi_Class. Same as u_charDirection, returns UCharDirection values.
`UCHAR_BIDI_CONTROL`	Binary property Bidi_Control. Format controls which have specific functions in the Bidi Algorithm.
`UCHAR_BIDI_MIRRORED`	Binary property Bidi_Mirrored. Characters that may change display in RTL text. Same as u_isMirrored. See Bidi Algorithm, UTR 9.
`UCHAR_BIDI_MIRRORING_GLYPH`	String property Bidi_Mirroring_Glyph. Corresponds to u_charMirror.
`UCHAR_BIDI_PAIRED_BRACKET`	String property Bidi_Paired_Bracket (new in Unicode 6.3). Corresponds to u_getBidiPairedBracket.
`UCHAR_BIDI_PAIRED_BRACKET_TYPE`	Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). Used in UAX #9: Unicode Bidirectional Algorithm (http://www.unicode.org/reports/tr9/) Returns UBidiPairedBracketType values.
`UCHAR_BINARY_LIMIT`	One more than the last constant for binary Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCHAR_BINARY_START`	First constant for binary Unicode properties.
`UCHAR_BLOCK`	Enumerated property Block. Same as ublock_getCode, returns UBlockCode values.
`UCHAR_CANONICAL_COMBINING_CLASS`	Enumerated property Canonical_Combining_Class. Same as u_getCombiningClass, returns 8-bit numeric values.
`UCHAR_CASED`	Binary property Cased. For Lowercase, Uppercase and Titlecase characters.
`UCHAR_CASE_FOLDING`	String property Case_Folding. Corresponds to u_strFoldCase in ustring.h.
`UCHAR_CASE_IGNORABLE`	Binary property Case_Ignorable. Used in context-sensitive case mappings.
`UCHAR_CASE_SENSITIVE`	Binary property Case_Sensitive. Either the source of a case mapping or in the target of a case mapping. Not the same as the general category Cased_Letter.
`UCHAR_CHANGES_WHEN_CASEFOLDED`	Binary property Changes_When_Casefolded.
`UCHAR_CHANGES_WHEN_CASEMAPPED`	Binary property Changes_When_Casemapped.
`UCHAR_CHANGES_WHEN_LOWERCASED`	Binary property Changes_When_Lowercased.
`UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED`	Binary property Changes_When_NFKC_Casefolded.
`UCHAR_CHANGES_WHEN_TITLECASED`	Binary property Changes_When_Titlecased.
`UCHAR_CHANGES_WHEN_UPPERCASED`	Binary property Changes_When_Uppercased.
`UCHAR_DASH`	Binary property Dash. Variations of dashes.
`UCHAR_DECOMPOSITION_TYPE`	Enumerated property Decomposition_Type. Returns UDecompositionType values.
`UCHAR_DEFAULT_IGNORABLE_CODE_POINT`	Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). Ignorable in most processing. <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space)
`UCHAR_DEPRECATED`	Binary property Deprecated (new in Unicode 3.2). The usage of deprecated characters is strongly discouraged.
`UCHAR_DIACRITIC`	Binary property Diacritic. Characters that linguistically modify the meaning of another character to which they apply.
`UCHAR_DOUBLE_LIMIT`	One more than the last constant for double Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCHAR_DOUBLE_START`	First constant for double Unicode properties.
`UCHAR_EAST_ASIAN_WIDTH`	Enumerated property East_Asian_Width. See http://www.unicode.org/reports/tr11/ Returns UEastAsianWidth values.
`UCHAR_EMOJI`	Binary property Emoji. See http://www.unicode.org/reports/tr51/#Emoji_Properties
`UCHAR_EMOJI_COMPONENT`	Binary property Emoji_Component. See http://www.unicode.org/reports/tr51/#Emoji_Properties
`UCHAR_EMOJI_KEYCAP_SEQUENCE`	Binary property of strings Emoji_Keycap_Sequence. See https://www.unicode.org/reports/tr51/#Emoji_Sets
`UCHAR_EMOJI_MODIFIER`	Binary property Emoji_Modifier. See http://www.unicode.org/reports/tr51/#Emoji_Properties
`UCHAR_EMOJI_MODIFIER_BASE`	Binary property Emoji_Modifier_Base. See http://www.unicode.org/reports/tr51/#Emoji_Properties
`UCHAR_EMOJI_PRESENTATION`	Binary property Emoji_Presentation. See http://www.unicode.org/reports/tr51/#Emoji_Properties
`UCHAR_EXTENDED_PICTOGRAPHIC`	Binary property Extended_Pictographic. See http://www.unicode.org/reports/tr51/#Emoji_Properties
`UCHAR_EXTENDER`	Binary property Extender. Extend the value or shape of a preceding alphabetic character, e.g., length and iteration marks.
`UCHAR_FULL_COMPOSITION_EXCLUSION`	Binary property Full_Composition_Exclusion. CompositionExclusions.txt+Singleton Decompositions+ Non-Starter Decompositions.
`UCHAR_GENERAL_CATEGORY`	Enumerated property General_Category. Same as u_charType, returns UCharCategory values.
`UCHAR_GENERAL_CATEGORY_MASK`	Bitmask property General_Category_Mask. This is the General_Category property returned as a bit mask. When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), returns bit masks for UCharCategory values where exactly one bit is set. When used with u_getPropertyValueName() and u_getPropertyValueEnum(), a multi-bit mask is used for sets of categories like "Letters". Mask values should be cast to uint32_t.
`UCHAR_GRAPHEME_BASE`	Binary property Grapheme_Base (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ
`UCHAR_GRAPHEME_CLUSTER_BREAK`	Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) Returns UGraphemeClusterBreak values.
`UCHAR_GRAPHEME_EXTEND`	Binary property Grapheme_Extend (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ
`UCHAR_GRAPHEME_LINK`	Binary property Grapheme_Link (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries.
`UCHAR_HANGUL_SYLLABLE_TYPE`	Enumerated property Hangul_Syllable_Type, new in Unicode 4. Returns UHangulSyllableType values.
`UCHAR_HEX_DIGIT`	Binary property Hex_Digit. Characters commonly used for hexadecimal numbers.
`UCHAR_HYPHEN`	Binary property Hyphen. Dashes used to mark connections between pieces of words, plus the Katakana middle dot.
`UCHAR_IDEOGRAPHIC`	Binary property Ideographic. CJKV ideographs.
`UCHAR_IDS_BINARY_OPERATOR`	Binary property IDS_Binary_Operator (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences.
`UCHAR_IDS_TRINARY_OPERATOR`	Binary property IDS_Trinary_Operator (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences.
`UCHAR_ID_CONTINUE`	Binary property ID_Continue. Characters that can continue an identifier. DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." ID_Start+Mn+Mc+Nd+Pc
`UCHAR_ID_START`	Binary property ID_Start. Characters that can start an identifier. Lu+Ll+Lt+Lm+Lo+Nl
`UCHAR_INDIC_POSITIONAL_CATEGORY`	Enumerated property Indic_Positional_Category. New in Unicode 6.0 as provisional property Indic_Matra_Category; renamed and changed to informative in Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
`UCHAR_INDIC_SYLLABIC_CATEGORY`	Enumerated property Indic_Syllabic_Category. New in Unicode 6.0 as provisional; informative since Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
`UCHAR_INT_LIMIT`	One more than the last constant for enumerated/integer Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCHAR_INT_START`	First constant for enumerated/integer Unicode properties.
`UCHAR_INVALID_CODE`	Represents a nonexistent or invalid property or property value.
`UCHAR_ISO_COMMENT`	Deprecated string property ISO_Comment. Corresponds to u_getISOComment. Deprecated. ICU 49
`UCHAR_JOINING_GROUP`	Enumerated property Joining_Group. Returns UJoiningGroup values.
`UCHAR_JOINING_TYPE`	Enumerated property Joining_Type. Returns UJoiningType values.
`UCHAR_JOIN_CONTROL`	Binary property Join_Control. Format controls for cursive joining and ligation.
`UCHAR_LEAD_CANONICAL_COMBINING_CLASS`	Enumerated property Lead_Canonical_Combining_Class. ICU-specific property for the ccc of the first code point of the decomposition, or lccc(c)=ccc(NFD(c)[0]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS.
`UCHAR_LINE_BREAK`	Enumerated property Line_Break. Returns ULineBreak values.
`UCHAR_LOGICAL_ORDER_EXCEPTION`	Binary property Logical_Order_Exception (new in Unicode 3.2). Characters that do not use logical order and require special handling in most processing.
`UCHAR_LOWERCASE`	Binary property Lowercase. Same as u_isULowercase, different from u_islower. Ll+Other_Lowercase
`UCHAR_LOWERCASE_MAPPING`	String property Lowercase_Mapping. Corresponds to u_strToLower in ustring.h.
`UCHAR_MASK_LIMIT`	One more than the last constant for bit-mask Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCHAR_MASK_START`	First constant for bit-mask Unicode properties.
`UCHAR_MATH`	Binary property Math. Sm+Other_Math
`UCHAR_NAME`	String property Name. Corresponds to u_charName.
`UCHAR_NFC_INERT`	Binary property NFC_Inert. ICU-specific property for characters that are inert under NFC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.
`UCHAR_NFC_QUICK_CHECK`	Enumerated property NFC_Quick_Check. Returns UNormalizationCheckResult values.
`UCHAR_NFD_INERT`	Binary property NFD_Inert. ICU-specific property for characters that are inert under NFD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.
`UCHAR_NFD_QUICK_CHECK`	Enumerated property NFD_Quick_Check. Returns UNormalizationCheckResult values.
`UCHAR_NFKC_INERT`	Binary property NFKC_Inert. ICU-specific property for characters that are inert under NFKC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.
`UCHAR_NFKC_QUICK_CHECK`	Enumerated property NFKC_Quick_Check. Returns UNormalizationCheckResult values.
`UCHAR_NFKD_INERT`	Binary property NFKD_Inert. ICU-specific property for characters that are inert under NFKD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.
`UCHAR_NFKD_QUICK_CHECK`	Enumerated property NFKD_Quick_Check. Returns UNormalizationCheckResult values.
`UCHAR_NONCHARACTER_CODE_POINT`	Binary property Noncharacter_Code_Point. Code points that are explicitly defined as illegal for the encoding of characters.
`UCHAR_NUMERIC_TYPE`	Enumerated property Numeric_Type. Returns UNumericType values.
`UCHAR_NUMERIC_VALUE`	Double property Numeric_Value. Corresponds to u_getNumericValue.
`UCHAR_OTHER_PROPERTY_LIMIT`	One more than the last constant for Unicode properties with unusual value types. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCHAR_OTHER_PROPERTY_START`	First constant for Unicode properties with unusual value types.
`UCHAR_PATTERN_SYNTAX`	Binary property Pattern_Syntax (new in Unicode 4.1). See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/)
`UCHAR_PATTERN_WHITE_SPACE`	Binary property Pattern_White_Space (new in Unicode 4.1). See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/)
`UCHAR_POSIX_ALNUM`	Binary property alnum (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.
`UCHAR_POSIX_BLANK`	Binary property blank (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.
`UCHAR_POSIX_GRAPH`	Binary property graph (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.
`UCHAR_POSIX_PRINT`	Binary property print (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.
`UCHAR_POSIX_XDIGIT`	Binary property xdigit (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.
`UCHAR_PREPENDED_CONCATENATION_MARK`	Binary property Prepended_Concatenation_Mark.
`UCHAR_QUOTATION_MARK`	Binary property Quotation_Mark.
`UCHAR_RADICAL`	Binary property Radical (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences.
`UCHAR_REGIONAL_INDICATOR`	Binary property Regional_Indicator.
`UCHAR_RGI_EMOJI`	Binary property of strings RGI_Emoji. See https://www.unicode.org/reports/tr51/#Emoji_Sets
`UCHAR_RGI_EMOJI_FLAG_SEQUENCE`	Binary property of strings RGI_Emoji_Flag_Sequence. See https://www.unicode.org/reports/tr51/#Emoji_Sets
`UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE`	Binary property of strings RGI_Emoji_Modifier_Sequence. See https://www.unicode.org/reports/tr51/#Emoji_Sets
`UCHAR_RGI_EMOJI_TAG_SEQUENCE`	Binary property of strings RGI_Emoji_Tag_Sequence. See https://www.unicode.org/reports/tr51/#Emoji_Sets
`UCHAR_RGI_EMOJI_ZWJ_SEQUENCE`	Binary property of strings RGI_Emoji_ZWJ_Sequence. See https://www.unicode.org/reports/tr51/#Emoji_Sets
`UCHAR_SCRIPT`	Enumerated property Script. Same as uscript_getScript, returns UScriptCode values.
`UCHAR_SCRIPT_EXTENSIONS`	Miscellaneous property Script_Extensions (new in Unicode 6.0). Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/. Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
`UCHAR_SEGMENT_STARTER`	Binary Property Segment_Starter. ICU-specific property for characters that are starters in terms of Unicode normalization and combining character sequences. They have ccc=0 and do not occur in non-initial position of the canonical decomposition of any character (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). ICU uses this property for segmenting a string for generating a set of canonically equivalent strings, e.g. for canonical closure while processing collation tailoring rules.
`UCHAR_SENTENCE_BREAK`	Enumerated property Sentence_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) Returns USentenceBreak values.
`UCHAR_SIMPLE_CASE_FOLDING`	String property Simple_Case_Folding. Corresponds to u_foldCase.
`UCHAR_SIMPLE_LOWERCASE_MAPPING`	String property Simple_Lowercase_Mapping. Corresponds to u_tolower.
`UCHAR_SIMPLE_TITLECASE_MAPPING`	String property Simple_Titlecase_Mapping. Corresponds to u_totitle.
`UCHAR_SIMPLE_UPPERCASE_MAPPING`	String property Simple_Uppercase_Mapping. Corresponds to u_toupper.
`UCHAR_SOFT_DOTTED`	Binary property Soft_Dotted (new in Unicode 3.2). Characters with a "soft dot", like i or j. An accent placed on these characters causes the dot to disappear.
`UCHAR_STRING_LIMIT`	One more than the last constant for string Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`UCHAR_STRING_START`	First constant for string Unicode properties.
`UCHAR_S_TERM`	Binary property STerm (new in Unicode 4.0.1). Sentence Terminal. Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/)
`UCHAR_TERMINAL_PUNCTUATION`	Binary property Terminal_Punctuation. Punctuation characters that generally mark the end of textual units.
`UCHAR_TITLECASE_MAPPING`	String property Titlecase_Mapping. Corresponds to u_strToTitle in ustring.h.
`UCHAR_TRAIL_CANONICAL_COMBINING_CLASS`	Enumerated property Trail_Canonical_Combining_Class. ICU-specific property for the ccc of the last code point of the decomposition, or tccc(c)=ccc(NFD(c)[last]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS.
`UCHAR_UNICODE_1_NAME`	String property Unicode_1_Name. This property is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this property. Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). Deprecated. ICU 49
`UCHAR_UNIFIED_IDEOGRAPH`	Binary property Unified_Ideograph (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences.
`UCHAR_UPPERCASE`	Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. Lu+Other_Uppercase
`UCHAR_UPPERCASE_MAPPING`	String property Uppercase_Mapping. Corresponds to u_strToUpper in ustring.h.
`UCHAR_VARIATION_SELECTOR`	Binary property Variation_Selector (new in Unicode 4.0.1). Indicates all those characters that qualify as Variation Selectors. For details on the behavior of these characters, see StandardizedVariants.html and 15.6 Variation Selectors.
`UCHAR_VERTICAL_ORIENTATION`	Enumerated property Vertical_Orientation. Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). New as a UCD property in Unicode 10.0.
`UCHAR_WHITE_SPACE`	Binary property White_Space. Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. Space characters+TAB+CR+LF-ZWSP-ZWNBSP
`UCHAR_WORD_BREAK`	Enumerated property Word_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) Returns UWordBreakValues values.
`UCHAR_XID_CONTINUE`	Binary property XID_Continue. ID_Continue modified to allow closure under normalization forms NFKC and NFKD.
`UCHAR_XID_START`	Binary property XID_Start. ID_Start modified to allow closure under normalization forms NFKC and NFKD.

UPropertyNameChoice

 UPropertyNameChoice

Selector constants for u_getPropertyName() and u_getPropertyValueName().

These selectors are used to choose which name is returned for a given property or value. All properties and values have a long name. Most have a short name, but some do not. Unicode allows for additional names, beyond the long and short name, which would be indicated by U_LONG_PROPERTY_NAME + i, where i=1, 2,...

See also: u_getPropertyName() See also: u_getPropertyValueName()

Properties

U_LONG_PROPERTY_NAME

Properties
`U_LONG_PROPERTY_NAME`
`U_PROPERTY_NAME_CHOICE_COUNT`	One more than the highest normal UPropertyNameChoice value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_SHORT_PROPERTY_NAME`

U_PROPERTY_NAME_CHOICE_COUNT

One more than the highest normal UPropertyNameChoice value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_SHORT_PROPERTY_NAME

UScriptCode

 UScriptCode

Constants for ISO 15924 script codes.

The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .

In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.

Properties
`USCRIPT_ADLAM`
`USCRIPT_AFAKA`
`USCRIPT_AHOM`
`USCRIPT_ANATOLIAN_HIEROGLYPHS`
`USCRIPT_ARABIC`
`USCRIPT_ARMENIAN`
`USCRIPT_AVESTAN`
`USCRIPT_BALINESE`
`USCRIPT_BAMUM`
`USCRIPT_BASSA_VAH`
`USCRIPT_BATAK`
`USCRIPT_BENGALI`
`USCRIPT_BHAIKSUKI`
`USCRIPT_BLISSYMBOLS`
`USCRIPT_BOOK_PAHLAVI`
`USCRIPT_BOPOMOFO`
`USCRIPT_BRAHMI`
`USCRIPT_BRAILLE`
`USCRIPT_BUGINESE`
`USCRIPT_BUHID`
`USCRIPT_CANADIAN_ABORIGINAL`	Canadian_Aboriginal script.
`USCRIPT_CARIAN`
`USCRIPT_CAUCASIAN_ALBANIAN`
`USCRIPT_CHAKMA`
`USCRIPT_CHAM`
`USCRIPT_CHEROKEE`
`USCRIPT_CHORASMIAN`
`USCRIPT_CIRTH`
`USCRIPT_CODE_LIMIT`	One more than the highest normal UScriptCode value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`USCRIPT_COMMON`
`USCRIPT_COPTIC`
`USCRIPT_CUNEIFORM`
`USCRIPT_CYPRIOT`
`USCRIPT_CYPRO_MINOAN`
`USCRIPT_CYRILLIC`
`USCRIPT_DEMOTIC_EGYPTIAN`
`USCRIPT_DESERET`
`USCRIPT_DEVANAGARI`
`USCRIPT_DIVES_AKURU`
`USCRIPT_DOGRA`
`USCRIPT_DUPLOYAN`
`USCRIPT_DUPLOYAN_SHORTAND`	Deprecated. ICU 54 Typo, use USCRIPT_DUPLOYAN
`USCRIPT_EASTERN_SYRIAC`
`USCRIPT_EGYPTIAN_HIEROGLYPHS`
`USCRIPT_ELBASAN`
`USCRIPT_ELYMAIC`
`USCRIPT_ESTRANGELO_SYRIAC`
`USCRIPT_ETHIOPIC`
`USCRIPT_GEORGIAN`
`USCRIPT_GLAGOLITIC`
`USCRIPT_GOTHIC`
`USCRIPT_GRANTHA`
`USCRIPT_GREEK`
`USCRIPT_GUJARATI`
`USCRIPT_GUNJALA_GONDI`
`USCRIPT_GURMUKHI`
`USCRIPT_HAN`
`USCRIPT_HANGUL`
`USCRIPT_HANIFI_ROHINGYA`
`USCRIPT_HANUNOO`
`USCRIPT_HAN_WITH_BOPOMOFO`
`USCRIPT_HARAPPAN_INDUS`
`USCRIPT_HATRAN`
`USCRIPT_HEBREW`
`USCRIPT_HIERATIC_EGYPTIAN`
`USCRIPT_HIRAGANA`
`USCRIPT_IMPERIAL_ARAMAIC`
`USCRIPT_INHERITED`
`USCRIPT_INSCRIPTIONAL_PAHLAVI`
`USCRIPT_INSCRIPTIONAL_PARTHIAN`
`USCRIPT_INVALID_CODE`
`USCRIPT_JAMO`
`USCRIPT_JAPANESE`
`USCRIPT_JAVANESE`
`USCRIPT_JURCHEN`
`USCRIPT_KAITHI`
`USCRIPT_KANNADA`
`USCRIPT_KATAKANA`
`USCRIPT_KATAKANA_OR_HIRAGANA`	New script code in Unicode 4.0.1.
`USCRIPT_KAWI`
`USCRIPT_KAYAH_LI`
`USCRIPT_KHAROSHTHI`
`USCRIPT_KHITAN_SMALL_SCRIPT`
`USCRIPT_KHMER`
`USCRIPT_KHOJKI`
`USCRIPT_KHUDAWADI`
`USCRIPT_KHUTSURI`
`USCRIPT_KOREAN`
`USCRIPT_KPELLE`
`USCRIPT_LANNA`
`USCRIPT_LAO`
`USCRIPT_LATIN`
`USCRIPT_LATIN_FRAKTUR`
`USCRIPT_LATIN_GAELIC`
`USCRIPT_LEPCHA`
`USCRIPT_LIMBU`
`USCRIPT_LINEAR_A`
`USCRIPT_LINEAR_B`
`USCRIPT_LISU`
`USCRIPT_LOMA`
`USCRIPT_LYCIAN`
`USCRIPT_LYDIAN`
`USCRIPT_MAHAJANI`
`USCRIPT_MAKASAR`
`USCRIPT_MALAYALAM`
`USCRIPT_MANDAEAN`
`USCRIPT_MANDAIC`
`USCRIPT_MANICHAEAN`
`USCRIPT_MARCHEN`
`USCRIPT_MASARAM_GONDI`
`USCRIPT_MATHEMATICAL_NOTATION`
`USCRIPT_MAYAN_HIEROGLYPHS`
`USCRIPT_MEDEFAIDRIN`
`USCRIPT_MEITEI_MAYEK`
`USCRIPT_MENDE`	Mende Kikakui.
`USCRIPT_MEROITIC`
`USCRIPT_MEROITIC_CURSIVE`
`USCRIPT_MEROITIC_HIEROGLYPHS`
`USCRIPT_MIAO`
`USCRIPT_MODI`
`USCRIPT_MONGOLIAN`
`USCRIPT_MOON`
`USCRIPT_MRO`
`USCRIPT_MULTANI`
`USCRIPT_MYANMAR`
`USCRIPT_NABATAEAN`
`USCRIPT_NAG_MUNDARI`
`USCRIPT_NAKHI_GEBA`
`USCRIPT_NANDINAGARI`
`USCRIPT_NEWA`
`USCRIPT_NEW_TAI_LUE`
`USCRIPT_NKO`
`USCRIPT_NUSHU`
`USCRIPT_NYIAKENG_PUACHUE_HMONG`
`USCRIPT_OGHAM`
`USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC`
`USCRIPT_OLD_HUNGARIAN`
`USCRIPT_OLD_ITALIC`
`USCRIPT_OLD_NORTH_ARABIAN`
`USCRIPT_OLD_PERMIC`
`USCRIPT_OLD_PERSIAN`
`USCRIPT_OLD_SOGDIAN`
`USCRIPT_OLD_SOUTH_ARABIAN`
`USCRIPT_OLD_UYGHUR`
`USCRIPT_OL_CHIKI`
`USCRIPT_ORIYA`
`USCRIPT_ORKHON`
`USCRIPT_OSAGE`
`USCRIPT_OSMANYA`
`USCRIPT_PAHAWH_HMONG`
`USCRIPT_PALMYRENE`
`USCRIPT_PAU_CIN_HAU`
`USCRIPT_PHAGS_PA`
`USCRIPT_PHOENICIAN`
`USCRIPT_PHONETIC_POLLARD`
`USCRIPT_PSALTER_PAHLAVI`
`USCRIPT_REJANG`
`USCRIPT_RONGORONGO`
`USCRIPT_RUNIC`
`USCRIPT_SAMARITAN`
`USCRIPT_SARATI`
`USCRIPT_SAURASHTRA`
`USCRIPT_SHARADA`
`USCRIPT_SHAVIAN`
`USCRIPT_SIDDHAM`
`USCRIPT_SIGN_WRITING`	Sutton SignWriting.
`USCRIPT_SIMPLIFIED_HAN`
`USCRIPT_SINDHI`
`USCRIPT_SINHALA`
`USCRIPT_SOGDIAN`
`USCRIPT_SORA_SOMPENG`
`USCRIPT_SOYOMBO`
`USCRIPT_SUNDANESE`
`USCRIPT_SYLOTI_NAGRI`
`USCRIPT_SYMBOLS`
`USCRIPT_SYMBOLS_EMOJI`
`USCRIPT_SYRIAC`
`USCRIPT_TAGALOG`
`USCRIPT_TAGBANWA`
`USCRIPT_TAI_LE`
`USCRIPT_TAI_VIET`
`USCRIPT_TAKRI`
`USCRIPT_TAMIL`
`USCRIPT_TANGSA`
`USCRIPT_TANGUT`
`USCRIPT_TELUGU`
`USCRIPT_TENGWAR`
`USCRIPT_THAANA`
`USCRIPT_THAI`
`USCRIPT_TIBETAN`
`USCRIPT_TIFINAGH`
`USCRIPT_TIRHUTA`
`USCRIPT_TOTO`
`USCRIPT_TRADITIONAL_HAN`
`USCRIPT_UCAS`	Canadian_Aboriginal script (alias).
`USCRIPT_UGARITIC`
`USCRIPT_UNKNOWN`
`USCRIPT_UNWRITTEN_LANGUAGES`
`USCRIPT_VAI`
`USCRIPT_VISIBLE_SPEECH`
`USCRIPT_VITHKUQI`
`USCRIPT_WANCHO`
`USCRIPT_WARANG_CITI`
`USCRIPT_WESTERN_SYRIAC`
`USCRIPT_WOLEAI`
`USCRIPT_YEZIDI`
`USCRIPT_YI`
`USCRIPT_ZANABAZAR_SQUARE`

UScriptUsage

 UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

Properties
`USCRIPT_USAGE_ASPIRATIONAL`	Aspirational Use script.
`USCRIPT_USAGE_EXCLUDED`	Candidate for Exclusion from Identifiers.
`USCRIPT_USAGE_LIMITED_USE`	Limited Use script.
`USCRIPT_USAGE_NOT_ENCODED`	Not encoded in Unicode.
`USCRIPT_USAGE_RECOMMENDED`	Recommended script.
`USCRIPT_USAGE_UNKNOWN`	Unknown script usage.

USentenceBreak

 USentenceBreak

Sentence Break constants.

See also: UCHAR_SENTENCE_BREAK

Properties
`U_SB_ATERM`
`U_SB_CLOSE`
`U_SB_COUNT`	One more than the highest normal USentenceBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_SB_CR`
`U_SB_EXTEND`
`U_SB_FORMAT`
`U_SB_LF`
`U_SB_LOWER`
`U_SB_NUMERIC`
`U_SB_OLETTER`
`U_SB_OTHER`
`U_SB_SCONTINUE`
`U_SB_SEP`
`U_SB_SP`
`U_SB_STERM`
`U_SB_UPPER`

USentenceBreakTag

 USentenceBreakTag

Enum constants for the sentence break tags returned by getRuleStatus().

A range of values is defined for each category of sentence, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

Properties
`UBRK_SENTENCE_SEP`	Tag value for sentences that do not contain an ending sentence terminator ('. ', '?', '!', etc.) character, but are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
`UBRK_SENTENCE_SEP_LIMIT`	Upper bound for tags for sentences ended by a separator.
`UBRK_SENTENCE_TERM`	Tag value for sentences ending with a sentence terminator ('. ', '?', '!', etc.) character, possibly followed by a hard separator (CR, LF, PS, etc.)
`UBRK_SENTENCE_TERM_LIMIT`	Upper bound for tags for sentences ended by sentence terminators.

UTransDirection

 UTransDirection

Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.

Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.

Properties

Properties
`UTRANS_FORWARD`	UTRANS_FORWARD means from source to target for a transliterator with ID "source-target". For a transliterator opened using a rule, it means forward direction rules, e.g., "A > B".
`UTRANS_REVERSE`	UTRANS_REVERSE means from target to source for a transliterator with ID "source-target". For a transliterator opened using a rule, it means reverse direction rules, e.g., "A < B".

UTRANS_FORWARD

UTRANS_FORWARD means from source to target for a transliterator with ID "source-target".

For a transliterator opened using a rule, it means forward direction rules, e.g., "A > B".

UTRANS_REVERSE

UTRANS_REVERSE means from target to source for a transliterator with ID "source-target".

For a transliterator opened using a rule, it means reverse direction rules, e.g., "A < B".

UVerticalOrientation

 UVerticalOrientation

Vertical Orientation constants.

See also: UCHAR_VERTICAL_ORIENTATION

Properties
`U_VO_ROTATED`
`U_VO_TRANSFORMED_ROTATED`
`U_VO_TRANSFORMED_UPRIGHT`
`U_VO_UPRIGHT`

UWordBreak

 UWordBreak

Enum constants for the word break tags returned by getRuleStatus().

The numeric values of all of these constants are stable (will not change).

Properties
`UBRK_WORD_IDEO`	Tag value for words containing ideographic characters, lower limit.
`UBRK_WORD_IDEO_LIMIT`	Tag value for words containing ideographic characters, upper limit.
`UBRK_WORD_KANA`	Tag value for words containing kana characters, lower limit.
`UBRK_WORD_KANA_LIMIT`	Tag value for words containing kana characters, upper limit.
`UBRK_WORD_LETTER`	Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters, lower limit.
`UBRK_WORD_LETTER_LIMIT`	Tag value for words containing letters, upper limit.
`UBRK_WORD_NONE`	Tag value for "words" that do not fit into any of other categories. Includes spaces and most punctuation.
`UBRK_WORD_NONE_LIMIT`	Upper bound for tags for uncategorized words.
`UBRK_WORD_NUMBER`	Tag value for words that appear to be numbers, lower limit.
`UBRK_WORD_NUMBER_LIMIT`	Tag value for words that appear to be numbers, upper limit.

UWordBreakValues

 UWordBreakValues

Word Break constants.

(UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)

See also: UCHAR_WORD_BREAK

Properties
`U_WB_ALETTER`
`U_WB_COUNT`	One more than the highest normal UWordBreakValues value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.
`U_WB_CR`
`U_WB_DOUBLE_QUOTE`
`U_WB_EXTEND`
`U_WB_EXTENDNUMLET`
`U_WB_E_BASE`
`U_WB_E_BASE_GAZ`
`U_WB_E_MODIFIER`
`U_WB_FORMAT`
`U_WB_GLUE_AFTER_ZWJ`
`U_WB_HEBREW_LETTER`
`U_WB_KATAKANA`
`U_WB_LF`
`U_WB_MIDLETTER`
`U_WB_MIDNUM`
`U_WB_MIDNUMLET`
`U_WB_NEWLINE`
`U_WB_NUMERIC`
`U_WB_OTHER`
`U_WB_REGIONAL_INDICATOR`
`U_WB_SINGLE_QUOTE`
`U_WB_WSEGSPACE`
`U_WB_ZWJ`

Typedefs

OldUChar

uint16_t OldUChar

Default ICU 58 definition of UChar.

A base type for UTF-16 code units and pointers. Unsigned 16-bit integer.

Define OldUChar to be wchar_t if that is 16 bits wide. If wchar_t is not 16 bits wide, then define OldUChar to be uint16_t.

This makes the definition of OldUChar platform-dependent but allows direct string type compatibility with platforms with 16-bit wchar_t types.

This is how UChar was defined in ICU 58, for transition convenience. Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. The current UChar responds to UCHAR_TYPE but OldUChar does not.

UBidiPairedBracketType

enum UBidiPairedBracketType UBidiPairedBracketType

Bidi Paired Bracket Type constants.

See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE

UBlockCode

enum UBlockCode UBlockCode

UBool

int8_t UBool

The ICU boolean type, a signed-byte integer.

ICU-specific for historical reasons: The C and C++ standards used to not define type bool. Also provides a fixed type definition, as opposed to type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.

UBreakIteratorType

enum UBreakIteratorType UBreakIteratorType

The possible types of text boundaries.

UCPMap

struct UCPMap UCPMap

Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.

See also: UCPTrie See also: UMutableCPTrie

UCPMapValueFilter

uint32_t U_CALLCONV UCPMapValueFilter(const void *context, uint32_t value)

Callback function type: Modifies a map value.

Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). The modified value will be returned by the getRange function.

Can be used to ignore some of the value bits, make a filter for one of several values, return a value index computed from the map value, etc.

Details

Parameters

`context`	an opaque pointer, as passed into the getRange function
`value`	a value from the map

Returns

the modified value

UChar

uint16_t UChar

The base type for UTF-16 code units and pointers.

Unsigned 16-bit integer. Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.

UChar is configurable by defining the macro UCHAR_TYPE on the preprocessor or compiler command line: -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.

The default is UChar=char16_t.

C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.

In C, char16_t is a simple typedef of uint_least16_t. ICU requires uint_least16_t=uint16_t for data memory mapping. On macOS, char16_t is not available because the uchar.h standard header is missing.

UChar32

int32_t UChar32

Define UChar32 as a type for single Unicode code points.

UChar32 is a signed 32-bit integer (same as int32_t).

The Unicode code point range is 0..0x10ffff. All other values (negative or >=0x110000) are illegal as Unicode code points. They may be used as sentinel values to indicate "done", "error" or similar non-code point conditions.

Before ICU 2.4 (Jitterbug 2146), UChar32 was defined to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) or else to be uint32_t. That is, the definition of UChar32 was platform-dependent.

See also: U_SENTINEL

UCharCategory

enum UCharCategory UCharCategory

Data for enumerated Unicode general category types.

See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .

UCharDirection

enum UCharDirection UCharDirection

This specifies the language directional property of a character set.

UCharEnumTypeRange

UBool U_CALLCONV UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)

Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start <= c < limit) with the same Unicode general category ("character type").

The callback function can stop the enumeration by returning false.

Details

Parameters

`context`	an opaque pointer, as passed into u_enumCharTypes()
`start`	the first code point in a contiguous range with value
`limit`	one past the last code point in a contiguous range with value
`type`	the general category for all code points in [start..limit[

Returns

false to stop the enumeration

UCharNameChoice

enum UCharNameChoice UCharNameChoice

Selector constants for u_charName().

See also: u_charName

UColAttribute

enum UColAttribute UColAttribute

Attributes that collation service understands.

All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.

UColAttributeValue

enum UColAttributeValue UColAttributeValue

Enum containing attribute values for controlling collation behavior.

Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.

UColBoundMode

enum UColBoundMode UColBoundMode

Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.

Underlying code depends on them having these numbers

UColReorderCode

enum UColReorderCode UColReorderCode

Enum containing the codes for reordering segments of the collation table that are not script codes.

These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode

UCollationResult

enum UCollationResult UCollationResult

UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.

UCollationStrength

UColAttributeValue UCollationStrength

Base letter represents a primary difference.

Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary differences. Use this to set the strength of a Collator object. Example of primary difference, "abc" < "abd"

Diacritical differences on the same base letter represent a secondary difference. Set comparison level to UCOL_SECONDARY to ignore tertiary differences. Use this to set the strength of a Collator object. Example of secondary difference, "ä" >> "a".

Uppercase and lowercase versions of the same character represent a tertiary difference. Set comparison level to UCOL_TERTIARY to include all comparison differences. Use this to set the strength of a Collator object. Example of tertiary difference, "abc" <<< "ABC".

Two characters are considered "identical" when they have the same unicode spellings. UCOL_IDENTICAL. For example, "ä" == "ä".

UCollationStrength is also used to determine the strength of sort keys generated from UCollator objects. These values can now be found in the UColAttributeValue enum.

UCollator

struct UCollator UCollator

structure representing a collator object instance

UDate

double UDate

Date and Time data type.

This is a primitive data type that holds the date and time as the number of milliseconds since 1970-jan-01, 00:00 UTC. UTC leap seconds are ignored.

UDecompositionType

enum UDecompositionType UDecompositionType

Decomposition Type constants.

See also: UCHAR_DECOMPOSITION_TYPE

UDisplayContext

enum UDisplayContext UDisplayContext

UDisplayContextType

enum UDisplayContextType UDisplayContextType

UEastAsianWidth

enum UEastAsianWidth UEastAsianWidth

East Asian Width constants.

See also: UCHAR_EAST_ASIAN_WIDTH See also: u_getIntPropertyValue

UEnumCharNamesFn

UBool U_CALLCONV UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)

Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.

If such a function returns false, then the enumeration is stopped.

See also: UCharNameChoice See also: u_enumCharNames

Details

Parameters

`context`	The context pointer that was passed to u_enumCharNames().
`code`	The Unicode code point for the character with this name.
`nameChoice`	Selector for which kind of names is enumerated.
`name`	The character's name, zero-terminated.
`length`	The length of the name.

Returns

true if the enumeration should continue, false to stop it.

UEnumeration

struct UEnumeration UEnumeration

structure representing an enumeration object instance

UErrorCode

enum UErrorCode UErrorCode

Standard ICU4C error code type, a substitute for exceptions.

Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():

UErrorCode errorCode = U_ZERO_ERROR;
// call ICU API that needs an error code parameter.
if (U_FAILURE(errorCode)) {
    // An error occurred. Handle it here.
}

C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.

For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes

Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:

if (U_FAILURE(errorCode)) { return immediately; }

so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.

UGraphemeClusterBreak

enum UGraphemeClusterBreak UGraphemeClusterBreak

Grapheme Cluster Break constants.

See also: UCHAR_GRAPHEME_CLUSTER_BREAK

UHangulSyllableType

enum UHangulSyllableType UHangulSyllableType

Hangul Syllable Type constants.

See also: UCHAR_HANGUL_SYLLABLE_TYPE

UIndicPositionalCategory

enum UIndicPositionalCategory UIndicPositionalCategory

Indic Positional Category constants.

See also: UCHAR_INDIC_POSITIONAL_CATEGORY

UIndicSyllabicCategory

enum UIndicSyllabicCategory UIndicSyllabicCategory

Indic Syllabic Category constants.

See also: UCHAR_INDIC_SYLLABIC_CATEGORY

UJoiningGroup

enum UJoiningGroup UJoiningGroup

Joining Group constants.

See also: UCHAR_JOINING_GROUP

UJoiningType

enum UJoiningType UJoiningType

Joining Type constants.

See also: UCHAR_JOINING_TYPE

ULineBreak

enum ULineBreak ULineBreak

Line Break constants.

See also: UCHAR_LINE_BREAK

ULineBreakTag

enum ULineBreakTag ULineBreakTag

Enum constants for the line break tags returned by getRuleStatus().

The numeric values of all of these constants are stable (will not change).

ULocAvailableType

enum ULocAvailableType ULocAvailableType

Types for uloc_getAvailableByType and uloc_countAvailableByType.

ULocaleData

struct ULocaleData ULocaleData

A locale data object.

ULocaleDisplayNames

struct ULocaleDisplayNames ULocaleDisplayNames

C typedef for struct ULocaleDisplayNames.

UNormalizationCheckResult

enum UNormalizationCheckResult UNormalizationCheckResult

Result values for normalization quick check functions.

For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms

UNormalizer2

struct UNormalizer2 UNormalizer2

C typedef for struct UNormalizer2.

UNumericType

enum UNumericType UNumericType

Numeric Type constants.

See also: UCHAR_NUMERIC_TYPE

UParseError

struct UParseError UParseError

A UParseError struct is used to return detailed information about parsing errors.

It is used by ICU parsing engines that parse long rules, patterns, or programs, where the text being parsed is long enough that more information than a UErrorCode is needed to localize the error.

The line, offset, and context fields are optional; parsing engines may choose not to use them.

The preContext and postContext strings include some part of the context surrounding the error. If the source text is "let for=7" and "for" is the error (e.g., because it is a reserved word), then some examples of what a parser might produce are the following:

preContext   postContext
""           ""            The parser does not support context
"let "       "=7"          Pre- and post-context only
"let "       "for=7"       Pre- and post-context and error text
""           "for"         Error text only

Examples of engines which use UParseError (or may use it in the future) are Transliterator, RuleBasedBreakIterator, and RegexPattern.

UProperty

enum UProperty UProperty

Selection constants for Unicode properties.

These constants are used in functions like u_hasBinaryProperty to select one of the Unicode properties.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).

For details about the properties see UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).

See also: u_hasBinaryProperty See also: u_getIntPropertyValue See also: u_getUnicodeVersion

UPropertyNameChoice

enum UPropertyNameChoice UPropertyNameChoice

Selector constants for u_getPropertyName() and u_getPropertyValueName().

See also: u_getPropertyName() See also: u_getPropertyValueName()

UReplaceable

void * UReplaceable

An opaque replaceable text object.

This will be manipulated only through the caller-supplied UReplaceableFunctor struct. Related to the C++ class Replaceable. This is currently only used in the Transliterator C API, see utrans.h .

UReplaceableCallbacks

struct UReplaceableCallbacks UReplaceableCallbacks

A set of function pointers that transliterators use to manipulate a UReplaceable.

The caller should supply the required functions to manipulate their text appropriately. Related to the C++ class Replaceable.

UScriptCode

enum UScriptCode UScriptCode

Constants for ISO 15924 script codes.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

UScriptUsage

enum UScriptUsage UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

USentenceBreak

enum USentenceBreak USentenceBreak

Sentence Break constants.

See also: UCHAR_SENTENCE_BREAK

USentenceBreakTag

enum USentenceBreakTag USentenceBreakTag

Enum constants for the sentence break tags returned by getRuleStatus().

The numeric values of all of these constants are stable (will not change).

UText

struct UText UText

C typedef for struct UText.

UTransDirection

enum UTransDirection UTransDirection

Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.

Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.

UTransPosition

struct UTransPosition UTransPosition

Position structure for utrans_transIncremental() incremental transliteration.

This structure defines two substrings of the text being transliterated. The first region, [contextStart, contextLimit), defines what characters the transliterator will read as context. The second region, [start, limit), defines what characters will actually be transliterated. The second region should be a subset of the first.

After a transliteration operation, some of the indices in this structure will be modified. See the field descriptions for details.

contextStart <= start <= limit <= contextLimit

Note: All index values in this structure must be at code point boundaries. That is, none of them may occur between two code units of a surrogate pair. If any index does split a surrogate pair, results are unspecified.

UTransliterator

void * UTransliterator

An opaque transliterator for use in C.

Open with utrans_openxxx() and close with utrans_close() when done. Equivalent to the C++ class Transliterator and its subclasses. See also: Transliterator

UVersionInfo

uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]

The binary form of a version on ICU APIs is an array of 4 uint8_t.

To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).

UVerticalOrientation

enum UVerticalOrientation UVerticalOrientation

Vertical Orientation constants.

See also: UCHAR_VERTICAL_ORIENTATION

UWordBreak

enum UWordBreak UWordBreak

Enum constants for the word break tags returned by getRuleStatus().

The numeric values of all of these constants are stable (will not change).

UWordBreakValues

enum UWordBreakValues UWordBreakValues

Word Break constants.

(UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)

See also: UCHAR_WORD_BREAK

Variables

context

U_CDECL_BEGIN typedef void * context

Functions

UChar

U_CDECL_BEGIN typedef UChar(
  U_CALLCONV *UNESCAPE_CHAR_AT
)(int32_t offset, void *context)

Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.

The context pointer will be whatever is passed into u_unescapeAt().

See also:u_unescapeAt

Details

Parameters

`offset`	pointer to the offset that will be passed to u_unescapeAt().
`context`	an opaque pointer passed directly into u_unescapeAt()

Returns

the character represented by the escape sequence at offset

u_charAge

U_CAPI void U_EXPORT2 u_charAge(
  UChar32 c,
  UVersionInfo versionArray
)

Get the "age" of the code point.

The "age" is the Unicode version when the code point was first designated (as a non-character or for Private Use) or assigned a character. This can be useful to avoid emitting code points to receiving processes that do not accept newer characters. The data is from the UCD file DerivedAge.txt.

Details

Parameters

`c`	The code point.
`versionArray`	The Unicode version number array, to be filled in.

u_charDigitValue

U_CAPI int32_t U_EXPORT2 u_charDigitValue(
  UChar32 c
)

Returns the decimal digit value of a decimal digit character.

Such characters have the general category "Nd" (decimal digit numbers) and a Numeric_Type of Decimal.

Unlike ICU releases before 2.6, no digit values are returned for any Han characters because Han number characters are often used with a special Chinese-style number format (with characters for powers of 10 in between) instead of in decimal-positional notation. Unicode 4 explicitly assigns Han number characters the Numeric_Type Numeric instead of Decimal. See Jitterbug 1483 for more details.

Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() for complete numeric Unicode properties.

See also: u_getNumericValue

Details

Parameters

`c`	the code point for which to get the decimal digit value

Returns

the decimal digit value of c, or -1 if c is not a decimal digit character

u_charDirection

U_CAPI UCharDirection U_EXPORT2 u_charDirection(
  UChar32 c
)

Returns the bidirectional category value for the code point, which is used in the Unicode bidirectional algorithm (UAX #9 http://www.unicode.org/reports/tr9/).

Note that some unassigned code points have bidi values of R or AL because they are in blocks that are reserved for Right-To-Left scripts.

Same as java.lang.Character.getDirectionality()

See also: UCharDirection

Details

Parameters

`c`	the code point to be tested

Returns

the bidirectional category (UCharDirection) value

u_charFromName

U_CAPI UChar32 U_EXPORT2 u_charFromName(
  UCharNameChoice nameChoice,
  const char *name,
  UErrorCode *pErrorCode
)

Find a Unicode character by its name and return its code point value.

The name is matched exactly and completely. If the name does not correspond to a code point, pErrorCode is set to U_INVALID_CHAR_FOUND. A Unicode 1.0 name is matched only if it differs from the modern name. Unicode names are all uppercase. Extended names are lowercase followed by an uppercase hexadecimal number, and within angle brackets.

See also: UCharNameChoice See also: u_charName See also: u_enumCharNames

Details

Parameters

`nameChoice`	Selector for which name to match.
`name`	The name to match.
`pErrorCode`	Pointer to a UErrorCode variable

Returns

The Unicode value of the code point with the given name, or an undefined value if there is no such code point.

u_charMirror

U_CAPI UChar32 U_EXPORT2 u_charMirror(
  UChar32 c
)

Maps the specified character to a "mirror-image" character.

For characters with the Bidi_Mirrored property, implementations sometimes need a "poor man's" mapping to another Unicode character (code point) such that the default glyph may serve as the mirror-image of the default glyph of the specified character. This is useful for text conversion to and from codepages with visual order, and for displays without glyph selection capabilities.

See also: UCHAR_BIDI_MIRRORED See also: u_isMirrored

Details

Parameters

`c`	the code point to be mapped

Returns

another Unicode code point that may serve as a mirror-image substitute, or c itself if there is no such mapping or c does not have the Bidi_Mirrored property

u_charName

U_CAPI int32_t U_EXPORT2 u_charName(
  UChar32 code,
  UCharNameChoice nameChoice,
  char *buffer,
  int32_t bufferLength,
  UErrorCode *pErrorCode
)

Retrieve the name of a Unicode character.

Depending on nameChoice, the character name written into the buffer is the "modern" name or the name that was defined in Unicode version 1.0. The name contains only "invariant" characters like A-Z, 0-9, space, and '-'. Unicode 1.0 names are only retrieved if they are different from the modern names and if the data file contains the data for them. gennames may or may not be called with a command line option to include 1.0 names in unames.dat.

See also: UCharNameChoice See also: u_charFromName See also: u_enumCharNames

Details

Parameters

`code`	The character (code point) for which to get the name. It must be `0<=code<=0x10ffff`.
`nameChoice`	Selector for which name to get.
`buffer`	Destination address for copying the name. The name will always be zero-terminated. If there is no name, then the buffer will be set to the empty string.
`bufferLength`	`==sizeof(buffer)`
`pErrorCode`	Pointer to a UErrorCode variable; check for `U_SUCCESS()` after `u_charName()` returns.

Returns

The length of the name, or 0 if there is no name for this character. If the bufferLength is less than or equal to the length, then the buffer contains the truncated name and the returned length indicates the full length of the name. The length does not include the zero-termination.

u_charType

U_CAPI int8_t U_EXPORT2 u_charType(
  UChar32 c
)

Returns the general category value for the code point.

Same as java.lang.Character.getType().

See also: UCharCategory

Details

Parameters

`c`	the code point to be tested

Returns

the general category (UCharCategory) value

u_countChar32

U_CAPI int32_t U_EXPORT2 u_countChar32(
  const UChar *s,
  int32_t length
)

Count Unicode code points in the length UChar code units of the string.

A code point may occupy either one or two UChar code units. Counting code points involves reading all code units.

This function is basically the inverse of the U16_FWD_N() macro (see utf.h).

Details

Parameters

`s`	The input string.
`length`	The number of UChar code units to be checked, or -1 to count all code points before the first NUL (U+0000).

Returns

The number of code points in the specified code units.

u_digit

U_CAPI int32_t U_EXPORT2 u_digit(
  UChar32 ch,
  int8_t radix
)

Returns the decimal digit value of the code point in the specified radix.

If the radix is not in the range 2<=radix<=36 or if the value of ch is not a valid digit in the specified radix, -1 is returned. A character is a valid digit if at least one of the following is true:

The character has a decimal digit value. Such characters have the general category "Nd" (decimal digit numbers) and a Numeric_Type of Decimal. In this case the value is the character's decimal digit value.
The character is one of the uppercase Latin letters 'A' through 'Z'. In this case the value is ch-'A'+10.
The character is one of the lowercase Latin letters 'a' through 'z'. In this case the value is ch-'a'+10.
Latin letters from both the ASCII range (0061..007A, 0041..005A) as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) are recognized.

Same as java.lang.Character.digit().

See also: UCHAR_NUMERIC_TYPE See also: u_forDigit See also: u_charDigitValue See also: u_isdigit

Details

Parameters

`ch`	the code point to be tested.
`radix`	the radix.

Returns

the numeric value represented by the character in the specified radix, or -1 if there is no value or if the value exceeds the radix.

u_enumCharNames

U_CAPI void U_EXPORT2 u_enumCharNames(
  UChar32 start,
  UChar32 limit,
  UEnumCharNamesFn *fn,
  void *context,
  UCharNameChoice nameChoice,
  UErrorCode *pErrorCode
)

Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.

For Unicode 1.0 names, only those are enumerated that differ from the modern names.

See also: UCharNameChoice See also: UEnumCharNamesFn See also: u_charName See also: u_charFromName

Details

Parameters

`start`	The first code point in the enumeration range.
`limit`	One more than the last code point in the enumeration range (the first one after the range).
`fn`	The function that is to be called for each character name.
`context`	An arbitrary pointer that is passed to the function.
`nameChoice`	Selector for which kind of names to enumerate.
`pErrorCode`	Pointer to a UErrorCode variable

u_enumCharTypes

U_CAPI void U_EXPORT2 u_enumCharTypes(
  UCharEnumTypeRange *enumRange,
  const void *context
)

Enumerate efficiently all code points with their Unicode general categories.

This is useful for building data structures (e.g., UnicodeSet's), for enumerating all assigned code points (type!=U_UNASSIGNED), etc.

For each contiguous range of code points with a given general category ("character type"), the UCharEnumTypeRange function is called. Adjacent ranges have different types. The Unicode Standard guarantees that the numeric value of the type is 0..31.

Details

Parameters

`enumRange`	a pointer to a function that is called for each contiguous range of code points with the same general category
`context`	an opaque pointer that is passed on to the callback function

u_errorName

U_CAPI const char *U_EXPORT2 u_errorName(
  UErrorCode code
)

Return a string for a UErrorCode value.

The string will be the same as the name of the error code constant in the UErrorCode enum above.

u_foldCase

U_CAPI UChar32 U_EXPORT2 u_foldCase(
  UChar32 c,
  uint32_t options
)

The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.

This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings

Details

Parameters

`c`	the code point to be mapped
`options`	Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I

Returns

the Simple_Case_Folding of the code point, if any; otherwise the code point itself.

u_forDigit

U_CAPI UChar32 U_EXPORT2 u_forDigit(
  int32_t digit,
  int8_t radix
)

Determines the character representation for a specific digit in the specified radix.

If the value of radix is not a valid radix, or the value of digit is not a valid digit in the specified radix, the null character (U+0000) is returned.

The radix argument is valid if it is greater than or equal to 2 and less than or equal to 36. The digit argument is valid if 0 <= digit < radix.

If the digit is less than 10, then '0' + digit is returned. Otherwise, the value 'a' + digit - 10 is returned.

Same as java.lang.Character.forDigit().

See also: u_digit See also: u_charDigitValue See also: u_isdigit

Details

Parameters

`digit`	the number to convert to a character.
`radix`	the radix.

Returns

the char representation of the specified digit in the specified radix.

u_getBidiPairedBracket

U_CAPI UChar32 U_EXPORT2 u_getBidiPairedBracket(
  UChar32 c
)

Maps the specified character to its paired bracket character.

For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). Otherwise c itself is returned. See http://www.unicode.org/reports/tr9/

See also: UCHAR_BIDI_PAIRED_BRACKET See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE See also: u_charMirror

Details

Parameters

`c`	the code point to be mapped

Returns

the paired bracket code point, or c itself if there is no such mapping (Bidi_Paired_Bracket_Type=None)

u_getCombiningClass

U_CAPI uint8_t U_EXPORT2 u_getCombiningClass(
  UChar32 c
)

Returns the combining class of the code point as specified in UnicodeData.txt.

Details

Parameters

`c`	the code point of the character

Returns

the combining class of the character

u_getIntPropertyMaxValue

U_CAPI int32_t U_EXPORT2 u_getIntPropertyMaxValue(
  UProperty which
)

Get the maximum value for an enumerated/integer/binary Unicode property.

Can be used together with u_getIntPropertyMinValue to allocate arrays of UnicodeSet or similar.

Examples for min/max values (for Unicode 3.2):

UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
UCHAR_IDEOGRAPHIC: 0/1 (false/true)

For undefined UProperty constant values, min/max values will be 0/-1.

See also: UProperty See also: u_hasBinaryProperty See also: u_getUnicodeVersion See also: u_getIntPropertyMinValue See also: u_getIntPropertyValue

Details

Parameters

which

UProperty selector constant, identifies which binary property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.

Returns

Maximum value returned by u_getIntPropertyValue for a Unicode property. <=0 if the property selector is out of range.

u_getIntPropertyMinValue

U_CAPI int32_t U_EXPORT2 u_getIntPropertyMinValue(
  UProperty which
)

Get the minimum value for an enumerated/integer/binary Unicode property.

Can be used together with u_getIntPropertyMaxValue to allocate arrays of UnicodeSet or similar.

See also: UProperty See also: u_hasBinaryProperty See also: u_getUnicodeVersion See also: u_getIntPropertyMaxValue See also: u_getIntPropertyValue

Details

Parameters

which

UProperty selector constant, identifies which binary property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.

Returns

Minimum value returned by u_getIntPropertyValue for a Unicode property. 0 if the property selector is out of range.

u_getIntPropertyValue

U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue(
  UChar32 c,
  UProperty which
)

Get the property value for an enumerated or integer Unicode property for a code point.

Also returns binary and mask property values.

Unicode, especially in version 3.2, defines many more properties than the original set in UnicodeData.txt.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ . For names of Unicode properties see the UCD file PropertyAliases.txt.

Sample usage: UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);

See also: UProperty See also: u_hasBinaryProperty See also: u_getIntPropertyMinValue See also: u_getIntPropertyMaxValue See also: u_getIntPropertyMap See also: u_getUnicodeVersion

Details

Parameters

`c`	Code point to test.
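`which`	UProperty selector constant, identifies which property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=which<UCHAR_INT_LIMIT or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.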

Returns

Numeric value that is directly the property value or, for enumerated properties, corresponds to the numeric value of the enumerated constant of the respective property value enumeration type (cast to enum type if necessary). Returns 0 or 1 (for false/true) for binary Unicode properties. Returns a bit-mask for mask properties. Returns 0 if 'which' is out of bounds or if the Unicode version does not have data for the property at all, or not for this code point.

u_getNumericValue

U_CAPI double U_EXPORT2 u_getNumericValue(
  UChar32 c
)

Get the numeric value for a Unicode code point as defined in the Unicode Character Database.

A "double" return type is necessary because some numeric values are fractions, negative, or too large for int32_t.

For characters without any numeric values in the Unicode Character Database, this function will return U_NO_NUMERIC_VALUE. Note: This is different from the Unicode Standard which specifies NaN as the default value. (NaN is not available on all platforms.)

Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() also supports negative values, large values, and fractions, while Java's getNumericValue() returns values 10..35 for ASCII letters.

See also: U_NO_NUMERIC_VALUE

Details

Parameters

`c`	Code point to get the numeric value for.

Returns

Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.

u_getPropertyEnum

U_CAPI UProperty U_EXPORT2 u_getPropertyEnum(
  const char *alias
)

Return the UProperty enum for a given property name, as specified in the Unicode database file PropertyAliases.txt.

Short, long, and any other variants are recognized.

In addition, this function maps the synthetic names "gcm" / "General_Category_Mask" to the property UCHAR_GENERAL_CATEGORY_MASK. These names are not in PropertyAliases.txt.

See also: UProperty

Details

Parameters

alias

the property name to be matched. The name is compared using "loose matching" as described in PropertyAliases.txt.

Returns

a UProperty enum, or UCHAR_INVALID_CODE if the given name does not match any property.

u_getPropertyName

U_CAPI const char *U_EXPORT2 u_getPropertyName(
  UProperty property,
  UPropertyNameChoice nameChoice
)

Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases.txt.

In addition, this function maps the property UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / "General_Category_Mask". These names are not in PropertyAliases.txt.

See also: UProperty See also: UPropertyNameChoice

Details

Parameters

`property`	UProperty selector other than UCHAR_INVALID_CODE. If out of range, NULL is returned.
`nameChoice`	selector for which name to get. If out of range, NULL is returned. All properties have a long name. Most have a short name, but some do not. Unicode allows for additional names; if present these will be returned by U_LONG_PROPERTY_NAME + i, where i=1, 2,...

Returns

a pointer to the name, or NULL if either the property or the nameChoice is out of range. If a given nameChoice returns NULL, then all larger values of nameChoice will return NULL, with one exception: if NULL is returned for U_SHORT_PROPERTY_NAME, then U_LONG_PROPERTY_NAME (and higher) may still return a non-NULL value. The returned pointer is valid until u_cleanup() is called.

u_getPropertyValueEnum

U_CAPI int32_t U_EXPORT2 u_getPropertyValueEnum(
  UProperty property,
  const char *alias
)

Return the property value integer for a given value name, as specified in the Unicode database file PropertyValueAliases.txt.

Short, long, and any other variants are recognized.

Note: Some of the names in PropertyValueAliases.txt will only be recognized with UCHAR_GENERAL_CATEGORY_MASK, not UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" / "Punctuation", "S" / "Symbol", and "Z" / "Separator".

See also: UProperty

Details

Parameters

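`property`	UProperty selector constant. Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=property<UCHAR_INT_LIMIT or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT. If out of range, UCHAR_INVALID_CODE is returned.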
`alias`	the value name to be matched. The name is compared using "loose matching" as described in PropertyValueAliases.txt.

Returns

a value integer or UCHAR_INVALID_CODE if the given name does not match any value of the given property, or if the property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values are not values of UCharCategory, but rather mask values produced by U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented.

u_getPropertyValueName

U_CAPI const char *U_EXPORT2 u_getPropertyValueName(
  UProperty property,
  int32_t value,
  UPropertyNameChoice nameChoice
)

Return the Unicode name for a given property value, as given in the Unicode database file PropertyValueAliases.txt.

Note: Some of the names in PropertyValueAliases.txt can only be retrieved using UCHAR_GENERAL_CATEGORY_MASK, not UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" / "Punctuation", "S" / "Symbol", and "Z" / "Separator".

See also: UProperty See also: UPropertyNameChoice

Details

Parameters

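`property`	UProperty selector constant. Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT or UCHAR_INT_START<=property<UCHAR_INT_LIMIT or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT. If out of range, NULL is returned.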
`value`	selector for a value for the given property. If out of range, NULL is returned. In general, valid values range from 0 up to some maximum. There are a few exceptions: (1.) UCHAR_BLOCK values begin at the non-zero value UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS values are not contiguous and range from 0..240. (3.) UCHAR_GENERAL_CATEGORY_MASK values are not values of UCharCategory, but rather mask values produced by U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented. Mask values range non-contiguously from 1..U_GC_P_MASK.
`nameChoice`	selector for which name to get. If out of range, NULL is returned. All values have a long name. Most have a short name, but some do not. Unicode allows for additional names; if present these will be returned by U_LONG_PROPERTY_NAME + i, where i=1, 2,...

Returns

a pointer to the name, or NULL if the value or the nameChoice is out of range.

u_getUnicodeVersion

U_CAPI void U_EXPORT2 u_getUnicodeVersion(
  UVersionInfo versionArray
)

Gets the Unicode version information.

The version array is filled in with the version information for the Unicode standard that is currently used by ICU. For example, Unicode version 3.1.1 is represented as an array with the values { 3, 1, 1, 0 }.

Details

Parameters

versionArray

an output array that will be filled in with the Unicode version number

u_getVersion

U_CAPI void U_EXPORT2 u_getVersion(
  UVersionInfo versionArray
)

Gets the ICU release version.

The version array stores the version information for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. Definition of this function lives in putil.c

Details

Parameters

versionArray

the version # information, the result will be filled in

u_hasBinaryProperty

U_CAPI UBool U_EXPORT2 u_hasBinaryProperty(
  UChar32 c,
  UProperty which
)

Check a binary Unicode property for a code point.

Unicode, especially in version 3.2, defines many more properties than the original set in UnicodeData.txt.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ucd/ . For names of Unicode properties see the UCD file PropertyAliases.txt.

Important: If ICU is built with UCD files from Unicode versions below 3.2, then properties marked with "new in Unicode 3.2" are not or not fully available.

See also: UProperty See also: u_getBinaryPropertySet See also: u_getIntPropertyValue See also: u_getUnicodeVersion

Details

Parameters

`c`	Code point to test.
`which`	UProperty selector constant, identifies which binary property to check. Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.

Returns

true or false according to the binary Unicode property value for c. Also false if 'which' is out of bounds or if the Unicode version does not have data for the property at all.

u_isIDIgnorable

U_CAPI UBool U_EXPORT2 u_isIDIgnorable(
  UChar32 c
)

Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.

True for characters with general category "Cf" (format controls) as well as non-whitespace ISO controls (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).

Same as java.lang.Character.isIdentifierIgnorable().

Note that Unicode just recommends to ignore Cf (format controls).

See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_isIDStart See also: u_isIDPart

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is ignorable in identifiers according to Java

u_isIDPart

U_CAPI UBool U_EXPORT2 u_isIDPart(
  UChar32 c
)

Determines if the specified character is permissible in an identifier according to Java.

True for characters with general categories "L" (letters), "Nl" (letter numbers), "Nd" (decimal digits), "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and u_isIDIgnorable(c).

Same as java.lang.Character.isUnicodeIdentifierPart(). Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE), except that Unicode recommends ignoring only Cf (format controls), which is a narrower set than the characters accepted by u_isIDIgnorable(c).

See also: UCHAR_ID_CONTINUE See also: u_isIDStart See also: u_isIDIgnorable

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point may occur in an identifier according to Java

u_isIDStart

U_CAPI UBool U_EXPORT2 u_isIDStart(
  UChar32 c
)

Determines if the specified character is permissible as the first character in an identifier according to Unicode (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).

True for characters with general categories "L" (letters) and "Nl" (letter numbers).

Same as java.lang.Character.isUnicodeIdentifierStart(). Same as UCHAR_ID_START

See also: UCHAR_ID_START See also: u_isalpha See also: u_isIDPart

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point may start an identifier

u_isISOControl

U_CAPI UBool U_EXPORT2 u_isISOControl(
  UChar32 c
)

Determines whether the specified code point is an ISO control code.

True for U+0000..U+001f and U+007f..U+009f (general category "Cc").

Same as java.lang.Character.isISOControl().

See also: u_iscntrl

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is an ISO control code

u_isJavaIDPart

U_CAPI UBool U_EXPORT2 u_isJavaIDPart(
  UChar32 c
)

Determines if the specified character is permissible in a Java identifier.

In addition to u_isIDPart(c), true for characters with general category "Sc" (currency symbols).

Same as java.lang.Character.isJavaIdentifierPart().

See also: u_isIDIgnorable See also: u_isJavaIDStart See also: u_isalpha See also: u_isdigit See also: u_isIDPart

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point may occur in a Java identifier

u_isJavaIDStart

U_CAPI UBool U_EXPORT2 u_isJavaIDStart(
  UChar32 c
)

Determines if the specified character is permissible as the first character in a Java identifier.

In addition to u_isIDStart(c), true for characters with general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).

Same as java.lang.Character.isJavaIdentifierStart().

See also: u_isJavaIDPart See also: u_isalpha See also: u_isIDStart

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point may start a Java identifier

u_isJavaSpaceChar

U_CAPI UBool U_EXPORT2 u_isJavaSpaceChar(
  UChar32 c
)

Determine if the specified code point is a space character according to Java.

True for characters with general categories "Z" (separators), which does not include control codes (e.g., TAB or Line Feed).

Same as java.lang.Character.isSpaceChar().

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

See also: u_isspace See also: u_isWhitespace See also: u_isUWhiteSpace

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a space character according to Character.isSpaceChar()

u_isMirrored

U_CAPI UBool U_EXPORT2 u_isMirrored(
  UChar32 c
)

Determines whether the code point has the Bidi_Mirrored property.

This property is set for characters that are commonly used in Right-To-Left contexts and need to be displayed with a "mirrored" glyph.

Same as java.lang.Character.isMirrored(). Same as UCHAR_BIDI_MIRRORED

See also: UCHAR_BIDI_MIRRORED

Details

Parameters

`c`	the code point to be tested

Returns

true if the character has the Bidi_Mirrored property

u_isUAlphabetic

U_CAPI UBool U_EXPORT2 u_isUAlphabetic(
  UChar32 c
)

Check if a code point has the Alphabetic Unicode property.

Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). This is different from u_isalpha! See also: UCHAR_ALPHABETIC See also: u_isalpha See also: u_hasBinaryProperty

Details

Parameters

`c`	Code point to test

Returns

true if the code point has the Alphabetic Unicode property, false otherwise

u_isULowercase

U_CAPI UBool U_EXPORT2 u_isULowercase(
  UChar32 c
)

Check if a code point has the Lowercase Unicode property.

Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). This is different from u_islower! See also: UCHAR_LOWERCASE See also: u_islower See also: u_hasBinaryProperty

Details

Parameters

`c`	Code point to test

Returns

true if the code point has the Lowercase Unicode property, false otherwise

u_isUUppercase

U_CAPI UBool U_EXPORT2 u_isUUppercase(
  UChar32 c
)

Check if a code point has the Uppercase Unicode property.

Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). This is different from u_isupper! See also: UCHAR_UPPERCASE See also: u_isupper See also: u_hasBinaryProperty

Details

Parameters

`c`	Code point to test

Returns

true if the code point has the Uppercase Unicode property, false otherwise

u_isUWhiteSpace

U_CAPI UBool U_EXPORT2 u_isUWhiteSpace(
  UChar32 c
)

Check if a code point has the White_Space Unicode property.

Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). This is different from both u_isspace and u_isWhitespace!

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

See also: UCHAR_WHITE_SPACE See also: u_isWhitespace See also: u_isspace See also: u_isJavaSpaceChar See also: u_hasBinaryProperty

Details

Parameters

`c`	Code point to test

Returns

true if the code point has the White_Space Unicode property, false otherwise.

u_isWhitespace

U_CAPI UBool U_EXPORT2 u_isWhitespace(
  UChar32 c
)

Determines if the specified code point is a whitespace character according to Java/ICU.

A character is considered to be a Java whitespace character if and only if it satisfies one of the following criteria:

It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
It is U+0009 HORIZONTAL TABULATION.
It is U+000A LINE FEED.
It is U+000B VERTICAL TABULATION.
It is U+000C FORM FEED.
It is U+000D CARRIAGE RETURN.
It is U+001C FILE SEPARATOR.
It is U+001D GROUP SEPARATOR.
It is U+001E RECORD SEPARATOR.
It is U+001F UNIT SEPARATOR.

This API tries to sync with the semantics of Java's java.lang.Character.isWhitespace(), but it may not return the exact same results because of the Unicode version difference.

Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. See http://www.unicode.org/versions/Unicode4.0.1/

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

See also: u_isspace See also: u_isJavaSpaceChar See also: u_isUWhiteSpace

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a whitespace character according to Java/ICU

u_isalnum

U_CAPI UBool U_EXPORT2 u_isalnum(
  UChar32 c
)

Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.

True for characters with general categories "L" (letters) and "Nd" (decimal digit numbers).

Same as java.lang.Character.isLetterOrDigit().

In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is an alphanumeric character according to Character.isLetterOrDigit()

u_isalpha

U_CAPI UBool U_EXPORT2 u_isalpha(
  UChar32 c
)

Determines whether the specified code point is a letter character.

True for general categories "L" (letters).

Same as java.lang.Character.isLetter().

See also: u_isdigit See also: u_isalnum

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a letter character

u_isbase

U_CAPI UBool U_EXPORT2 u_isbase(
  UChar32 c
)

Non-standard: Determines whether the specified code point is a base character.

True for general categories "L" (letters), "N" (numbers), "Mc" (spacing combining marks), and "Me" (enclosing marks).

Note that this is different from the Unicode Standard definition in chapter 3.6, conformance clause D51 “Base character”, which defines base characters as the code points with general categories Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs).

See also: u_isalpha See also: u_isdigit

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a base character according to this function

u_isblank

U_CAPI UBool U_EXPORT2 u_isblank(
  UChar32 c
)

Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.

The following are equivalent definitions:

true for Unicode White_Space characters except for "vertical space controls" where "vertical space controls" are the following characters: U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)

same as

true for U+0009 (TAB) and characters with general category "Zs" (space separators).

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a "blank"

u_iscntrl

U_CAPI UBool U_EXPORT2 u_iscntrl(
  UChar32 c
)

Determines whether the specified code point is a control character (as defined by this function).

A control character is one of the following:

ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
U_CONTROL_CHAR (Cc)
U_FORMAT_CHAR (Cf)
U_LINE_SEPARATOR (Zl)
U_PARAGRAPH_SEPARATOR (Zp)

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_isprint

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a control character

u_isdefined

U_CAPI UBool U_EXPORT2 u_isdefined(
  UChar32 c
)

Determines whether the specified code point is "defined", which usually means that it is assigned a character.

True for general categories other than "Cn" (other, not assigned), i.e., true for all code points mentioned in UnicodeData.txt.

Note that non-character code points (e.g., U+FDD0) are not "defined" (they are Cn), but surrogate code points are "defined" (Cs).

Same as java.lang.Character.isDefined().

See also: u_isdigit See also: u_isalpha See also: u_isalnum See also: u_isupper See also: u_islower See also: u_istitle

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is assigned a character

u_isdigit

U_CAPI UBool U_EXPORT2 u_isdigit(
  UChar32 c
)

Determines whether the specified code point is a digit character according to Java.

True for characters with general category "Nd" (decimal digit numbers). Beginning with Unicode 4, this is the same as testing for the Numeric_Type of Decimal.

Same as java.lang.Character.isDigit().

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a digit character according to Character.isDigit()

u_isgraph

U_CAPI UBool U_EXPORT2 u_isgraph(
  UChar32 c
)

Determines whether the specified code point is a "graphic" character (printable, excluding spaces).

true for all characters except those with general categories "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), "Cn" (unassigned), and "Z" (separators).

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a "graphic" character

u_islower

U_CAPI UBool U_EXPORT2 u_islower(
  UChar32 c
)

Determines whether the specified code point has the general category "Ll" (lowercase letter).

Same as java.lang.Character.isLowerCase().

This misses some characters that are also lowercase but have a different general category value. In order to include those, use UCHAR_LOWERCASE.

See also: UCHAR_LOWERCASE See also: u_isupper See also: u_istitle

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is an Ll lowercase letter

u_isprint

U_CAPI UBool U_EXPORT2 u_isprint(
  UChar32 c
)

Determines whether the specified code point is a printable character.

True for general categories other than "C" (controls).

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_iscntrl

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a printable character

u_ispunct

U_CAPI UBool U_EXPORT2 u_ispunct(
  UChar32 c
)

Determines whether the specified code point is a punctuation character.

True for characters with general categories "P" (punctuation).

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a punctuation character

u_isspace

U_CAPI UBool U_EXPORT2 u_isspace(
  UChar32 c
)

Determines if the specified character is a space character or not.

Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

See also: u_isJavaSpaceChar See also: u_isWhitespace See also: u_isUWhiteSpace

Details

Parameters

`c`	the character to be tested

Returns

true if the character is a space character; false otherwise.

u_istitle

U_CAPI UBool U_EXPORT2 u_istitle(
  UChar32 c
)

Determines whether the specified code point is a titlecase letter.

True for general category "Lt" (titlecase letter).

Same as java.lang.Character.isTitleCase().

See also: u_isupper See also: u_islower See also: u_totitle

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is an Lt titlecase letter

u_isupper

U_CAPI UBool U_EXPORT2 u_isupper(
  UChar32 c
)

Determines whether the specified code point has the general category "Lu" (uppercase letter).

Same as java.lang.Character.isUpperCase().

This misses some characters that are also uppercase but have a different general category value. In order to include those, use UCHAR_UPPERCASE.

See also: UCHAR_UPPERCASE See also: u_islower See also: u_istitle See also: u_tolower

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is an Lu uppercase letter

u_isxdigit

U_CAPI UBool U_EXPORT2 u_isxdigit(
  UChar32 c
)

Determines whether the specified code point is a hexadecimal digit.

This is equivalent to u_digit(c, 16)>=0. True for characters with general category "Nd" (decimal digit numbers) as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. (That is, for letters with code points 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)

In order to narrow the definition of hexadecimal digits to only ASCII characters, use (c<=0x7f && u_isxdigit(c)).

This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Details

Parameters

`c`	the code point to be tested

Returns

true if the code point is a hexadecimal digit

u_memcasecmp

U_CAPI int32_t U_EXPORT2 u_memcasecmp(
  const UChar *s1,
  const UChar *s2,
  int32_t length,
  uint32_t options
)

Compare two strings case-insensitively using full case folding.

This is equivalent to u_strcmp(u_strFoldCase(s1, length, options), u_strFoldCase(s2, length, options)).

Details

Parameters

`s1`	A string to compare.
`s2`	A string to compare.
`length`	The number of characters in each string to case-fold and then compare.
`options`	A bit set of options: U_FOLD_CASE_DEFAULT (or 0) selects the default options, i.e. comparison in code unit order with default case folding; U_COMPARE_CODE_POINT_ORDER chooses code point order instead of code unit order (see u_strCompare for details); U_FOLD_CASE_EXCLUDE_SPECIAL_I may also be set.

Returns

A negative, zero, or positive integer indicating the comparison result.

u_memchr

U_CAPI UChar * U_EXPORT2 u_memchr(
  const UChar *s,
  UChar c,
  int32_t count
)

Find the first occurrence of a BMP code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details

Parameters

`s`	The string to search (contains `count` UChars).
`c`	The BMP code point to find.
`count`	The length of the string.

Returns

A pointer to the first occurrence of c in s or NULL if c is not in s.

See also: u_strchr See also: u_memchr32 See also: u_strFindFirst

u_memchr32

U_CAPI UChar * U_EXPORT2 u_memchr32(
  const UChar *s,
  UChar32 c,
  int32_t count
)

Find the first occurrence of a code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details

Parameters

`s`	The string to search (contains `count` UChars).
`c`	The code point to find.
`count`	The length of the string.

Returns

A pointer to the first occurrence of c in s or NULL if c is not in s.

See also: u_strchr32 See also: u_memchr See also: u_strFindFirst

u_memcmp

U_CAPI int32_t U_EXPORT2 u_memcmp(
  const UChar *buf1,
  const UChar *buf2,
  int32_t count
)

Compare the first count UChars of each buffer.

Details

Parameters

`buf1`	The first string to compare.
`buf2`	The second string to compare.
`count`	The maximum number of UChars to compare.

Returns

When buf1 < buf2, a negative number is returned. When buf1 == buf2, 0 is returned. When buf1 > buf2, a positive number is returned.

u_memcmpCodePointOrder

U_CAPI int32_t U_EXPORT2 u_memcmpCodePointOrder(
  const UChar *s1,
  const UChar *s2,
  int32_t count
)

Compare two Unicode strings in code point order.

This is different in UTF-16 from u_memcmp() if supplementary characters are present. For details, see u_strCompare().

Details

Parameters

`s1`	A string to compare.
`s2`	A string to compare.
`count`	The maximum number of characters to compare.

Returns

a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order

u_memcpy

U_CAPI UChar * U_EXPORT2 u_memcpy(
  UChar *dest,
  const UChar *src,
  int32_t count
)

Synonym for memcpy(), but with UChars only.

Details

Parameters

`dest`	The destination string
`src`	The source string (can be NULL/invalid if count<=0)
`count`	The number of characters to copy; no-op if <=0

Returns

A pointer to dest

u_memmove

U_CAPI UChar * U_EXPORT2 u_memmove(
  UChar *dest,
  const UChar *src,
  int32_t count
)

Synonym for memmove(), but with UChars only.

Details

Parameters

`dest`	The destination string
`src`	The source string (can be NULL/invalid if count<=0)
`count`	The number of characters to move; no-op if <=0

Returns

A pointer to dest

u_memrchr

U_CAPI UChar * U_EXPORT2 u_memrchr(
  const UChar *s,
  UChar c,
  int32_t count
)

Find the last occurrence of a BMP code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details

Parameters

`s`	The string to search (contains `count` UChars).
`c`	The BMP code point to find.
`count`	The length of the string.

Returns

A pointer to the last occurrence of c in s or NULL if c is not in s.

See also: u_strrchr See also: u_memrchr32 See also: u_strFindLast

u_memrchr32

U_CAPI UChar * U_EXPORT2 u_memrchr32(
  const UChar *s,
  UChar32 c,
  int32_t count
)

Find the last occurrence of a code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details

Parameters

`s`	The string to search (contains `count` UChars).
`c`	The code point to find.
`count`	The length of the string.

Returns

A pointer to the last occurrence of c in s or NULL if c is not in s.

See also: u_strrchr32 See also: u_memrchr See also: u_strFindLast

u_memset

U_CAPI UChar * U_EXPORT2 u_memset(
  UChar *dest,
  UChar c,
  int32_t count
)

Initialize count characters of dest to c.

Details

Parameters

`dest`	The destination string.
`c`	The character to initialize the string.
`count`	The maximum number of characters to set.

Returns

A pointer to dest.

u_strCaseCompare

U_CAPI int32_t U_EXPORT2 u_strCaseCompare(
  const UChar *s1,
  int32_t length1,
  const UChar *s2,
  int32_t length2,
  uint32_t options,
  UErrorCode *pErrorCode
)

Compare two strings case-insensitively using full case folding.

This is equivalent to u_strCompare(u_strFoldCase(s1, options), u_strFoldCase(s2, options), (options&U_COMPARE_CODE_POINT_ORDER)!=0).

The comparison can be done in UTF-16 code unit order or in code point order. They differ only when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.

This function works with strings of different explicitly specified lengths, unlike the ANSI C-like u_strcmp() and u_memcmp() etc. NUL-terminated strings are possible with length arguments of -1.

Details

Parameters

`s1`	First source string.
`length1`	Length of first source string, or -1 if NUL-terminated.
`s2`	Second source string.
`length2`	Length of second source string, or -1 if NUL-terminated.
`options`	A bit set of options: U_FOLD_CASE_DEFAULT (or 0) selects the default options, i.e. comparison in code unit order with default case folding; U_COMPARE_CODE_POINT_ORDER chooses code point order instead of code unit order (see u_strCompare for details); U_FOLD_CASE_EXCLUDE_SPECIAL_I may also be set.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

<0 or 0 or >0 as usual for string comparisons

u_strCompare

U_CAPI int32_t U_EXPORT2 u_strCompare(
  const UChar *s1,
  int32_t length1,
  const UChar *s2,
  int32_t length2,
  UBool codePointOrder
)

Compare two Unicode strings (binary order).

The comparison can be done in code unit order or in code point order. They differ only in UTF-16 when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.

This function works with strings of different explicitly specified lengths, unlike the ANSI C-like u_strcmp() and u_memcmp() etc. NUL-terminated strings are possible with length arguments of -1.

Details

Parameters

`s1`	First source string.
`length1`	Length of first source string, or -1 if NUL-terminated.
`s2`	Second source string.
`length2`	Length of second source string, or -1 if NUL-terminated.
`codePointOrder`	Choose between code unit order (false) and code point order (true).

Returns

<0 or 0 or >0 as usual for string comparisons

u_strFindFirst

U_CAPI UChar * U_EXPORT2 u_strFindFirst(
  const UChar *s,
  int32_t length,
  const UChar *substring,
  int32_t subLength
)

Find the first occurrence of a substring in a string.

The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.

Details

Parameters

`s`	The string to search.
`length`	The length of s (number of UChars), or -1 if it is NUL-terminated.
`substring`	The substring to find (NUL-terminated if subLength is -1).
`subLength`	The length of substring (number of UChars), or -1 if it is NUL-terminated.

Returns

A pointer to the first occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s.

See also: u_strstr See also: u_strFindLast

u_strFindLast

U_CAPI UChar * U_EXPORT2 u_strFindLast(
  const UChar *s,
  int32_t length,
  const UChar *substring,
  int32_t subLength
)

Find the last occurrence of a substring in a string.

Details

Parameters

`s`	The string to search.
`length`	The length of s (number of UChars), or -1 if it is NUL-terminated.
`substring`	The substring to find (NUL-terminated if subLength is -1).
`subLength`	The length of substring (number of UChars), or -1 if it is NUL-terminated.

Returns

A pointer to the last occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s.

See also: u_strstr See also: u_strFindFirst

u_strFoldCase

U_CAPI int32_t U_EXPORT2 u_strFoldCase(
  UChar *dest,
  int32_t destCapacity,
  const UChar *src,
  int32_t srcLength,
  uint32_t options,
  UErrorCode *pErrorCode
)

Case-folds the characters in a string.

Case-folding is locale-independent and not context-sensitive, but there is an option for whether to include or exclude mappings for dotted I and dotless i that are marked with 'T' in CaseFolding.txt.

The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
`src`	The original string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`options`	Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.

u_strFromUTF32

U_CAPI UChar * U_EXPORT2 u_strFromUTF32(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar32 *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-32 string to UTF-16.

If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

See also: u_strFromUTF32WithSub See also: u_strToUTF32

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The pointer to destination buffer.

u_strFromUTF32WithSub

U_CAPI UChar * U_EXPORT2 u_strFromUTF32WithSub(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar32 *src,
  int32_t srcLength,
  UChar32 subchar,
  int32_t *pNumSubstitutions,
  UErrorCode *pErrorCode
)

Convert a UTF-32 string to UTF-16.

Same as u_strFromUTF32() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().

See also: u_strFromUTF32 See also: u_strToUTF32WithSub

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`subchar`	The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
`pNumSubstitutions`	Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
`pErrorCode`	Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

The pointer to destination buffer.

u_strFromUTF8

U_CAPI UChar * U_EXPORT2 u_strFromUTF8(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const char *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-8 string to UTF-16.

If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The pointer to destination buffer.

u_strFromUTF8Lenient

U_CAPI UChar * U_EXPORT2 u_strFromUTF8Lenient(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const char *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-8 string to UTF-16.

Same as u_strFromUTF8() except that this function is designed to be very fast, which it achieves by being lenient about malformed UTF-8 sequences. This function is intended for use in environments where UTF-8 text is expected to be well-formed.

Its semantics are:

Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
The function will not read beyond the input string, nor write beyond the destCapacity.
Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not be well-formed UTF-16. The function will resynchronize to valid code point boundaries within a small number of code points after an illegal sequence.
Non-shortest forms are not detected and will result in "spoofing" output.

For further performance improvement, if srcLength is given (>=0), then destCapacity must be at least srcLength (destCapacity>=srcLength).

There is no inverse u_strToUTF8Lenient() function because there is practically no performance gain from not checking that a UTF-16 string is well-formed.

See also: u_strFromUTF8 See also: u_strFromUTF8WithSub See also: u_strToUTF8WithSub

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). Unlike for other ICU functions, if srcLength>=0 then it must be destCapacity>=srcLength.
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. Unlike for other ICU functions, if srcLength>=0 but destCapacity<srcLength, then *pDestLength will be set to srcLength (and U_BUFFER_OVERFLOW_ERROR will be set) regardless of the actual result length.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`pErrorCode`	Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

The pointer to destination buffer.

u_strFromUTF8WithSub

U_CAPI UChar *U_EXPORT2 u_strFromUTF8WithSub(
  UChar *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const char *src,
  int32_t srcLength,
  UChar32 subchar,
  int32_t *pNumSubstitutions,
  UErrorCode *pErrorCode
)

Convert a UTF-8 string to UTF-16.

Same as u_strFromUTF8() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().

See also: u_strFromUTF8 See also: u_strFromUTF8Lenient See also: u_strToUTF8WithSub

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`subchar`	The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
`pNumSubstitutions`	Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
`pErrorCode`	Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

The pointer to destination buffer.

u_strHasMoreChar32Than

U_CAPI UBool U_EXPORT2 u_strHasMoreChar32Than(
  const UChar *s,
  int32_t length,
  int32_t number
)

Check if the string contains more Unicode code points than a certain number.

This is more efficient than counting all code points in the entire string and comparing that number with a threshold. This function may not need to scan the string at all if the length is known (not -1 for NUL-termination) and falls within a certain range, and never needs to count more than 'number+1' code points. Logically equivalent to (u_countChar32(s, length)>number). A Unicode code point may occupy either one or two UChar code units.

Details

Parameters

`s`	The input string.
`length`	The length of the string, or -1 if it is NUL-terminated.
`number`	The number of code points in the string is compared against the 'number' parameter.

Returns

Boolean value for whether the string contains more Unicode code points than 'number'. Same as (u_countChar32(s, length)>number).

u_strToLower

U_CAPI int32_t U_EXPORT2 u_strToLower(
  UChar *dest,
  int32_t destCapacity,
  const UChar *src,
  int32_t srcLength,
  const char *locale,
  UErrorCode *pErrorCode
)

Lowercase the characters in a string.

Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
`src`	The original string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`locale`	The locale to consider, or "" for the root locale or NULL for the default locale.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.

u_strToTitle

U_CAPI int32_t U_EXPORT2 u_strToTitle(
  UChar *dest,
  int32_t destCapacity,
  const UChar *src,
  int32_t srcLength,
  UBreakIterator *titleIter,
  const char *locale,
  UErrorCode *pErrorCode
)

Titlecase a string.

Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others.

The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. The standard titlecase iterator for the root locale implements the algorithm of Unicode TR 21.

This function uses only the setText(), first() and next() methods of the provided break iterator.

The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
`src`	The original string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`titleIter`	A break iterator to find the first characters of words that are to be titlecased. If none is provided (NULL), then a standard titlecase break iterator is opened.
`locale`	The locale to consider, or "" for the root locale or NULL for the default locale.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.

u_strToUTF32

U_CAPI UChar32 *U_EXPORT2 u_strToUTF32(
  UChar32 *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-16 string to UTF-32.

If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

See also: u_strToUTF32WithSub See also: u_strFromUTF32

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChar32s). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The pointer to destination buffer.

u_strToUTF32WithSub

U_CAPI UChar32 *U_EXPORT2 u_strToUTF32WithSub(
  UChar32 *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar *src,
  int32_t srcLength,
  UChar32 subchar,
  int32_t *pNumSubstitutions,
  UErrorCode *pErrorCode
)

Convert a UTF-16 string to UTF-32.

Same as u_strToUTF32() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().

See also: u_strToUTF32 See also: u_strFromUTF32WithSub

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChar32s). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`subchar`	The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
`pNumSubstitutions`	Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
`pErrorCode`	Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

The pointer to destination buffer.

u_strToUTF8

U_CAPI char *U_EXPORT2 u_strToUTF8(
  char *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar *src,
  int32_t srcLength,
  UErrorCode *pErrorCode
)

Convert a UTF-16 string to UTF-8.

If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of chars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The pointer to destination buffer.

u_strToUTF8WithSub

U_CAPI char *U_EXPORT2 u_strToUTF8WithSub(
  char *dest,
  int32_t destCapacity,
  int32_t *pDestLength,
  const UChar *src,
  int32_t srcLength,
  UChar32 subchar,
  int32_t *pNumSubstitutions,
  UErrorCode *pErrorCode
)

Convert a UTF-16 string to UTF-8.

Same as u_strToUTF8() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().

See also: u_strToUTF8 See also: u_strFromUTF8WithSub

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of chars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`pDestLength`	A pointer to receive the number of units written to the destination. If pDestLength!=NULL then *pDestLength is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
`src`	The original source string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`subchar`	The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
`pNumSubstitutions`	Output parameter receiving the number of substitutions if subchar>=0. Set to 0 if no substitutions occur or subchar<0. pNumSubstitutions can be NULL.
`pErrorCode`	Pointer to a standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

The pointer to destination buffer.

u_strToUpper

U_CAPI int32_t U_EXPORT2 u_strToUpper(
  UChar *dest,
  int32_t destCapacity,
  const UChar *src,
  int32_t srcLength,
  const char *locale,
  UErrorCode *pErrorCode
)

Uppercase the characters in a string.

Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.

Details

Parameters

`dest`	A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
`src`	The original string
`srcLength`	The length of the original string. If -1, then src must be zero-terminated.
`locale`	The locale to consider, or "" for the root locale or NULL for the default locale.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.

u_strcasecmp

U_CAPI int32_t U_EXPORT2 u_strcasecmp(
  const UChar *s1,
  const UChar *s2,
  uint32_t options
)

Compare two strings case-insensitively using full case folding.

This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).

Details

Parameters

`s1`	A string to compare.
`s2`	A string to compare.
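`options`	A bit set of options: U_FOLD_CASE_DEFAULT or 0 is used for default options (comparison in code unit order with default case folding); U_COMPARE_CODE_POINT_ORDER: set to choose code point order instead of code unit order (see u_strCompare for details); U_FOLD_CASE_EXCLUDE_SPECIAL_I: set to use the modified case folding mappings from CaseFolding.txt that handle dotted I and dotless i appropriately for Turkic languages (tr, az).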

Returns

A negative, zero, or positive integer indicating the comparison result.

u_strcat

U_CAPI UChar *U_EXPORT2 u_strcat(
  UChar *dst,
  const UChar *src
)

Concatenate two ustrings.

Appends a copy of src, including the null terminator, to dst. The initial copied character from src overwrites the null terminator in dst.

Details

Parameters

`dst`	The destination string.
`src`	The source string.

Returns

A pointer to dst.

u_strchr

U_CAPI UChar *U_EXPORT2 u_strchr(
  const UChar *s,
  UChar c
)

Find the first occurrence of a BMP code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details

Parameters

`s`	The string to search (NUL-terminated).
`c`	The BMP code point to find.

Returns

A pointer to the first occurrence of c in s or NULL if c is not in s.

See also: u_strchr32 See also: u_memchr See also: u_strstr See also: u_strFindFirst

u_strchr32

U_CAPI UChar *U_EXPORT2 u_strchr32(
  const UChar *s,
  UChar32 c
)

Find the first occurrence of a code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details

Parameters

`s`	The string to search (NUL-terminated).
`c`	The code point to find.

Returns

A pointer to the first occurrence of c in s or NULL if c is not in s.

See also: u_strchr See also: u_memchr32 See also: u_strstr See also: u_strFindFirst

u_strcmp

U_CAPI int32_t U_EXPORT2 u_strcmp(
  const UChar *s1,
  const UChar *s2
)

Compare two Unicode strings for bitwise equality (code unit order).

Details

Parameters

`s1`	A string to compare.
`s2`	A string to compare.

Returns

0 if s1 and s2 are bitwise equal; a negative value if s1 is bitwise less than s2; a positive value if s1 is bitwise greater than s2.

u_strcmpCodePointOrder

U_CAPI int32_t U_EXPORT2 u_strcmpCodePointOrder(
  const UChar *s1,
  const UChar *s2
)

Compare two Unicode strings in code point order.

See u_strCompare for details.

Details

Parameters

`s1`	A string to compare.
`s2`	A string to compare.

Returns

a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order

u_strcpy

U_CAPI UChar *U_EXPORT2 u_strcpy(
  UChar *dst,
  const UChar *src
)

Copy a ustring.

Adds a null terminator.

Details

Parameters

`dst`	The destination string.
`src`	The source string.

Returns

A pointer to dst.

u_strcspn

U_CAPI int32_t U_EXPORT2 u_strcspn(
  const UChar *string,
  const UChar *matchSet
)

Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in matchSet.

Works just like C's strcspn but with Unicode.

See also: u_strspn

Details

Parameters

`string`	The string in which to search, NUL-terminated.
`matchSet`	A NUL-terminated string defining a set of code points for which to search in the text string.

Returns

The number of initial characters in string that do not occur in matchSet.

u_strlen

U_CAPI int32_t U_EXPORT2 u_strlen(
  const UChar *s
)

Determine the length of an array of UChar.

Details

Parameters

`s`	The array of UChars, NUL (U+0000) terminated.

Returns

The number of UChars in s, not counting the terminator.

u_strncasecmp

U_CAPI int32_t U_EXPORT2 u_strncasecmp(
  const UChar *s1,
  const UChar *s2,
  int32_t n,
  uint32_t options
)

Compare two strings case-insensitively using full case folding.

This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), u_strFoldCase(s2, at most n, options)).

Details

Parameters

`s1`	A string to compare.
`s2`	A string to compare.
`n`	The maximum number of characters in each string to case-fold and then compare.
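`options`	A bit set of options: U_FOLD_CASE_DEFAULT or 0 is used for default options (comparison in code unit order with default case folding); U_COMPARE_CODE_POINT_ORDER: set to choose code point order instead of code unit order (see u_strCompare for details); U_FOLD_CASE_EXCLUDE_SPECIAL_I: set to use the modified case folding mappings from CaseFolding.txt that handle dotted I and dotless i appropriately for Turkic languages (tr, az).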

Returns

A negative, zero, or positive integer indicating the comparison result.

u_strncat

U_CAPI UChar *U_EXPORT2 u_strncat(
  UChar *dst,
  const UChar *src,
  int32_t n
)

Concatenate two ustrings.

Appends at most n characters from src to dst. Adds a terminating NUL. If src is too long, then only n-1 characters will be copied before the terminating NUL. If n<=0 then dst is not modified.

Details

Parameters

`dst`	The destination string.
`src`	The source string (can be NULL/invalid if n<=0).
`n`	The maximum number of characters to append; no-op if <=0.

Returns

A pointer to dst.

u_strncmp

U_CAPI int32_t U_EXPORT2 u_strncmp(
  const UChar *ucs1,
  const UChar *ucs2,
  int32_t n
)

Compare two ustrings for bitwise equality.

Compares at most n characters.

Details

Parameters

`ucs1`	A string to compare (can be NULL/invalid if n<=0).
`ucs2`	A string to compare (can be NULL/invalid if n<=0).
`n`	The maximum number of characters to compare; always returns 0 if n<=0.

Returns

0 if ucs1 and ucs2 are bitwise equal; a negative value if ucs1 is bitwise less than ucs2; a positive value if ucs1 is bitwise greater than ucs2.

u_strncmpCodePointOrder

U_CAPI int32_t U_EXPORT2 u_strncmpCodePointOrder(
  const UChar *s1,
  const UChar *s2,
  int32_t n
)

Compare two Unicode strings in code point order.

This is different in UTF-16 from u_strncmp() if supplementary characters are present. For details, see u_strCompare().

Details

Parameters

`s1`	A string to compare.
`s2`	A string to compare.
`n`	The maximum number of characters to compare.

Returns

a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order

u_strncpy

U_CAPI UChar *U_EXPORT2 u_strncpy(
  UChar *dst,
  const UChar *src,
  int32_t n
)

Copy a ustring.

Copies at most n characters. The result will be null terminated if the length of src is less than n.

Details

Parameters

`dst`	The destination string.
`src`	The source string (can be NULL/invalid if n<=0).
`n`	The maximum number of characters to copy; no-op if <=0.

Returns

A pointer to dst.

u_strpbrk

U_CAPI UChar *U_EXPORT2 u_strpbrk(
  const UChar *string,
  const UChar *matchSet
)

Locates the first occurrence in the string string of any of the characters in the string matchSet.

Works just like C's strpbrk but with Unicode.

Details

Parameters

`string`	The string in which to search, NUL-terminated.
`matchSet`	A NUL-terminated string defining a set of code points for which to search in the text string.

Returns

A pointer to the character in string that matches one of the characters in matchSet, or NULL if no such character is found.

u_strrchr

U_CAPI UChar *U_EXPORT2 u_strrchr(
  const UChar *s,
  UChar c
)

Find the last occurrence of a BMP code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details

Parameters

`s`	The string to search (NUL-terminated).
`c`	The BMP code point to find.

Returns

A pointer to the last occurrence of c in s or NULL if c is not in s.

See also: u_strrchr32 See also: u_memrchr See also: u_strrstr See also: u_strFindLast

u_strrchr32

U_CAPI UChar *U_EXPORT2 u_strrchr32(
  const UChar *s,
  UChar32 c
)

Find the last occurrence of a code point in a string.

A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.

Details

Parameters

`s`	The string to search (NUL-terminated).
`c`	The code point to find.

Returns

A pointer to the last occurrence of c in s or NULL if c is not in s.

See also: u_strrchr See also: u_memrchr32 See also: u_strrstr See also: u_strFindLast

u_strrstr

U_CAPI UChar *U_EXPORT2 u_strrstr(
  const UChar *s,
  const UChar *substring
)

Find the last occurrence of a substring in a string.

Details

Parameters

`s`	The string to search (NUL-terminated).
`substring`	The substring to find (NUL-terminated).

Returns

A pointer to the last occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s.

See also: u_strstr See also: u_strFindFirst See also: u_strFindLast

u_strspn

U_CAPI int32_t U_EXPORT2 u_strspn(
  const UChar *string,
  const UChar *matchSet
)

Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in matchSet.

Works just like C's strspn but with Unicode.

See also: u_strcspn

Details

Parameters

`string`	The string in which to search, NUL-terminated.
`matchSet`	A NUL-terminated string defining a set of code points for which to search in the text string.

Returns

The number of initial characters in string that do occur in matchSet.

u_strstr

U_CAPI UChar *U_EXPORT2 u_strstr(
  const UChar *s,
  const UChar *substring
)

Find the first occurrence of a substring in a string.

Details

Parameters

`s`	The string to search (NUL-terminated).
`substring`	The substring to find (NUL-terminated).

Returns

A pointer to the first occurrence of substring in s, or s itself if the substring is empty, or NULL if substring is not in s.

See also: u_strrstr See also: u_strFindFirst See also: u_strFindLast

u_strtok_r

U_CAPI UChar *U_EXPORT2 u_strtok_r(
  UChar *src,
  const UChar *delim,
  UChar **saveState
)

The string tokenizer API allows an application to break a string into tokens.

Unlike strtok(), the saveState (the current pointer within the original string) is maintained in saveState. In the first call, the argument src is a pointer to the string. In subsequent calls to return successive tokens of that string, src must be specified as NULL. The value saveState is set by this function to maintain the function's position within the string, and on each subsequent call you must give this argument the same variable. This function does handle surrogate pairs. This function is similar to the strtok_r() function of the POSIX Threads Extension (1003.1c-1995).

Details

Parameters

`src`	String containing token(s). This string will be modified. After the first call to u_strtok_r(), this argument must be NULL to get to the next token.
`delim`	Set of delimiter characters (Unicode code points).
`saveState`	The current pointer within the original string, which is set by this function. The saveState parameter should be the address of a local variable of type UChar * (i.e. define "UChar *myLocalSaveState" and use &myLocalSaveState for this parameter).

Returns

A pointer to the next token found in src, or NULL when there are no more tokens.

u_tolower

U_CAPI UChar32 U_EXPORT2 u_tolower(
  UChar32 c
)

The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.

Same as java.lang.Character.toLowerCase().

Details

Parameters

`c`	the code point to be mapped

Returns

the Simple_Lowercase_Mapping of the code point, if any; otherwise the code point itself.

u_totitle

U_CAPI UChar32 U_EXPORT2 u_totitle(
  UChar32 c
)

The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.

Same as java.lang.Character.toTitleCase().

Details

Parameters

`c`	the code point to be mapped

Returns

the Simple_Titlecase_Mapping of the code point, if any; otherwise the code point itself.

u_toupper

U_CAPI UChar32 U_EXPORT2 u_toupper(
  UChar32 c
)

The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.

Same as java.lang.Character.toUpperCase().

Details

Parameters

`c`	the code point to be mapped

Returns

the Simple_Uppercase_Mapping of the code point, if any; otherwise the code point itself.

u_versionToString

U_CAPI void U_EXPORT2 u_versionToString(
  const UVersionInfo versionArray,
  char *versionString
)

Write a string with dotted-decimal version information according to the input UVersionInfo.

Definition of this function lives in putil.c

Details

Parameters

`versionArray`	The version information to be written as a string.
`versionString`	A string buffer that will be filled in with a string corresponding to the numeric version information in versionArray. The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.

ubrk_clone

U_CAPI UBreakIterator *U_EXPORT2 ubrk_clone(
  const UBreakIterator *bi,
  UErrorCode *status
)

Thread safe cloning operation.

Details

Parameters

`bi`	iterator to be cloned
`status`	to indicate whether the operation went on smoothly or there were errors

Returns

pointer to the new clone

ubrk_close

U_CAPI void U_EXPORT2 ubrk_close(
  UBreakIterator *bi
)

Close a UBreakIterator.

Once closed, a UBreakIterator may no longer be used.

Details

Parameters

`bi`	The break iterator to close.

ubrk_countAvailable

U_CAPI int32_t U_EXPORT2 ubrk_countAvailable(
  void
)

Determine how many locales have text breaking information available.

This function is most useful for determining the loop ending condition for calls to ubrk_getAvailable. See also: ubrk_getAvailable

Details

Returns

The number of locales for which text breaking information is available.

ubrk_current

U_CAPI int32_t U_EXPORT2 ubrk_current(
  const UBreakIterator *bi
)

Determine the most recently-returned text boundary.

Details

Parameters

`bi`	The break iterator to use.

Returns

The character index most recently returned by ubrk_next, ubrk_previous, ubrk_first, or ubrk_last.

ubrk_first

U_CAPI int32_t U_EXPORT2 ubrk_first(
  UBreakIterator *bi
)

Set the iterator position to zero, the start of the text being scanned.

See also: ubrk_last

Details

Parameters

`bi`	The break iterator to use.

Returns

The new iterator position (zero).

ubrk_following

U_CAPI int32_t U_EXPORT2 ubrk_following(
  UBreakIterator *bi,
  int32_t offset
)

Advance the iterator to the first boundary following the specified offset.

The value returned is always greater than offset, or UBRK_DONE. See also: ubrk_preceding

Details

Parameters

`bi`	The break iterator to use.
`offset`	The offset to begin scanning.

Returns

The text boundary following offset, or UBRK_DONE.

ubrk_getAvailable

U_CAPI const char *U_EXPORT2 ubrk_getAvailable(
  int32_t index
)

Get a locale for which text breaking information is available.

A UBreakIterator in a locale returned by this function will perform the correct text breaking for the locale. See also: ubrk_countAvailable

Details

Parameters

`index`	The index of the desired locale.

Returns

A locale for which text breaking information is available, or 0 if none.

ubrk_getRuleStatus

U_CAPI int32_t U_EXPORT2 ubrk_getRuleStatus(
  UBreakIterator *bi
)

Return the status from the break rule that determined the most recently returned break position.

The values appear in the rule source within brackets, {123}, for example. For rules that do not specify a status, a default value of 0 is returned.

For word break iterators, the possible values are defined in enum UWordBreak.

ubrk_getRuleStatusVec

U_CAPI int32_t U_EXPORT2 ubrk_getRuleStatusVec(
  UBreakIterator *bi,
  int32_t *fillInVec,
  int32_t capacity,
  UErrorCode *status
)

Get the statuses from the break rules that determined the most recently returned break position.

The values appear in the rule source within brackets, {123}, for example. The default status value for rules that do not explicitly provide one is zero.

For word break iterators, the possible values are defined in enum UWordBreak.

Details

Parameters

`bi`	The break iterator to use
`fillInVec`	an array to be filled in with the status values.
`capacity`	the length of the supplied vector. A length of zero causes the function to return the number of status values, in the normal way, without attempting to store any values.
`status`	receives error codes.

Returns

The number of rule status values from rules that determined the most recent boundary returned by the break iterator.

ubrk_isBoundary

U_CAPI UBool U_EXPORT2 ubrk_isBoundary(
  UBreakIterator *bi,
  int32_t offset
)

Returns true if the specified position is a boundary position.

As a side effect, leaves the iterator pointing to the first boundary position at or after "offset".

Details

Parameters

`bi`	The break iterator to use.
`offset`	the offset to check.

Returns

True if "offset" is a boundary position.

ubrk_last

U_CAPI int32_t U_EXPORT2 ubrk_last(
  UBreakIterator *bi
)

Set the iterator position to the index immediately beyond the last character in the text being scanned.

This is not the same as the last character. See also: ubrk_first

Details

Parameters

`bi`	The break iterator to use.

Returns

The character offset immediately beyond the last character in the text being scanned.

ubrk_next

U_CAPI int32_t U_EXPORT2 ubrk_next(
  UBreakIterator *bi
)

Advance the iterator to the boundary following the current boundary.

See also: ubrk_previous

Details

Parameters

`bi`	The break iterator to use.

Returns

The character index of the next text boundary, or UBRK_DONE if all text boundaries have been returned.

ubrk_open

U_CAPI UBreakIterator *U_EXPORT2 ubrk_open(
  UBreakIteratorType type,
  const char *locale,
  const UChar *text,
  int32_t textLength,
  UErrorCode *status
)

Open a new UBreakIterator for locating text boundaries for a specified locale.

A UBreakIterator may be used for detecting character, line, word, and sentence breaks in text. See also: ubrk_openRules

Details

Parameters

`type`	The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, UBRK_LINE, UBRK_SENTENCE
`locale`	The locale specifying the text-breaking conventions. Note that locale keys such as "lb" and "ss" may be used to modify text break behavior, see general discussion of BreakIterator C API.
`text`	The text to be iterated over. May be null, in which case ubrk_setText() is used to specify the text to be iterated.
`textLength`	The number of characters in text, or -1 if null-terminated.
`status`	A UErrorCode to receive any errors.

Returns

A UBreakIterator for the specified locale.

ubrk_preceding

U_CAPI int32_t U_EXPORT2 ubrk_preceding(
  UBreakIterator *bi,
  int32_t offset
)

Set the iterator position to the first boundary preceding the specified offset.

The new position is always smaller than offset, or UBRK_DONE. See also: ubrk_following

Details

Parameters

`bi`	The break iterator to use.
`offset`	The offset to begin scanning.

Returns

The text boundary preceding offset, or UBRK_DONE.

ubrk_previous

U_CAPI int32_t U_EXPORT2 ubrk_previous(
  UBreakIterator *bi
)

Set the iterator position to the boundary preceding the current boundary.

See also: ubrk_next

Details

Parameters

`bi`	The break iterator to use.

Returns

The character index of the preceding text boundary, or UBRK_DONE if all text boundaries have been returned.

ubrk_setText

U_CAPI void U_EXPORT2 ubrk_setText(
  UBreakIterator *bi,
  const UChar *text,
  int32_t textLength,
  UErrorCode *status
)

Sets an existing iterator to point to a new piece of text.

The break iterator retains a pointer to the supplied text. The caller must not modify or delete the text while the BreakIterator retains the reference.

Details

Parameters

`bi`	The iterator to use
`text`	The text to be set
`textLength`	The length of the text
`status`	The error code

ubrk_setUText

U_CAPI void U_EXPORT2 ubrk_setUText(
  UBreakIterator *bi,
  UText *text,
  UErrorCode *status
)

Sets an existing iterator to point to a new piece of text.

All index positions returned by break iterator functions are native indices from the UText. For example, when breaking UTF-8 encoded text, the break positions returned by ubrk_next, ubrk_previous, etc. will be UTF-8 string indices, not UTF-16 positions.

Details

Parameters

`bi`	The iterator to use
`text`	The text to be set. This function makes a shallow clone of the supplied UText. This means that the caller is free to immediately close or otherwise reuse the UText that was passed as a parameter, but that the underlying text itself must not be altered while being referenced by the break iterator.
`status`	The error code

ucol_clone

U_CAPI UCollator *U_EXPORT2 ucol_clone(
  const UCollator *coll,
  UErrorCode *status
)

Thread safe cloning operation.

The result is a clone of a given collator. See also: ucol_open See also: ucol_openRules See also: ucol_close

Details

Parameters

`coll`	collator to be cloned
`status`	to indicate whether the operation went on smoothly or there were errors

Returns

pointer to the new clone

ucol_close

U_CAPI void U_EXPORT2 ucol_close(
  UCollator *coll
)

Close a UCollator.

Once closed, a UCollator should not be used. Every open collator should be closed. Otherwise, a memory leak will result. See also: ucol_open See also: ucol_openRules See also: ucol_clone

Details

Parameters

coll

The UCollator to close.

ucol_countAvailable

U_CAPI int32_t U_EXPORT2 ucol_countAvailable(
  void
)

Determine how many locales have collation rules available.

This function is most useful for determining the loop ending condition for calls to ucol_getAvailable. See also: ucol_getAvailable

Details
Returns	The number of locales for which collation rules are available.

ucol_getAttribute

U_CAPI UColAttributeValue U_EXPORT2 ucol_getAttribute(
  const UCollator *coll,
  UColAttribute attr,
  UErrorCode *status
)

Universal attribute getter.

See also: UColAttribute See also: UColAttributeValue See also: ucol_setAttribute

Details

Parameters

`coll`	collator whose attributes are to be queried
`attr`	attribute type

`status`	to indicate whether the operation went on smoothly or there were errors

Returns

attribute value

ucol_getAvailable

U_CAPI const char *U_EXPORT2 ucol_getAvailable(
  int32_t localeIndex
)

Get a locale for which collation rules are available.

A UCollator in a locale returned by this function will perform the correct collation for the locale. See also: ucol_countAvailable

Details

Parameters

localeIndex

The index of the desired locale.

Returns

A locale for which collation rules are available, or 0 if none.

ucol_getDisplayName

U_CAPI int32_t U_EXPORT2 ucol_getDisplayName(
  const char *objLoc,
  const char *dispLoc,
  UChar *result,
  int32_t resultLength,
  UErrorCode *status
)

Get the display name for a UCollator.

The display name is suitable for presentation to a user.

Details

Parameters

`objLoc`	The locale of the collator in question.
`dispLoc`	The locale for display.
`result`	A pointer to a buffer to receive the attribute.
`resultLength`	The maximum size of result.
`status`	A pointer to a UErrorCode to receive any errors

Returns

The total buffer size needed; if greater than resultLength, the output was truncated.

ucol_getEquivalentReorderCodes

U_CAPI int32_t U_EXPORT2 ucol_getEquivalentReorderCodes(
  int32_t reorderCode,
  int32_t *dest,
  int32_t destCapacity,
  UErrorCode *pErrorCode
)

Retrieves the reorder codes that are grouped with the given reorder code.

Some reorder codes will be grouped and must reorder together. Beginning with ICU 55, scripts only reorder together if they are primary-equal, for example Hiragana and Katakana.

See also: ucol_setReorderCodes See also: ucol_getReorderCodes See also: UScriptCode See also: UColReorderCode

Details

Parameters

`reorderCode`	The reorder code to determine equivalence for.
`dest`	The array to fill with the script ordering.
`destCapacity`	The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any codes (pre-flighting).
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The number of reordering codes written to the dest array.

ucol_getFunctionalEquivalent

U_CAPI int32_t U_EXPORT2 ucol_getFunctionalEquivalent(
  char *result,
  int32_t resultCapacity,
  const char *keyword,
  const char *locale,
  UBool *isAvailable,
  UErrorCode *status
)

Return the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.

If two different input locale + keyword combinations produce the same result locale, then collators instantiated for these two different input locales will behave equivalently. The converse is not always true; two collators may in fact be equivalent, but return different results, due to internal details. The return result has no other meaning than that stated above, and implies nothing as to the relationship between the two locales. This is intended for use by applications who wish to cache collators, or otherwise reuse collators when possible. The functional equivalent may change over time. For more information, please see the Locales and Services section of the ICU User Guide.

Details

Parameters

`result`	fillin for the functionally equivalent result locale
`resultCapacity`	capacity of the fillin buffer
`keyword`	a particular keyword as enumerated by ucol_getKeywords.
`locale`	the specified input locale
`isAvailable`	if non-NULL, pointer to a fillin parameter that on return indicates whether the specified input locale was 'available' to the collation service. A locale is defined as 'available' if it physically exists within the collation locale data.
`status`	pointer to input-output error code

Returns

the actual buffer size needed for the locale. If greater than resultCapacity, the returned full name will be truncated and an error code will be returned.

ucol_getKeywordValues

U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywordValues(
  const char *keyword,
  UErrorCode *status
)

Given a keyword, create a string enumeration of all values for that keyword that are currently in use.

Details

Parameters

`keyword`	a particular keyword as enumerated by ucol_getKeywords. If any other keyword is passed in, *status is set to U_ILLEGAL_ARGUMENT_ERROR.
`status`	input-output error code

Returns

a string enumeration over collation keyword values, or NULL upon error. The caller is responsible for closing the result.

ucol_getKeywordValuesForLocale

U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywordValuesForLocale(
  const char *key,
  const char *locale,
  UBool commonlyUsed,
  UErrorCode *status
)

Given a key and a locale, returns an array of string values in a preferred order that would make a difference.

These are all and only those values where the open (creation) of the service with the locale formed from the input locale plus input keyword and that value has different behavior than creation with the input locale alone.

Details

Parameters

`key`	one of the keys supported by this service. For now, only "collation" is supported.
`locale`	the locale
`commonlyUsed`	if set to true it will return only commonly used values with the given locale in preferred order. Otherwise, it will return all the available values for the locale.
`status`	error status

Returns

a string enumeration over keyword values for the given key and the locale.

ucol_getKeywords

U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywords(
  UErrorCode *status
)

Create a string enumerator of all possible keywords that are relevant to collation.

At this point, the only recognized keyword for this service is "collation".

Details

Parameters

status

input-output error code

Returns

a string enumeration over locale strings. The caller is responsible for closing the result.

ucol_getMaxVariable

U_CAPI UColReorderCode U_EXPORT2 ucol_getMaxVariable(
  const UCollator *coll
)

Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.

See also: ucol_setMaxVariable

Details

Parameters

coll

the collator

Returns

the maximum variable reordering group.

ucol_getReorderCodes

U_CAPI int32_t U_EXPORT2 ucol_getReorderCodes(
  const UCollator *coll,
  int32_t *dest,
  int32_t destCapacity,
  UErrorCode *pErrorCode
)

Retrieves the reordering codes for this collator.

These reordering codes are a combination of UScript codes and UColReorderCode entries. See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode See also: UColReorderCode

Details

Parameters

`coll`	The UCollator to query.
`dest`	The array to fill with the script ordering.
`destCapacity`	The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any codes (pre-flighting).
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

Returns

The number of reordering codes written to the dest array.

ucol_getSortKey

U_CAPI int32_t U_EXPORT2 ucol_getSortKey(
  const UCollator *coll,
  const UChar *source,
  int32_t sourceLength,
  uint8_t *result,
  int32_t resultLength
)

Get a sort key for a string from a UCollator.

Sort keys may be compared using strcmp.

Note that sort keys are often less efficient than simply doing comparison. For more details, see the ICU User Guide.

Like ICU functions that write to an output buffer, the buffer contents are undefined if the buffer capacity (resultLength parameter) is too small. Unlike ICU functions that write a string to an output buffer, the terminating zero byte is counted in the sort key length. See also: ucol_keyHashCode

Details

Parameters

`coll`	The UCollator containing the collation rules.
`source`	The string to transform.
`sourceLength`	The length of source, or -1 if null-terminated.
`result`	A pointer to a buffer to receive the attribute.
`resultLength`	The maximum size of result.

Returns

The size needed to fully store the sort key. If there was an internal error generating the sort key, a zero value is returned.

ucol_getStrength

U_CAPI UCollationStrength U_EXPORT2 ucol_getStrength(
  const UCollator *coll
)

Get the collation strength used in a UCollator.

The strength influences how strings are compared. See also: ucol_setStrength

Details

Parameters

coll

The UCollator to query.

Returns

The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL

ucol_mergeSortkeys

U_CAPI int32_t U_EXPORT2 ucol_mergeSortkeys(
  const uint8_t *src1,
  int32_t src1Length,
  const uint8_t *src2,
  int32_t src2Length,
  uint8_t *dest,
  int32_t destCapacity
)

Merges two sort keys.

The levels are merged with their corresponding counterparts (primaries with primaries, secondaries with secondaries etc.). Between the values from the same level a separator is inserted.

This is useful, for example, for combining sort keys from first and last names to sort such pairs. See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys

The recommended way to achieve "merged" sorting is by concatenating strings with U+FFFE between them. The concatenation has the same sort order as the merged sort keys, but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2). Using strings with U+FFFE may yield shorter sort keys.

For details about Sort Key Features see https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features

It is possible to merge multiple sort keys by consecutively merging another one with the intermediate result.

The length of the merge result is the sum of the lengths of the input sort keys.

Example (uncompressed):

191B1D 01 050505 01 910505 00
1F2123 01 050505 01 910505 00

will be merged as

191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00

If the destination buffer is not big enough, then its contents are undefined. If any of source lengths are zero or any of the source pointers are NULL/undefined, the result is of size zero.

Details

Parameters

`src1`	the first sort key
`src1Length`	the length of the first sort key, including the zero byte at the end; can be -1 if the function is to find the length
`src2`	the second sort key
`src2Length`	the length of the second sort key, including the zero byte at the end; can be -1 if the function is to find the length
`dest`	the buffer where the merged sort key is written, can be NULL if destCapacity==0
`destCapacity`	the number of bytes in the dest buffer

Returns

the length of the merged sort key, src1Length+src2Length; can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), in which case the contents of dest are undefined

ucol_open

U_CAPI UCollator *U_EXPORT2 ucol_open(
  const char *loc,
  UErrorCode *status
)

Open a UCollator for comparing strings.

For some languages, multiple collation types are available; for example, "de@collation=phonebook". Starting with ICU 54, collation attributes can be specified via locale keywords as well, in the old locale extension syntax ("el@colCaseFirst=upper") or in language tag syntax ("el-u-kf-upper"). See User Guide: Collation API.

The UCollator pointer is used in all the calls to the Collation service. After finished, collator must be disposed of by calling ucol_close. See also: ucol_openRules See also: ucol_clone See also: ucol_close

Details

Parameters

`loc`	The locale containing the required collation rules. Special values for locales can be passed in - if NULL is passed for the locale, the default locale collation rules will be used. If empty string ("") or "root" are passed, the root collator will be returned.
`status`	A pointer to a UErrorCode to receive any errors

Returns

A pointer to a UCollator, or 0 if an error occurred.

ucol_openAvailableLocales

U_CAPI UEnumeration *U_EXPORT2 ucol_openAvailableLocales(
  UErrorCode *status
)

Create a string enumerator of all locales for which a valid collator may be opened.

Details

Parameters

status

input-output error code

Returns

a string enumeration over locale strings. The caller is responsible for closing the result.

ucol_openRules

U_CAPI UCollator *U_EXPORT2 ucol_openRules(
  const UChar *rules,
  int32_t rulesLength,
  UColAttributeValue normalizationMode,
  UCollationStrength strength,
  UParseError *parseError,
  UErrorCode *status
)

Produce a UCollator instance according to the rules supplied.

The rules are used to change the default ordering, defined in the UCA in a process called tailoring. The resulting UCollator pointer can be used in the same way as the one obtained by ucol_open. See also: ucol_open See also: ucol_clone See also: ucol_close

Details

Parameters

`rules`	A string describing the collation rules. For the syntax of the rules please see users guide.
`rulesLength`	The length of rules, or -1 if null-terminated.
`normalizationMode`	The normalization mode: One of UCOL_OFF (expect the text to not need normalization), UCOL_ON (normalize), or UCOL_DEFAULT (set the mode according to the rules)
`strength`	The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can be also set in the rules.
`parseError`	A pointer to UParseError to receive information about errors that occurred during parsing. This argument can currently be set to NULL, but at the user's own risk. Please provide a real structure.
`status`	A pointer to a UErrorCode to receive any errors

Returns

A pointer to a UCollator. It is not guaranteed that NULL be returned in case of error - please use status argument to check for errors.

ucol_setAttribute

U_CAPI void U_EXPORT2 ucol_setAttribute(
  UCollator *coll,
  UColAttribute attr,
  UColAttributeValue value,
  UErrorCode *status
)

Universal attribute setter.

See also: UColAttribute See also: UColAttributeValue See also: ucol_getAttribute

Details

Parameters

`coll`	collator whose attributes are to be changed
`attr`	attribute type
`value`	attribute value
`status`	to indicate whether the operation went on smoothly or there were errors

ucol_setMaxVariable

U_CAPI void U_EXPORT2 ucol_setMaxVariable(
  UCollator *coll,
  UColReorderCode group,
  UErrorCode *pErrorCode
)

Sets the variable top to the top of the specified reordering group.

The variable top determines the highest-sorting character which is affected by UCOL_ALTERNATE_HANDLING. If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. See also: ucol_getMaxVariable

Details

Parameters

`coll`	the collator
`group`	one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

ucol_setReorderCodes

U_CAPI void U_EXPORT2 ucol_setReorderCodes(
  UCollator *coll,
  const int32_t *reorderCodes,
  int32_t reorderCodesLength,
  UErrorCode *pErrorCode
)

Sets the reordering codes for this collator.

Collation reordering allows scripts and some other groups of characters to be moved relative to each other. This reordering is done on top of the DUCET/CLDR standard collation order. Reordering can specify groups to be placed at the start and/or the end of the collation order. These groups are specified using UScript codes and UColReorderCode entries.

By default, reordering codes specified for the start of the order are placed in the order given after several special non-script blocks. These special groups of characters are space, punctuation, symbol, currency, and digit. These special groups are represented with UColReorderCode entries. Script groups can be intermingled with these special non-script groups if those special groups are explicitly specified in the reordering.

The special code OTHERS stands for any script that is not explicitly mentioned in the list of reordering codes given. Anything that is after OTHERS will go at the very end of the reordering in the order given.

The special reorder code DEFAULT will reset the reordering for this collator to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that was specified when this collator was created from resource data or from rules. The DEFAULT code must be the sole code supplied when it is used. If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.

The special reorder code NONE will remove any reordering for this collator. The result of setting no reordering will be to have the DUCET/CLDR ordering used. The NONE code must be the sole code supplied when it is used.

See also: ucol_getReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode See also: UColReorderCode

Details

Parameters

`coll`	The UCollator to set.
`reorderCodes`	An array of script codes in the new order. This can be NULL if the length is also set to 0. An empty array will clear any reordering codes on the collator.
`reorderCodesLength`	The length of reorderCodes.
`pErrorCode`	Must be a valid pointer to an error code value, which must not indicate a failure before the function call.

ucol_setStrength

U_CAPI void U_EXPORT2 ucol_setStrength(
  UCollator *coll,
  UCollationStrength strength
)

Set the collation strength used in a UCollator.

The strength influences how strings are compared. See also: ucol_getStrength

Details

Parameters

`coll`	The UCollator to set.
`strength`	The desired collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT

ucol_strcoll

U_CAPI UCollationResult U_EXPORT2 ucol_strcoll(
  const UCollator *coll,
  const UChar *source,
  int32_t sourceLength,
  const UChar *target,
  int32_t targetLength
)

Compare two strings.

The strings will be compared using the options already specified. See also: ucol_greater See also: ucol_greaterOrEqual See also: ucol_equal

Details

Parameters

`coll`	The UCollator containing the comparison rules.
`source`	The source string.
`sourceLength`	The length of source, or -1 if null-terminated.
`target`	The target string.
`targetLength`	The length of target, or -1 if null-terminated.

Returns

The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS

ucol_strcollUTF8

U_CAPI UCollationResult U_EXPORT2 ucol_strcollUTF8(
  const UCollator *coll,
  const char *source,
  int32_t sourceLength,
  const char *target,
  int32_t targetLength,
  UErrorCode *status
)

Compare two strings in UTF-8.

The strings will be compared using the options already specified. Note: When the input string contains a malformed UTF-8 byte sequence, this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). See also: ucol_greater See also: ucol_greaterOrEqual See also: ucol_equal

Details

Parameters

`coll`	The UCollator containing the comparison rules.
`source`	The source UTF-8 string.
`sourceLength`	The length of source, or -1 if null-terminated.
`target`	The target UTF-8 string.
`targetLength`	The length of target, or -1 if null-terminated.
`status`	A pointer to a UErrorCode to receive any errors

Returns

The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS

uenum_close

U_CAPI void U_EXPORT2 uenum_close(
  UEnumeration *en
)

Disposes of resources in use by the iterator.

If en is NULL, does nothing. After this call, any char* or UChar* pointer returned by uenum_unext() or uenum_next() is invalid.

Details

Parameters

`en`	UEnumeration structure pointer

uenum_count

U_CAPI int32_t U_EXPORT2 uenum_count(
  UEnumeration *en,
  UErrorCode *status
)

Returns the number of elements that the iterator traverses.

If the iterator is out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR. This is a convenience function. It can end up being very expensive as all the items might have to be pre-fetched (depending on the type of data being traversed). Use with caution and only when necessary.

Details

Parameters

`en`	UEnumeration structure pointer
`status`	error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the iterator is out of sync.

Returns

number of elements in the iterator

uenum_next

U_CAPI const char *U_EXPORT2 uenum_next(
  UEnumeration *en,
  int32_t *resultLength,
  UErrorCode *status
)

Returns the next element in the iterator's list.

If there are no more elements, returns NULL. If the iterator is out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. If the native service string is a UChar* string, it is converted to char* with the invariant converter. The result is terminated by (char)0. If the conversion fails (because a character cannot be converted) then status is set to U_INVARIANT_CONVERSION_ERROR and the return value is undefined (but non-NULL).

Details

Parameters

`en`	the iterator object
`resultLength`	pointer to receive the length of the result (not including the terminating \0). If the pointer is NULL it is ignored.
`status`	the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if the iterator is out of sync with its service. Set to U_INVARIANT_CONVERSION_ERROR if the underlying native string is UChar* and conversion to char* with the invariant converter fails. This error pertains only to current string, so iteration might be able to continue successfully.

Returns

a pointer to the string. The string will be zero-terminated. The return pointer is owned by this iterator and must not be deleted by the caller. The pointer is valid until the next call to any uenum_... method, including uenum_next() or uenum_unext(). When all strings have been traversed, returns NULL.

uenum_openCharStringsEnumeration

U_CAPI UEnumeration *U_EXPORT2 uenum_openCharStringsEnumeration(
  const char *const strings[],
  int32_t count,
  UErrorCode *ec
)

Given an array of const char* strings (invariant chars only), return a UEnumeration.

String pointers from 0..count-1 must not be null. Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.

See also: uenum_close

Details

Parameters

`strings`	array of char* strings (each null terminated). All storage is owned by the caller.
`count`	length of the array
`ec`	error code

Returns

the new UEnumeration object. Caller is responsible for calling uenum_close to free memory

uenum_openUCharStringsEnumeration

U_CAPI UEnumeration *U_EXPORT2 uenum_openUCharStringsEnumeration(
  const UChar *const strings[],
  int32_t count,
  UErrorCode *ec
)

Given an array of const UChar* strings, return a UEnumeration.

String pointers from 0..count-1 must not be null. Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.

See also: uenum_close

Details

Parameters

`strings`	array of const UChar* strings (each null terminated). All storage is owned by the caller.
`count`	length of the array
`ec`	error code

Returns

the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.

uenum_reset

U_CAPI void U_EXPORT2 uenum_reset(
  UEnumeration *en,
  UErrorCode *status
)

Resets the iterator to the current list of service IDs.

This re-establishes sync with the service and rewinds the iterator to start at the first element.

Details

Parameters

`en`	the iterator object
`status`	the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if the iterator is out of sync with its service.

uenum_unext

U_CAPI const UChar *U_EXPORT2 uenum_unext(
  UEnumeration *en,
  int32_t *resultLength,
  UErrorCode *status
)

Returns the next element in the iterator's list.

Details

Parameters

`en`	the iterator object
`resultLength`	pointer to receive the length of the result (not including the terminating \0). If the pointer is NULL it is ignored.
`status`	the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if the iterator is out of sync with its service.

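Returns

a pointer to the string. The string will be zero-terminated. The return pointer is owned by this iterator and must not be deleted by the caller. The pointer is valid until the next call to any uenum_... method, including uenum_next() or uenum_unext(). When all strings have been traversed, returns NULL.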

uldn_close

U_CAPI void U_EXPORT2 uldn_close(
  ULocaleDisplayNames *ldn
)

Closes a ULocaleDisplayNames instance obtained from uldn_open().

Details

Parameters

ldn

the ULocaleDisplayNames instance to be closed

uldn_getContext

U_CAPI UDisplayContext U_EXPORT2 uldn_getContext(
  const ULocaleDisplayNames *ldn,
  UDisplayContextType type,
  UErrorCode *pErrorCode
)

Returns the UDisplayContext value for the specified UDisplayContextType.

Details

Parameters

`ldn`	the ULocaleDisplayNames instance
`type`	the UDisplayContextType whose value to return
`pErrorCode`	Pointer to UErrorCode input/output status. If at entry this indicates a failure status, the function will do nothing; otherwise this will be updated with any new status from the function.

Returns

the UDisplayContextValue for the specified type.

uldn_getDialectHandling

U_CAPI UDialectHandling U_EXPORT2 uldn_getDialectHandling(
  const ULocaleDisplayNames *ldn
)

Returns the dialect handling used in the display names.

Details

Parameters

ldn

the LocaleDisplayNames instance

Returns

the dialect handling enum

uldn_getLocale

U_CAPI const char *U_EXPORT2 uldn_getLocale(
  const ULocaleDisplayNames *ldn
)

Returns the locale used to determine the display names.

This is not necessarily the same locale passed to uldn_open.

Details

Parameters

ldn

the LocaleDisplayNames instance

Returns

the display locale

uldn_keyDisplayName

U_CAPI int32_t U_EXPORT2 uldn_keyDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *key,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided locale key.

Details

Parameters

`ldn`	the LocaleDisplayNames instance
`key`	the locale key whose display name to return
`result`	receives the display name
`maxResultSize`	the size of the result buffer
`pErrorCode`	the status code

Returns

the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_keyValueDisplayName

U_CAPI int32_t U_EXPORT2 uldn_keyValueDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *key,
  const char *value,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided value (used with the provided key).

Details

Parameters

`ldn`	the LocaleDisplayNames instance
`key`	the locale key
`value`	the locale key's value
`result`	receives the display name
`maxResultSize`	the size of the result buffer
`pErrorCode`	the status code

Returns

the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_languageDisplayName

U_CAPI int32_t U_EXPORT2 uldn_languageDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *lang,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided language code.

Details

Parameters

`ldn`	the LocaleDisplayNames instance
`lang`	the language code whose display name to return
`result`	receives the display name
`maxResultSize`	the size of the result buffer
`pErrorCode`	the status code

Returns

the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_localeDisplayName

U_CAPI int32_t U_EXPORT2 uldn_localeDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *locale,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided locale.

Details

Parameters

`ldn`	the LocaleDisplayNames instance
`locale`	the locale whose display name to return
`result`	receives the display name
`maxResultSize`	the size of the result buffer
`pErrorCode`	the status code

Returns

the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_open

U_CAPI ULocaleDisplayNames *U_EXPORT2 uldn_open(
  const char *locale,
  UDialectHandling dialectHandling,
  UErrorCode *pErrorCode
)

Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided dialectHandling.

The usual value for dialectHandling is ULOC_STANDARD_NAMES.

Details

Parameters

`locale`	the display locale
`dialectHandling`	how to select names for locales
`pErrorCode`	the status code

Returns

a ULocaleDisplayNames instance

uldn_openForContext

U_CAPI ULocaleDisplayNames *U_EXPORT2 uldn_openForContext(
  const char *locale,
  UDisplayContext *contexts,
  int32_t length,
  UErrorCode *pErrorCode
)

Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided UDisplayContext settings.

Details

Parameters

`locale`	The display locale
`contexts`	List of one or more context settings (e.g. for dialect handling, capitalization, etc.)
`length`	Number of items in the contexts list
`pErrorCode`	Pointer to UErrorCode input/output status. If at entry this indicates a failure status, the function will do nothing; otherwise this will be updated with any new status from the function.

Returns

a ULocaleDisplayNames instance

uldn_regionDisplayName

U_CAPI int32_t U_EXPORT2 uldn_regionDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *region,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided region code.

Details

Parameters

`ldn`	the LocaleDisplayNames instance
`region`	the region code whose display name to return
`result`	receives the display name
`maxResultSize`	the size of the result buffer
`pErrorCode`	the status code

Returns

the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_scriptCodeDisplayName

U_CAPI int32_t U_EXPORT2 uldn_scriptCodeDisplayName(
  const ULocaleDisplayNames *ldn,
  UScriptCode scriptCode,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided script code.

Details

Parameters

`ldn`	the LocaleDisplayNames instance
`scriptCode`	the script code whose display name to return
`result`	receives the display name
`maxResultSize`	the size of the result buffer
`pErrorCode`	the status code

Returns

the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_scriptDisplayName

U_CAPI int32_t U_EXPORT2 uldn_scriptDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *script,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided script.

Details

Parameters

`ldn`	the LocaleDisplayNames instance
`script`	the script whose display name to return
`result`	receives the display name
`maxResultSize`	the size of the result buffer
`pErrorCode`	the status code

Returns

the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uldn_variantDisplayName

U_CAPI int32_t U_EXPORT2 uldn_variantDisplayName(
  const ULocaleDisplayNames *ldn,
  const char *variant,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *pErrorCode
)

Returns the display name of the provided variant.

Details

Parameters

`ldn`	the LocaleDisplayNames instance
`variant`	the variant whose display name to return
`result`	receives the display name
`maxResultSize`	the size of the result buffer
`pErrorCode`	the status code

Returns

the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.

uloc_acceptLanguage

U_CAPI int32_t U_EXPORT2 uloc_acceptLanguage(
  char *result,
  int32_t resultAvailable,
  UAcceptResult *outResult,
  const char **acceptList,
  int32_t acceptListCount,
  UEnumeration *availableLocales,
  UErrorCode *status
)

Based on a list of available locales, determine an acceptable locale for the user.

This is a thin wrapper over C++ class LocaleMatcher.

Details

Parameters

`result`	buffer to accept the result locale
`resultAvailable`	the size of the result buffer.
`outResult`	an out parameter that contains the fallback status
`acceptList`	list of acceptable languages
`acceptListCount`	count of acceptList items
`availableLocales`	list of available locales to match
`status`	ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

length needed for the locale.

uloc_addLikelySubtags

U_CAPI int32_t U_EXPORT2 uloc_addLikelySubtags(
  const char *localeID,
  char *maximizedLocaleID,
  int32_t maximizedLocaleIDCapacity,
  UErrorCode *err
)

Add the likely subtags for a provided locale ID, per the algorithm described in the following CLDR technical report:

http://www.unicode.org/reports/tr35/#Likely_Subtags

If localeID is already in the maximal form, or there is no data available for maximization, it will be copied to the output buffer. For example, "und-Zzzz" cannot be maximized, since there is no reasonable maximization.

Examples:

"en" maximizes to "en_Latn_US"

"de" maximizes to "de_Latn_DE"

"sr" maximizes to "sr_Cyrl_RS"

"sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)

"zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)

Details

Parameters

`localeID`	The locale to maximize
`maximizedLocaleID`	The maximized locale
`maximizedLocaleIDCapacity`	The capacity of the maximizedLocaleID buffer
`err`	Error information if maximizing the locale failed. If the length of the localeID and the null-terminator is greater than the maximum allowed size, or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.

Returns

The actual buffer size needed for the maximized locale. If it's greater than maximizedLocaleIDCapacity, the returned ID will be truncated. On error, the return value is -1.

uloc_canonicalize

U_CAPI int32_t U_EXPORT2 uloc_canonicalize(
  const char *localeID,
  char *name,
  int32_t nameCapacity,
  UErrorCode *err
)

Gets the full name for the specified locale.

Note: This has the effect of 'canonicalizing' the string to a certain extent. Upper and lower case are set as needed, and if the components were in 'POSIX' format they are changed to ICU format. It does NOT map aliased names in any way. See the top of this header file.

Details

Parameters

`localeID`	the locale to get the full name with
`name`	the full name for localeID
`nameCapacity`	the size of the name buffer to store the full name with
`err`	error information if retrieving the full name failed

Returns

the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.

uloc_countAvailable

U_CAPI int32_t U_EXPORT2 uloc_countAvailable(
  void
)

Gets the size of the list of all available locales.

Details
Returns	the size of the locale list

uloc_forLanguageTag

U_CAPI int32_t U_EXPORT2 uloc_forLanguageTag(
  const char *langtag,
  char *localeID,
  int32_t localeIDCapacity,
  int32_t *parsedLength,
  UErrorCode *err
)

Returns a locale ID for the specified BCP47 language tag string.

If the specified language tag contains any ill-formed subtags, the first such subtag and all following subtags are ignored.

This implements the 'Language-Tag' production of BCP 47, and so supports legacy language tags (marked as “Type: grandfathered” in BCP 47) (regular and irregular) as well as private use language tags.

Private use tags are represented as 'x-whatever', and legacy tags are converted to their canonical replacements where they exist.

Note that a few legacy tags have no modern replacement; these will be converted using the fallback described in the first paragraph, so some information might be lost.

Details

Parameters

`langtag`	the input BCP47 language tag.
`localeID`	the output buffer receiving a locale ID for the specified BCP47 language tag.
`localeIDCapacity`	the size of the locale ID output buffer.
`parsedLength`	if not NULL, successfully parsed length for the input language tag is set.
`err`	error information if receiving the locale ID failed.

Returns

the length of the locale ID.

uloc_getAvailable

U_CAPI const char *U_EXPORT2 uloc_getAvailable(
  int32_t n
)

Gets the specified locale from a list of available locales.

This method corresponds to uloc_openAvailableByType called with the ULOC_AVAILABLE_DEFAULT type argument.

The return value is a pointer to an item of a locale name array. Both this array and the pointers it contains are owned by ICU and should not be deleted or written through by the caller. The locale name is a null-terminated string.

Details

Parameters

`n`	the specific locale name index of the available locale list; should not exceed the number returned by uloc_countAvailable.

Returns

a specified locale name of all available locales

uloc_getBaseName

U_CAPI int32_t U_EXPORT2 uloc_getBaseName(
  const char *localeID,
  char *name,
  int32_t nameCapacity,
  UErrorCode *err
)

Gets the full name for the specified locale, like uloc_getName(), but without keywords.

This API strips off the keyword part, so "de_DE@collation=phonebook" will become "de_DE". This API supports preflighting.

Details

Parameters

`localeID`	the locale to get the full name with
`name`	fill in buffer for the name without keywords.
`nameCapacity`	capacity of the fill in buffer.
`err`	error information if retrieving the full name failed

Returns

the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.

uloc_getCharacterOrientation

U_CAPI ULayoutType U_EXPORT2 uloc_getCharacterOrientation(
  const char *localeId,
  UErrorCode *status
)

Get the layout character orientation for the specified locale.

Details

Parameters

`localeId`	locale name
`status`	Error status

Returns

an enum indicating the layout orientation for characters.

uloc_getCountry

U_CAPI int32_t U_EXPORT2 uloc_getCountry(
  const char *localeID,
  char *country,
  int32_t countryCapacity,
  UErrorCode *err
)

Gets the country code for the specified locale.

Details

Parameters

`localeID`	the locale to get the country code with
`country`	the country code for localeID
`countryCapacity`	the size of the country buffer to store the country code with
`err`	error information if retrieving the country code failed

Returns

the actual buffer size needed for the country code. If it's greater than countryCapacity, the returned country code will be truncated.

uloc_getDefault

U_CAPI const char *U_EXPORT2 uloc_getDefault(
  void
)

Gets ICU's default locale.

The returned string is a snapshot in time, and will remain valid and unchanged even when uloc_setDefault() is called. The returned storage is owned by ICU, and must not be altered or deleted by the caller. On Android, uloc_setDefault() is not visible because the default Locale in ICU4C, ICU4J and java.util.Locale are synchronized. To set a default locale, call java.util.Locale::setDefault in java or by reverse JNI.

Details
Returns	the ICU default locale

uloc_getDisplayCountry

U_CAPI int32_t U_EXPORT2 uloc_getDisplayCountry(
  const char *locale,
  const char *displayLocale,
  UChar *country,
  int32_t countryCapacity,
  UErrorCode *status
)

Gets the country name suitable for display for the specified locale.

Warning: this is for the region part of a valid locale ID; it cannot just be the region code (like "FR"). To get the display name for a region alone, or for other options, use ULocaleDisplayNames instead.

Details

Parameters

`locale`	the locale to get the displayable country code with. NULL may be used to specify the default.
`displayLocale`	Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
`country`	the displayable country code for localeID.
`countryCapacity`	the size of the country buffer to store the displayable country code with.
`status`	error information if retrieving the displayable country code failed. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and a case canonicalized country code is placed into country as fallback.

Returns

the actual buffer size needed for the displayable country code. If it's greater than countryCapacity, the returned displayable country code will be truncated.

uloc_getDisplayKeyword

U_CAPI int32_t U_EXPORT2 uloc_getDisplayKeyword(
  const char *keyword,
  const char *displayLocale,
  UChar *dest,
  int32_t destCapacity,
  UErrorCode *status
)

Gets the keyword name suitable for display for the specified locale.

E.g.: for the locale string de_DE@collation=PHONEBOOK, this API gets the display string for the keyword collation.

Usage:

```c
UErrorCode status = U_ZERO_ERROR;
const char* keyword = NULL;
int32_t keywordLen = 0;
int32_t keywordCount = 0;
UChar displayKeyword[256];
int32_t displayKeywordLen = 0;
UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
for (keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0; keywordCount--) {
    if (U_FAILURE(status)) {
        ...something went wrong so handle the error...
        break;
    }
    // the uenum_next returns NUL terminated string
    keyword = uenum_next(keywordEnum, &keywordLen, &status);
    displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status);
    ... do something interesting .....
}
uenum_close(keywordEnum);
```

See also: uloc_openKeywords

Details

Parameters

`keyword`	The keyword whose display string needs to be returned.
`displayLocale`	Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
`dest`	the buffer to which the displayable keyword should be written.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`status`	error information if retrieving the displayable string failed. Should not be NULL and should not indicate failure on entry. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and the keyword is placed into dest as fallback.

Returns

the actual buffer size needed for the displayable keyword.

uloc_getDisplayKeywordValue

U_CAPI int32_t U_EXPORT2 uloc_getDisplayKeywordValue(
  const char *locale,
  const char *keyword,
  const char *displayLocale,
  UChar *dest,
  int32_t destCapacity,
  UErrorCode *status
)

Gets the value of the keyword suitable for display for the specified locale.

E.g: for the locale string de_DE@collation=PHONEBOOK, this API gets the display string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.

Details

Parameters

`locale`	The locale containing the keyword whose displayable value is requested. NULL may be used to specify the default.
`keyword`	The keyword whose value should be used.
`displayLocale`	Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
`dest`	the buffer to which the displayable keyword should be written.
`destCapacity`	The size of the buffer (number of UChars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
`status`	error information if retrieving the displayable string failed. Should not be NULL and must not indicate failure on entry. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and the value of the keyword is placed into dest as fallback.

Returns

the actual buffer size needed for the displayable keyword value.

uloc_getDisplayLanguage

U_CAPI int32_t U_EXPORT2 uloc_getDisplayLanguage(
  const char *locale,
  const char *displayLocale,
  UChar *language,
  int32_t languageCapacity,
  UErrorCode *status
)

Gets the language name suitable for display for the specified locale.

Details

Parameters

`locale`	the locale to get the ISO language code with
`displayLocale`	Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch".
`language`	the displayable language code for localeID
`languageCapacity`	the size of the language buffer to store the displayable language code with.
`status`	error information if retrieving the displayable language code failed. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and a case canonicalized language code is placed into language as fallback.

Returns

the actual buffer size needed for the displayable language code. If it's greater than languageCapacity, the returned language code will be truncated.

uloc_getDisplayName

U_CAPI int32_t U_EXPORT2 uloc_getDisplayName(
  const char *localeID,
  const char *inLocaleID,
  UChar *result,
  int32_t maxResultSize,
  UErrorCode *err
)

Gets the full name suitable for display for the specified locale.

Details

Parameters

`localeID`	the locale to get the displayable name with. NULL may be used to specify the default.
`inLocaleID`	Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
`result`	the displayable name for localeID
`maxResultSize`	the size of the name buffer to store the displayable full name with
`err`	error information if retrieving the displayable name failed

Returns

the actual buffer size needed for the displayable name. If it's greater than maxResultSize, the returned displayable name will be truncated.

uloc_getDisplayScript

U_CAPI int32_t U_EXPORT2 uloc_getDisplayScript(
  const char *locale,
  const char *displayLocale,
  UChar *script,
  int32_t scriptCapacity,
  UErrorCode *status
)

Gets the script name suitable for display for the specified locale.

Details

Parameters

`locale`	the locale to get the displayable script code with. NULL may be used to specify the default.
`displayLocale`	Specifies the locale to be used to display the name. In other words, if the locale's script code is "Latn", passing Locale::getFrench() for inLocale would result in "latin", while passing Locale::getGerman() for inLocale would result in "Lateinisch". NULL may be used to specify the default.
`script`	the displayable script for the localeID.
`scriptCapacity`	the size of the script buffer to store the displayable script code with.
`status`	error information if retrieving the displayable script code failed. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and a case canonicalized script code is placed into script as fallback.

Returns

the actual buffer size needed for the displayable script code. If it's greater than scriptCapacity, the returned displayable script code will be truncated.

uloc_getDisplayVariant

U_CAPI int32_t U_EXPORT2 uloc_getDisplayVariant(
  const char *locale,
  const char *displayLocale,
  UChar *variant,
  int32_t variantCapacity,
  UErrorCode *status
)

Gets the variant name suitable for display for the specified locale.

Details

Parameters

`locale`	the locale to get the displayable variant code with. NULL may be used to specify the default.
`displayLocale`	Specifies the locale to be used to display the name. In other words, if the locale's language code is "en", passing Locale::getFrench() for inLocale would result in "Anglais", while passing Locale::getGerman() for inLocale would result in "Englisch". NULL may be used to specify the default.
`variant`	the displayable variant code for localeID.
`variantCapacity`	the size of the variant buffer to store the displayable variant code with.
`status`	error information if retrieving the displayable variant code failed. U_USING_DEFAULT_WARNING indicates that no data was found from the locale resources and a case canonicalized variant code is placed into variant as fallback.

Returns

the actual buffer size needed for the displayable variant code. If it's greater than variantCapacity, the returned displayable variant code will be truncated.

uloc_getISO3Country

U_CAPI const char *U_EXPORT2 uloc_getISO3Country(
  const char *localeID
)

Gets the ISO country code for the specified locale.

Details

Parameters

localeID

the locale to get the ISO country code with

Returns

the ISO country code for localeID

uloc_getISO3Language

U_CAPI const char *U_EXPORT2 uloc_getISO3Language(
  const char *localeID
)

Gets the ISO language code for the specified locale.

Details

Parameters

localeID

the locale to get the ISO language code with

Returns

the ISO language code for localeID

uloc_getISOCountries

U_CAPI const char *const *U_EXPORT2 uloc_getISOCountries(
  void
)

Gets a list of all available 2-letter country codes defined in ISO 3166.

This is a pointer to an array of pointers to arrays of char. All of these pointers are owned by ICU; do not delete them, and do not write through them. The array is terminated with a null pointer.

Details
Returns	a list of all available country codes

uloc_getISOLanguages

U_CAPI const char *const *U_EXPORT2 uloc_getISOLanguages(
  void
)

Gets a list of all available 2-letter language codes defined in ISO 639, plus additional 3-letter codes determined to be useful for locale generation as defined by Unicode CLDR.

This is a pointer to an array of pointers to arrays of char. All of these pointers are owned by ICU; do not delete them, and do not write through them. The array is terminated with a null pointer.

Details
Returns	a list of all available language codes

uloc_getKeywordValue

U_CAPI int32_t U_EXPORT2 uloc_getKeywordValue(
  const char *localeID,
  const char *keywordName,
  char *buffer,
  int32_t bufferCapacity,
  UErrorCode *status
)

Get the value for a keyword.

Locale name does not need to be normalized.

Details

Parameters

`localeID`	locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
`keywordName`	name of the keyword for which we want the value; must not be NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
`buffer`	receiving buffer
`bufferCapacity`	capacity of receiving buffer
`status`	containing error code: e.g. buffer not big enough or ill-formed localeID or keywordName parameters.

Returns

the length of keyword value

uloc_getLanguage

U_CAPI int32_t U_EXPORT2 uloc_getLanguage(
  const char *localeID,
  char *language,
  int32_t languageCapacity,
  UErrorCode *err
)

Gets the language code for the specified locale.

Details

Parameters

`localeID`	the locale to get the ISO language code with
`language`	the language code for localeID
`languageCapacity`	the size of the language buffer to store the language code with
`err`	error information if retrieving the language code failed

Returns

the actual buffer size needed for the language code. If it's greater than languageCapacity, the returned language code will be truncated.

uloc_getLineOrientation

U_CAPI ULayoutType U_EXPORT2 uloc_getLineOrientation(
  const char *localeId,
  UErrorCode *status
)

Get the layout line orientation for the specified locale.

Details

Parameters

`localeId`	locale name
`status`	Error status

Returns

an enum indicating the layout orientation for lines.

uloc_getName

U_CAPI int32_t U_EXPORT2 uloc_getName(
  const char *localeID,
  char *name,
  int32_t nameCapacity,
  UErrorCode *err
)

Gets the full name for the specified locale.

Note: This has the effect of 'canonicalizing' the ICU locale ID to a certain extent. Upper and lower case are set as needed. It does NOT map aliased names in any way. See the top of this header file. This API supports preflighting.

Details

Parameters

`localeID`	the locale to get the full name with
`name`	fill in buffer for the full name.
`nameCapacity`	capacity of the fill in buffer.
`err`	error information if retrieving the full name failed

Returns

the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.

uloc_getScript

U_CAPI int32_t U_EXPORT2 uloc_getScript(
  const char *localeID,
  char *script,
  int32_t scriptCapacity,
  UErrorCode *err
)

Gets the script code for the specified locale.

Details

Parameters

`localeID`	the locale to get the script code with
`script`	the script code for localeID
`scriptCapacity`	the size of the script buffer to store the script code with
`err`	error information if retrieving the script code failed

Returns

the actual buffer size needed for the script code. If it's greater than scriptCapacity, the returned script code will be truncated.

uloc_getVariant

U_CAPI int32_t U_EXPORT2 uloc_getVariant(
  const char *localeID,
  char *variant,
  int32_t variantCapacity,
  UErrorCode *err
)

Gets the variant code for the specified locale.

Details

Parameters

`localeID`	the locale to get the variant code with
`variant`	the variant code for localeID
`variantCapacity`	the size of the variant buffer to store the variant code with
`err`	error information if retrieving the variant code failed

Returns

the actual buffer size needed for the variant code. If it's greater than variantCapacity, the returned variant code will be truncated.

uloc_isRightToLeft

U_CAPI UBool U_EXPORT2 uloc_isRightToLeft(
  const char *locale
)

Returns whether the locale's script is written right-to-left.

If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). If no likely script is known, then false is returned.

A script is right-to-left according to the CLDR script metadata which corresponds to whether the script's letters have Bidi_Class=R or AL.

Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl".

Details

Parameters

locale

input locale ID

Returns

true if the locale's script is written right-to-left

uloc_minimizeSubtags

U_CAPI int32_t U_EXPORT2 uloc_minimizeSubtags(
  const char *localeID,
  char *minimizedLocaleID,
  int32_t minimizedLocaleIDCapacity,
  UErrorCode *err
)

Minimize the subtags for a provided locale ID, per the algorithm described in the following CLDR technical report:

http://www.unicode.org/reports/tr35/#Likely_Subtags

If localeID is already in the minimal form, or there is no data available for minimization, it will be copied to the output buffer. Since the minimization algorithm relies on proper maximization, see the comments for uloc_addLikelySubtags for reasons why there might not be any data.

Examples:

"en_Latn_US" minimizes to "en"

"de_Latn_DE" minimizes to "de"

"sr_Cyrl_RS" minimizes to "sr"

"zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the script, and minimizing to "zh" would imply "zh_Hans_CN".)

Details

Parameters

`localeID`	The locale to minimize
`minimizedLocaleID`	The minimized locale
`minimizedLocaleIDCapacity`	The capacity of the minimizedLocaleID buffer
`err`	Error information if minimizing the locale failed. If the length of the localeID and the null-terminator is greater than the maximum allowed size, or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.

Returns

The actual buffer size needed for the minimized locale. If it's greater than minimizedLocaleIDCapacity, the returned ID will be truncated. On error, the return value is -1.

uloc_openKeywords

U_CAPI UEnumeration *U_EXPORT2 uloc_openKeywords(
  const char *localeID,
  UErrorCode *status
)

Gets an enumeration of keywords for the specified locale.

Enumeration must get disposed of by the client using uenum_close function.

Details

Parameters

`localeID`	the locale to get the keywords for
`status`	error information if retrieving the keywords failed

Returns

enumeration of keywords or NULL if there are no keywords.

uloc_setKeywordValue

U_CAPI int32_t U_EXPORT2 uloc_setKeywordValue(
  const char *keywordName,
  const char *keywordValue,
  char *buffer,
  int32_t bufferCapacity,
  UErrorCode *status
)

Sets or removes the value of the specified keyword.

For removing all keywords, use uloc_getBaseName().

NOTE: Unlike almost every other ICU function which takes a buffer, this function will NOT truncate the output text, and will not update the buffer with unterminated text setting a status of U_STRING_NOT_TERMINATED_WARNING. If a BUFFER_OVERFLOW_ERROR is received, it means a terminated version of the updated locale ID would not fit in the buffer, and the original buffer is untouched. This is done to prevent incorrect or possibly even malformed locales from being generated and used.

See also: uloc_getKeywordValue

Details

Parameters

`keywordName`	name of the keyword to be set; must not be NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
`keywordValue`	value of the keyword to be set. If 0-length or NULL, will result in the keyword being removed; no error is given if that keyword does not exist. Otherwise, must consist only of [A-Za-z0-9] and [/_+-].
`buffer`	input buffer containing well-formed locale ID to be modified.
`bufferCapacity`	capacity of receiving buffer
`status`	containing error code: e.g. buffer not big enough or ill-formed keywordName or keywordValue parameters, or ill-formed locale ID in buffer on input.

Returns

the length needed for the buffer

uloc_toLanguageTag

U_CAPI int32_t U_EXPORT2 uloc_toLanguageTag(
  const char *localeID,
  char *langtag,
  int32_t langtagCapacity,
  UBool strict,
  UErrorCode *err
)

Returns a well-formed language tag for this locale ID.

Note: When strict is false, any locale fields which do not satisfy the BCP47 syntax requirement will be omitted from the result. When strict is true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the err if any locale fields do not satisfy the BCP47 syntax requirement.

Details

Parameters

`localeID`	the input locale ID
`langtag`	the output buffer receiving BCP47 language tag for the locale ID.
`langtagCapacity`	the size of the BCP47 language tag output buffer.
`strict`	boolean value indicating if the function returns an error for an ill-formed input locale ID.
`err`	error information if receiving the language tag failed.

Returns

The length of the BCP47 language tag.

uloc_toLegacyKey

U_CAPI const char *U_EXPORT2 uloc_toLegacyKey(
  const char *keyword
)

Converts the specified keyword (BCP 47 Unicode locale extension key, or legacy key) to the legacy key.

For example, legacy key "collation" is returned for the input BCP 47 Unicode locale extension key "co".

See also: uloc_toUnicodeLocaleKey

Details

Parameters

keyword

the input locale keyword (either BCP 47 Unicode locale extension key or legacy key).

Returns

the well-formed legacy key, or NULL if the specified keyword cannot be mapped to a well-formed legacy key.

uloc_toLegacyType

U_CAPI const char *U_EXPORT2 uloc_toLegacyType(
  const char *keyword,
  const char *value
)

Converts the specified keyword value (BCP 47 Unicode locale extension type, or legacy type or type alias) to the canonical legacy type.

For example, the legacy type "phonebook" is returned for the input BCP 47 Unicode locale extension type "phonebk" with the keyword "collation" (or "co").

When the specified keyword is not recognized, but the specified value satisfies the syntax of legacy key, or when the specified keyword allows 'variable' type and the specified value satisfies the syntax, then the pointer to the input type value itself will be returned. For example, uloc_toLegacyType("Foo", "Bar") returns "Bar", uloc_toLegacyType("vt", "00A4") returns "00A4".

See also: uloc_toUnicodeLocaleType

Details

Parameters

`keyword`	the locale keyword (either legacy keyword such as "collation" or BCP 47 Unicode locale extension key such as "co").
`value`	the locale keyword value (either BCP 47 Unicode locale extension type such as "phonebk" or legacy keyword value such as "phonebook").

Returns

the well-formed legacy type, or NULL if the specified keyword value cannot be mapped to a well-formed legacy type.

uloc_toUnicodeLocaleKey

U_CAPI const char *U_EXPORT2 uloc_toUnicodeLocaleKey(
  const char *keyword
)

Converts the specified keyword (legacy key, or BCP 47 Unicode locale extension key) to the equivalent BCP 47 Unicode locale extension key.

For example, BCP 47 Unicode locale extension key "co" is returned for the input keyword "collation".

When the specified keyword is unknown, but satisfies the BCP 47 syntax, then the pointer to the input keyword itself will be returned. For example, uloc_toUnicodeLocaleKey("ZZ") returns "ZZ".

See also: uloc_toLegacyKey

Details

Parameters

keyword

the input locale keyword (either legacy key such as "collation" or BCP 47 Unicode locale extension key such as "co").

Returns

the well-formed BCP 47 Unicode locale extension key, or NULL if the specified locale keyword cannot be mapped to a well-formed BCP 47 Unicode locale extension key.

uloc_toUnicodeLocaleType

U_CAPI const char *U_EXPORT2 uloc_toUnicodeLocaleType(
  const char *keyword,
  const char *value
)

Converts the specified keyword value (legacy type, or BCP 47 Unicode locale extension type) to the well-formed BCP 47 Unicode locale extension type for the specified keyword (category).

For example, BCP 47 Unicode locale extension type "phonebk" is returned for the input keyword value "phonebook", with the keyword "collation" (or "co").

When the specified keyword is not recognized, but the specified value satisfies the syntax of the BCP 47 Unicode locale extension type, or when the specified keyword allows 'variable' type and the specified value satisfies the syntax, then the pointer to the input type value itself will be returned. For example, uloc_toUnicodeLocaleType("Foo", "Bar") returns "Bar", uloc_toUnicodeLocaleType("variableTop", "00A4") returns "00A4".

See also: uloc_toLegacyType

Details

Parameters

`keyword`	the locale keyword (either legacy key such as "collation" or BCP 47 Unicode locale extension key such as "co").
`value`	the locale keyword value (either legacy type such as "phonebook" or BCP 47 Unicode locale extension type such as "phonebk").

Returns

the well-formed BCP47 Unicode locale extension type, or NULL if the locale keyword value cannot be mapped to a well-formed BCP 47 Unicode locale extension type.

ulocdata_getCLDRVersion

U_CAPI void U_EXPORT2 ulocdata_getCLDRVersion(
  UVersionInfo versionArray,
  UErrorCode *status
)

Return the current CLDR version used by the library.

Details

Parameters

`versionArray`	fill-in that will receive the version number
`status`	error code - could be U_MISSING_RESOURCE_ERROR if the version was not found.

unorm2_append

U_CAPI int32_t U_EXPORT2 unorm2_append(
  const UNormalizer2 *norm2,
  UChar *first,
  int32_t firstLength,
  int32_t firstCapacity,
  const UChar *second,
  int32_t secondLength,
  UErrorCode *pErrorCode
)

Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.

The result is normalized if both the strings were normalized. The first and second strings must be different buffers.

Details

Parameters

`norm2`	UNormalizer2 instance
`first`	string, should be normalized
`firstLength`	length of the first string, or -1 if NUL-terminated
`firstCapacity`	number of UChars that can be written to first
`second`	string, should be normalized
`secondLength`	length of the second string, or -1 if NUL-terminated
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

the length of the first string after the append

unorm2_close

U_CAPI void U_EXPORT2 unorm2_close(
  UNormalizer2 *norm2
)

Closes a UNormalizer2 instance from unorm2_openFiltered().

Do not close instances from unorm2_getInstance()!

Details

Parameters

norm2

UNormalizer2 instance to be closed

unorm2_composePair

U_CAPI UChar32 U_EXPORT2 unorm2_composePair(
  const UNormalizer2 *norm2,
  UChar32 a,
  UChar32 b
)

Performs pairwise composition of a & b and returns the composite if there is one.

Returns a composite code point c only if c has a two-way mapping to a+b. In standard Unicode normalization, this means that c has a canonical decomposition to a+b and c does not have the Full_Composition_Exclusion property.

This function is independent of the mode of the UNormalizer2.

Details

Parameters

`norm2`	UNormalizer2 instance
`a`	A (normalization starter) code point.
`b`	Another code point.

Returns

The non-negative composite code point if there is one; otherwise a negative value.

unorm2_getCombiningClass

U_CAPI uint8_t U_EXPORT2 unorm2_getCombiningClass(
  const UNormalizer2 *norm2,
  UChar32 c
)

Gets the combining class of c.

The default implementation returns 0 but all standard implementations return the Unicode Canonical_Combining_Class value.

Details

Parameters

`norm2`	UNormalizer2 instance
`c`	code point

Returns

c's combining class

unorm2_getDecomposition

U_CAPI int32_t U_EXPORT2 unorm2_getDecomposition(
  const UNormalizer2 *norm2,
  UChar32 c,
  UChar *decomposition,
  int32_t capacity,
  UErrorCode *pErrorCode
)

Gets the decomposition mapping of c.

Roughly equivalent to normalizing the String form of c on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function returns a negative value and does not write a string if c does not have a decomposition mapping in this instance's data. This function is independent of the mode of the UNormalizer2.

Details

Parameters

`norm2`	UNormalizer2 instance
`c`	code point
`decomposition`	String buffer which will be set to c's decomposition mapping, if there is one.
`capacity`	number of UChars that can be written to decomposition
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

the non-negative length of c's decomposition, if there is one; otherwise a negative value

unorm2_getNFCInstance

U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFCInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFC normalization.

Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details

Parameters

pErrorCode

Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

the requested Normalizer2, if successful

unorm2_getNFDInstance

U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFDInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFD normalization.

Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details

Parameters

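pErrorCode

Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)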

Returns

the requested Normalizer2, if successful

unorm2_getNFKCCasefoldInstance

U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKCCasefoldInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.

Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details

Parameters

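pErrorCode

Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)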

Returns

the requested Normalizer2, if successful

unorm2_getNFKCInstance

U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKCInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFKC normalization.

Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details

Parameters

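pErrorCode

Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)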

Returns

the requested Normalizer2, if successful

unorm2_getNFKDInstance

U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKDInstance(
  UErrorCode *pErrorCode
)

Returns a UNormalizer2 instance for Unicode NFKD normalization.

Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.

Details

Parameters

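pErrorCode

Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)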

Returns

the requested Normalizer2, if successful

unorm2_getRawDecomposition

U_CAPI int32_t U_EXPORT2 unorm2_getRawDecomposition(
  const UNormalizer2 *norm2,
  UChar32 c,
  UChar *decomposition,
  int32_t capacity,
  UErrorCode *pErrorCode
)

Gets the raw decomposition mapping of c.

This is similar to the unorm2_getDecomposition() function but returns the raw decomposition mapping as specified in UnicodeData.txt or (for custom data) in the mapping files processed by the gennorm2 tool. By contrast, unorm2_getDecomposition() returns the processed, recursively-decomposed version of this mapping.

When used on a standard NFKC Normalizer2 instance, unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.

When used on a standard NFC Normalizer2 instance, it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); in this case, the result contains either one or two code points (=1..4 UChars).

This function is independent of the mode of the UNormalizer2.

Details

Parameters

`norm2`	UNormalizer2 instance
`c`	code point
`decomposition`	String buffer which will be set to c's raw decomposition mapping, if there is one.
`capacity`	number of UChars that can be written to decomposition
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

the non-negative length of c's raw decomposition, if there is one; otherwise a negative value

unorm2_hasBoundaryAfter

U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryAfter(
  const UNormalizer2 *norm2,
  UChar32 c
)

Tests if the character always has a normalization boundary after it, regardless of context.

For details see the Normalizer2 base class documentation.

Details

Parameters

`norm2`	UNormalizer2 instance
`c`	character to test

Returns

true if c has a normalization boundary after it

unorm2_hasBoundaryBefore

U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryBefore(
  const UNormalizer2 *norm2,
  UChar32 c
)

Tests if the character always has a normalization boundary before it, regardless of context.

For details see the Normalizer2 base class documentation.

Details

Parameters

`norm2`	UNormalizer2 instance
`c`	character to test

Returns

true if c has a normalization boundary before it

unorm2_isInert

U_CAPI UBool U_EXPORT2 unorm2_isInert(
  const UNormalizer2 *norm2,
  UChar32 c
)

Tests if the character is normalization-inert.

For details see the Normalizer2 base class documentation.

Details

Parameters

`norm2`	UNormalizer2 instance
`c`	character to test

Returns

true if c is normalization-inert

unorm2_isNormalized

U_CAPI UBool U_EXPORT2 unorm2_isNormalized(
  const UNormalizer2 *norm2,
  const UChar *s,
  int32_t length,
  UErrorCode *pErrorCode
)

Tests if the string is normalized.

Internally, in cases where the quickCheck() method would return "maybe" (which is only possible for the two COMPOSE modes) this method resolves to "yes" or "no" to provide a definitive result, at the cost of doing more work in those cases.

Details

Parameters

`norm2`	UNormalizer2 instance
`s`	input string
`length`	length of the string, or -1 if NUL-terminated
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

true if s is normalized

unorm2_normalize

U_CAPI int32_t U_EXPORT2 unorm2_normalize(
  const UNormalizer2 *norm2,
  const UChar *src,
  int32_t length,
  UChar *dest,
  int32_t capacity,
  UErrorCode *pErrorCode
)

Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.

The source and destination strings must be different buffers.

Details

Parameters

`norm2`	UNormalizer2 instance
`src`	source string
`length`	length of the source string, or -1 if NUL-terminated
`dest`	destination string; its contents is replaced with normalized src
`capacity`	number of UChars that can be written to dest
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

the length of the destination string (dest)

unorm2_normalizeSecondAndAppend

U_CAPI int32_t U_EXPORT2 unorm2_normalizeSecondAndAppend(
  const UNormalizer2 *norm2,
  UChar *first,
  int32_t firstLength,
  int32_t firstCapacity,
  const UChar *second,
  int32_t secondLength,
  UErrorCode *pErrorCode
)

Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.

The result is normalized if the first string was normalized. The first and second strings must be different buffers.

Details

Parameters

`norm2`	UNormalizer2 instance
`first`	string, should be normalized
`firstLength`	length of the first string, or -1 if NUL-terminated
`firstCapacity`	number of UChars that can be written to first
`second`	string, will be normalized
`secondLength`	length of the second string, or -1 if NUL-terminated
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

the length of the first string after the append

unorm2_quickCheck

U_CAPI UNormalizationCheckResult U_EXPORT2 unorm2_quickCheck(
  const UNormalizer2 *norm2,
  const UChar *s,
  int32_t length,
  UErrorCode *pErrorCode
)

Tests if the string is normalized.

For the two COMPOSE modes, the result could be "maybe" in cases that would take a little more work to resolve definitively. Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster combination of quick check + normalization, to avoid re-checking the "yes" prefix.

Details

Parameters

`norm2`	UNormalizer2 instance
`s`	input string
`length`	length of the string, or -1 if NUL-terminated
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

UNormalizationCheckResult

unorm2_spanQuickCheckYes

U_CAPI int32_t U_EXPORT2 unorm2_spanQuickCheckYes(
  const UNormalizer2 *norm2,
  const UChar *s,
  int32_t length,
  UErrorCode *pErrorCode
)

Returns the end of the normalized substring of the input string.

In other words, with end=spanQuickCheckYes(s, ec); the substring UnicodeString(s, 0, end) will pass the quick check with a "yes" result.

The returned end index is usually one or more characters before the "no" or "maybe" character: The end index is at a normalization boundary. (See the class documentation for more about normalization boundaries.)

When the goal is a normalized string and most input strings are expected to be normalized already, then call this method, and if it returns a prefix shorter than the input string, copy that prefix and use normalizeSecondAndAppend() for the remainder.

Details

Parameters

`norm2`	UNormalizer2 instance
`s`	input string
`length`	length of the string, or -1 if NUL-terminated
`pErrorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

"yes" span end index

uscript_breaksBetweenLetters

U_CAPI UBool U_EXPORT2 uscript_breaksBetweenLetters(
  UScriptCode script
)

Returns true if the script allows line breaks between letters (excluding hyphenation).

Such a script typically requires dictionary-based line breaking. For example, Hani and Thai.

Details

Parameters

script

script code

Returns

true if the script allows line breaks between letters

uscript_getCode

U_CAPI int32_t U_EXPORT2 uscript_getCode(
  const char *nameOrAbbrOrLocale,
  UScriptCode *fillIn,
  int32_t capacity,
  UErrorCode *err
)

Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.

Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". Fills in USCRIPT_LATIN given "en" OR "en_US". If the required capacity is greater than the capacity of the destination buffer, then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.

Note: To search by short or long script alias only, use u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does a fast lookup with no access of the locale data.

Details

Parameters

`nameOrAbbrOrLocale`	name of the script, as given in PropertyValueAliases.txt, or ISO 15924 code or locale
`fillIn`	the UScriptCode buffer to fill in the script code
`capacity`	the capacity (size) of UScriptCode buffer passed in.
`err`	the error status code.

Returns

The number of script codes filled in the buffer passed in

uscript_getName

U_CAPI const char *U_EXPORT2 uscript_getName(
  UScriptCode scriptCode
)

Returns the long Unicode script name, if there is one.

Otherwise returns the 4-letter ISO 15924 script code. Returns "Malayalam" given USCRIPT_MALAYALAM.

Details

Parameters

scriptCode

UScriptCode enum

Returns

long script name as given in PropertyValueAliases.txt, or the 4-letter code, or NULL if scriptCode is invalid

uscript_getSampleString

U_CAPI int32_t U_EXPORT2 uscript_getSampleString(
  UScriptCode script,
  UChar *dest,
  int32_t capacity,
  UErrorCode *pErrorCode
)

Writes the script sample character string.

This string normally consists of one code point but might be longer. The string is empty if the script is not encoded.

Details

Parameters

`script`	script code
`dest`	output string array
`capacity`	number of UChars in the dest array
`pErrorCode`	standard ICU in/out error code, must pass U_SUCCESS() on input

Returns

the string length, even if U_BUFFER_OVERFLOW_ERROR

uscript_getScript

U_CAPI UScriptCode U_EXPORT2 uscript_getScript(
  UChar32 codepoint,
  UErrorCode *err
)

Gets the script code associated with the given codepoint.

Returns USCRIPT_MALAYALAM given 0x0D02

Details

Parameters

`codepoint`	UChar32 codepoint
`err`	the error status code.

Returns

The UScriptCode, or 0 if codepoint is invalid

uscript_getScriptExtensions

U_CAPI int32_t U_EXPORT2 uscript_getScriptExtensions(
  UChar32 c,
  UScriptCode *scripts,
  int32_t capacity,
  UErrorCode *errorCode
)

Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.

If c does have Script_Extensions, then the Script property value (normally Common or Inherited) is not included.
If c does not have Script_Extensions, then the one Script code is written to the output array.
If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. In other words, if the return value is 1, then the output array contains exactly c's single Script code. If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

If there are more than capacity script codes to be written, then U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. (Usual ICU buffer handling behavior.)

Details

Parameters

`c`	code point
`scripts`	output script code array
`capacity`	capacity of the scripts array
`errorCode`	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns

number of script codes in c's Script_Extensions, or 1 for the single Script value, written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity

uscript_getShortName

U_CAPI const char *U_EXPORT2 uscript_getShortName(
  UScriptCode scriptCode
)

Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.

Returns "Mlym" given USCRIPT_MALAYALAM.

Details

Parameters

scriptCode

UScriptCode enum

Returns

short script name (4-letter code), or NULL if scriptCode is invalid

uscript_getUsage

U_CAPI UScriptUsage U_EXPORT2 uscript_getUsage(
  UScriptCode script
)

Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.

Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.

See also: UScriptUsage

Details

Parameters

script

script code

Returns

script usage

uscript_hasScript

U_CAPI UBool U_EXPORT2 uscript_hasScript(
  UChar32 c,
  UScriptCode sc
)

Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

Details

Parameters

`c`	code point
`sc`	script code

Returns

true if sc is in Script_Extensions(c)

uscript_isCased

U_CAPI UBool U_EXPORT2 uscript_isCased(
  UScriptCode script
)

Returns true if in modern (or most recent) usage of the script case distinctions are customary.

For example, Latn and Cyrl.

Details

Parameters

script

script code

Returns

true if the script is cased

uscript_isRightToLeft

U_CAPI UBool U_EXPORT2 uscript_isRightToLeft(
  UScriptCode script
)

Returns true if the script is written right-to-left.

For example, Arab and Hebr.

Details

Parameters

script

script code

Returns

true if the script is right-to-left

utext_char32At

U_CAPI UChar32 U_EXPORT2 utext_char32At(
  UText *ut,
  int64_t nativeIndex
)

Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.

If the specified index points to the interior of a multi-unit character - one of the trail bytes of a UTF-8 sequence, for example - the complete code point will be returned.

The iteration position will be set to the start of the returned code point.

This function is roughly equivalent to the sequence utext_setNativeIndex(index); utext_current32(); (There is a subtle difference if the index is out of bounds by being less than zero - utext_setNativeIndex(negative value) sets the index to zero, after which utext_current() will return the char at zero. utext_char32At(negative index), on the other hand, will return the U_SENTINEL value of -1.)

Details

Parameters

`ut`	the text to be accessed
`nativeIndex`	the native index of the character to be accessed. If the index points to other than the first unit of a multi-unit character, it will be adjusted to the start of the character.

Returns

the code point at the specified index.

utext_clone

U_CAPI UText *U_EXPORT2 utext_clone(
  UText *dest,
  const UText *src,
  UBool deep,
  UBool readOnly,
  UErrorCode *status
)

Clone a UText.

This is much like opening a UText where the source text is itself another UText.

A deep clone will copy both the UText data structures and the underlying text. The original and cloned UText will operate completely independently; modifications made to the text in one will not affect the other. Text providers are not required to support deep clones. The user of clone() must check the status return and be prepared to handle failures.

The standard UText implementations for UTF8, UChar *, UnicodeString and Replaceable all support deep cloning.

The UText returned from a deep clone will be writable, assuming that the text provider is able to support writing, even if the source UText had been made non-writable by means of utext_freeze().

A shallow clone replicates only the UText data structures; it does not make a copy of the underlying text. Shallow clones can be used as an efficient way to have multiple iterators active in a single text string that is not being modified.

A shallow clone operation will not fail, barring truly exceptional conditions such as memory allocation failures.

Shallow UText clones should be avoided if the UText functions that modify the text are expected to be used, either on the original or the cloned UText. Any such modifications can cause unpredictable behavior. Read Only shallow clones provide some protection against errors of this type by disabling text modification via the cloned UText.

A shallow clone made with the readOnly parameter == false will preserve the utext_isWritable() state of the source object. Note, however, that write operations must be avoided while more than one UText exists that refer to the same underlying text.

A UText and its clone may be safely concurrently accessed by separate threads. This is true for read access only with shallow clones, and for both read and write access with deep clones. It is the responsibility of the Text Provider to ensure that this thread safety constraint is met.

Details

Parameters

`dest`	A UText struct to be filled in with the result of the clone operation, or NULL if the clone function should heap-allocate a new UText struct. If non-NULL, must refer to an already existing UText, which will then be reset to become the clone.
`src`	The UText to be cloned.
`deep`	true to request a deep clone, false for a shallow clone.
`readOnly`	true to request that the cloned UText have read only access to the underlying text.
`status`	Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR will be returned if the text provider is unable to clone the original text.

Returns

The newly created clone, or NULL if the clone operation failed.

utext_close

U_CAPI UText *U_EXPORT2 utext_close(
  UText *ut
)

Close function for UText instances.

Cleans up, releases any resources being held by an open UText.

If the UText was originally allocated by one of the utext_open functions, the storage associated with the utext will also be freed. If the UText storage originated with the application, as it would with a local or static instance, the storage will not be deleted.

An open UText can be reset to refer to new string by using one of the utext_open() functions without first closing the UText.

Details

Parameters

`ut`	The UText to be closed.

Returns

NULL if the UText struct was deleted by the close. If the UText struct was originally provided by the caller to the open function, it is returned by this function, and may be safely used again in a subsequent utext_open.

utext_current32

U_CAPI UChar32 U_EXPORT2 utext_current32(
  UText *ut
)

Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached the end of the input text.

Details

Parameters

`ut`	the text to be accessed.

Returns

the Unicode code point at the current iterator position.

utext_equals

U_CAPI UBool U_EXPORT2 utext_equals(
  const UText *a,
  const UText *b
)

Compare two UText objects for equality.

UTexts are equal if they are iterating over the same text, and have the same iteration position within the text. If either or both of the parameters are NULL, the comparison is false.

Details

Parameters

`a`	The first of the two UTexts to compare.
`b`	The other UText to be compared.

Returns

true if the two UTexts are equal.

utext_extract

U_CAPI int32_t U_EXPORT2 utext_extract(
  UText *ut,
  int64_t nativeStart,
  int64_t nativeLimit,
  UChar *dest,
  int32_t destCapacity,
  UErrorCode *status
)

Extract text from a UText into a UChar buffer.

The range of text to be extracted is specified in the native indices of the UText provider. These may not necessarily be UTF-16 indices.

The size (number of 16 bit UChars) of the data to be extracted is returned. The full number of UChars is returned, even when the extracted text is truncated because the specified buffer size is too small.

The extracted string will (if you are a user) / must (if you are a text provider) be NUL-terminated if there is sufficient space in the destination buffer. This terminating NUL is not included in the returned length.

The iteration index is left at the position following the last extracted character.

Details

Parameters

`ut`	the UText from which to extract data.
`nativeStart`	the native index of the first character to extract. If the specified index is out of range, it will be pinned to be within 0 <= index <= textLength
`nativeLimit`	the native string index of the position following the last character to extract. If the specified index is out of range, it will be pinned to be within 0 <= index <= textLength. nativeLimit must be >= nativeStart.
`dest`	the UChar (UTF-16) buffer into which the extracted text is placed
`destCapacity`	The size, in UChars, of the destination buffer. May be zero for precomputing the required size.
`status`	receives any error status. U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the buffer was too small. Returns number of UChars for preflighting.

Returns

Number of UChars in the data to be extracted. Does not include a trailing NUL.

utext_getNativeIndex

U_CAPI int64_t U_EXPORT2 utext_getNativeIndex(
  const UText *ut
)

Get the current iterator position, which can range from 0 to the length of the text.

The position is a native index into the input text, in whatever format it may have (possibly UTF-8 for example), and may not always be the same as the corresponding UChar (UTF-16) index. The returned position will always be aligned to a code point boundary.

Details

Parameters

`ut`	the text to be accessed.

Returns

the current index position, in the native units of the text provider.

utext_getPreviousNativeIndex

U_CAPI int64_t U_EXPORT2 utext_getPreviousNativeIndex(
  UText *ut
)

Get the native index of the character preceding the current position.

If the iteration position is already at the start of the text, zero is returned. The value returned is the same as that obtained from the following sequence, but without the side effect of changing the iteration position.

UText  *ut = whatever;
  ...
utext_previous32(ut);
utext_getNativeIndex(ut);

This function is most useful during forwards iteration, where it will get the native index of the character most recently returned from utext_next32().

Details

Parameters

`ut`	the text to be accessed

Returns

the native index of the character preceding the current index position, or zero if the current position is at the start of the text.

utext_moveIndex32

U_CAPI UBool U_EXPORT2 utext_moveIndex32(
  UText *ut,
  int32_t delta
)

Move the iterator position by delta code points.

The number of code points is a signed number; a negative delta will move the iterator backwards, towards the start of the text.

The index is moved by delta code points forward or backward, but no further backward than to 0 and no further forward than to utext_nativeLength(). The resulting index value will be in between 0 and length, inclusive.

Details

Parameters

`ut`	the text to be accessed.
`delta`	the signed number of code points to move the iteration position.

Returns

true if the position could be moved the requested number of positions while staying within the range [0 - text length].

utext_nativeLength

U_CAPI int64_t U_EXPORT2 utext_nativeLength(
  UText *ut
)

Get the length of the text.

Depending on the characteristics of the underlying text representation, this may be expensive. See also: utext_isLengthExpensive()

Details

Parameters

`ut`	the text to be accessed.

Returns

the length of the text, expressed in native units.

utext_next32

U_CAPI UChar32 U_EXPORT2 utext_next32(
  UText *ut
)

Get the code point at the current iteration position of the UText, and advance the position to the first index following the character.

If the position is at the end of the text (the index following the last character, which is also the length of the text), return U_SENTINEL (-1) and do not advance the index.

This is a post-increment operation.

An inline macro version of this function, UTEXT_NEXT32(), is available for performance critical use.

See also: UTEXT_NEXT32

Details

Parameters

`ut`	the text to be accessed.

Returns

the Unicode code point at the iteration position.

utext_next32From

U_CAPI UChar32 U_EXPORT2 utext_next32From(
  UText *ut,
  int64_t nativeIndex
)

Set the iteration index and return the code point at that index.

Leave the iteration index at the start of the following code point.

This function is the most efficient and convenient way to begin a forward iteration. The results are identical to those from the sequence

utext_setIndex();
utext_next32();

Details

Parameters

`ut`	the text to be accessed.
`nativeIndex`	Iteration index, in the native units of the text provider.

Returns

Code point which starts at or before index, or U_SENTINEL (-1) if it is out of bounds.

utext_openUChars

U_CAPI UText *U_EXPORT2 utext_openUChars(
  UText *ut,
  const UChar *s,
  int64_t length,
  UErrorCode *status
)

Open a read-only UText for UChar * string.

Details

Parameters

`ut`	Pointer to a UText struct. If NULL, a new UText will be created. If non-NULL, must refer to an initialized UText struct, which will then be reset to reference the specified UChar string.
`s`	A UChar (UTF-16) string
`length`	The number of UChars in the input string, or -1 if the string is zero terminated.
`status`	Errors are returned here.

Returns

A pointer to the UText. If a pre-allocated UText was provided, it will always be used and returned.

utext_openUTF8

U_CAPI UText *U_EXPORT2 utext_openUTF8(
  UText *ut,
  const char *s,
  int64_t length,
  UErrorCode *status
)

Open a read-only UText implementation for UTF-8 strings.

Any invalid UTF-8 in the input will be handled in this way: a sequence of bytes that has the form of a truncated, but otherwise valid, UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. Any other illegal bytes will each be replaced by a \uFFFD.

Details

Parameters

`ut`	Pointer to a UText struct. If NULL, a new UText will be created. If non-NULL, must refer to an initialized UText struct, which will then be reset to reference the specified UTF-8 string.
`s`	A UTF-8 string. Must not be NULL.
`length`	The length of the UTF-8 string in bytes, or -1 if the string is zero terminated.
`status`	Errors are returned here.

Returns

A pointer to the UText. If a pre-allocated UText was provided, it will always be used and returned.

utext_previous32

U_CAPI UChar32 U_EXPORT2 utext_previous32(
  UText *ut
)

Move the iterator position to the character (code point) whose index precedes the current position, and return that character.

This is a pre-decrement operation.

If the initial position is at the start of the text (index of 0) return U_SENTINEL (-1), and leave the position unchanged.

An inline macro version of this function, UTEXT_PREVIOUS32(), is available for performance critical use.

See also: UTEXT_PREVIOUS32

Details

Parameters

`ut`	the text to be accessed.

Returns

the previous UChar32 code point, or U_SENTINEL (-1) if the iteration has reached the start of the text.

utext_previous32From

U_CAPI UChar32 U_EXPORT2 utext_previous32From(
  UText *ut,
  int64_t nativeIndex
)

Set the iteration index, and return the code point preceding the one specified by the initial index.

Leave the iteration position at the start of the returned code point.

This function is the most efficient and convenient way to begin a backwards iteration.

Details

Parameters

`ut`	the text to be accessed.
`nativeIndex`	Iteration index in the native units of the text provider.

Returns

Code point preceding the one at the initial index, or U_SENTINEL (-1) if it is out of bounds.

utext_setNativeIndex

U_CAPI void U_EXPORT2 utext_setNativeIndex(
  UText *ut,
  int64_t nativeIndex
)

Set the current iteration position to the nearest code point boundary at or preceding the specified index.

The index is in the native units of the original input text. If the index is out of range, it will be pinned to be within the range of the input text.

It will usually be more efficient to begin an iteration using the functions utext_next32From() or utext_previous32From() rather than setIndex().

Moving the index position to an adjacent character is best done with utext_next32(), utext_previous32() or utext_moveIndex32(). Attempting to do direct arithmetic on the index position is complicated by the fact that the size (in native units) of a character depends on the underlying representation of the character (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not easily knowable.

Details

Parameters

`ut`	the text to be accessed.
`nativeIndex`	the native unit index of the new iteration position.

utrans_clone

U_CAPI UTransliterator *U_EXPORT2 utrans_clone(
  const UTransliterator *trans,
  UErrorCode *status
)

Create a copy of a transliterator.

Any non-NULL result from this function should later be closed with utrans_close().

Details

Parameters

`trans`	the transliterator to be copied.
`status`	a pointer to the UErrorCode

Returns

a transliterator pointer that may be passed to other utrans_xxx() functions, or NULL if the clone call fails.

utrans_close

U_CAPI void U_EXPORT2 utrans_close(
  UTransliterator *trans
)

Close a transliterator.

Any non-NULL pointer returned by utrans_openXxx() or utrans_clone() should eventually be closed.

Details

Parameters

`trans`	the transliterator to be closed.

utrans_openIDs

U_CAPI UEnumeration *U_EXPORT2 utrans_openIDs(
  UErrorCode *pErrorCode
)

Return a UEnumeration for the available transliterators.

Details

Parameters

`pErrorCode`	Pointer to the UErrorCode in/out parameter.

Returns

UEnumeration for the available transliterators. Close with uenum_close().

utrans_openInverse

U_CAPI UTransliterator *U_EXPORT2 utrans_openInverse(
  const UTransliterator *trans,
  UErrorCode *status
)

Open an inverse of an existing transliterator.

For this to work, the inverse must be registered with the system. For example, if the Transliterator "A-B" is opened, and then its inverse is opened, the result is the Transliterator "B-A", if such a transliterator is registered with the system. Otherwise the result is NULL and a failing UErrorCode is set. Any non-NULL result from this function should later be closed with utrans_close().

Details

Parameters

`trans`	the transliterator to open the inverse of.
`status`	a pointer to the UErrorCode

Returns

a pointer to a newly-opened transliterator that is the inverse of trans, or NULL if the open call fails.

utrans_openU

U_CAPI UTransliterator *U_EXPORT2 utrans_openU(
  const UChar *id,
  int32_t idLength,
  UTransDirection dir,
  const UChar *rules,
  int32_t rulesLength,
  UParseError *parseError,
  UErrorCode *pErrorCode
)

Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.

Any non-NULL result from this function should later be closed with utrans_close().

Details

Parameters

`id`	a valid transliterator ID
`idLength`	the length of the ID string, or -1 if NUL-terminated
`dir`	the desired direction
`rules`	the transliterator rules. See the C++ header rbt.h for rules syntax. If NULL then a system transliterator matching the ID is returned.
`rulesLength`	the length of the rules, or -1 if the rules are NUL-terminated.
`parseError`	a pointer to a UParseError struct to receive the details of any parsing errors. This parameter may be NULL if no parsing error details are desired.
`pErrorCode`	a pointer to the UErrorCode

Returns

a transliterator pointer that may be passed to other utrans_xxx() functions, or NULL if the open call fails.

utrans_setFilter

U_CAPI void U_EXPORT2 utrans_setFilter(
  UTransliterator *trans,
  const UChar *filterPattern,
  int32_t filterPatternLen,
  UErrorCode *status
)

Set the filter used by a transliterator.

A filter can be used to make the transliterator pass certain characters through untouched. The filter is expressed using a UnicodeSet pattern. If the filterPattern is NULL or the empty string, then the transliterator will be reset to use no filter.

See also: UnicodeSet

Details

Parameters

`trans`	the transliterator
`filterPattern`	a pattern string, in the form accepted by UnicodeSet, specifying which characters to apply the transliteration to. May be NULL or the empty string to indicate no filter.
`filterPatternLen`	the length of filterPattern, or -1 if filterPattern is zero-terminated
`status`	a pointer to the UErrorCode

utrans_toRules

U_CAPI int32_t U_EXPORT2 utrans_toRules(
  const UTransliterator *trans,
  UBool escapeUnprintable,
  UChar *result,
  int32_t resultLength,
  UErrorCode *status
)

Create a rule string that can be passed to utrans_openU to recreate this transliterator.

Details

Parameters

`trans`	The transliterator
`escapeUnprintable`	if true then convert unprintable characters to their hex escape representations, \uxxxx or \Uxxxxxxxx. Unprintable characters are those other than U+000A, U+0020..U+007E.
`result`	A pointer to a buffer to receive the rules.
`resultLength`	The maximum size of result.
`status`	A pointer to the UErrorCode. In case of error status, the contents of result are undefined.

Returns

int32_t The length of the rule string (may be greater than resultLength, in which case an error is returned).

utrans_trans

U_CAPI void U_EXPORT2 utrans_trans(
  const UTransliterator *trans,
  UReplaceable *rep,
  const UReplaceableCallbacks *repFunc,
  int32_t start,
  int32_t *limit,
  UErrorCode *status
)

Transliterate a segment of a UReplaceable string.

The string is passed in as a UReplaceable pointer rep and a UReplaceableCallbacks function pointer struct repFunc. Functions in the repFunc struct will be called in order to modify the rep string.

Details

Parameters

`trans`	the transliterator
`rep`	a pointer to the string. This will be passed to the repFunc functions.
`repFunc`	a set of function pointers that will be used to modify the string pointed to by rep.
`start`	the beginning index, inclusive; `0 <= start <= limit`.
`limit`	pointer to the ending index, exclusive; `start <= limit <= repFunc->length(rep)`. Upon return, limit will contain the new limit index. The text previously occupying `[start, limit)` has been transliterated, possibly to a string of a different length, at `[start, new-limit)`, where *new-limit* is the value returned in `*limit`.
`status`	a pointer to the UErrorCode

utrans_transIncremental

U_CAPI void U_EXPORT2 utrans_transIncremental(
  const UTransliterator *trans,
  UReplaceable *rep,
  const UReplaceableCallbacks *repFunc,
  UTransPosition *pos,
  UErrorCode *status
)

Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.

This method is typically called after new text has been inserted, e.g. as a result of a keyboard event. The transliterator will try to transliterate characters of rep between index.cursor and index.limit. Characters before index.cursor will not be changed.

Upon return, values in index will be updated. index.start will be advanced to the first character that future calls to this method will read. index.cursor and index.limit will be adjusted to delimit the range of text that future calls to this method may change.

Typical usage of this method begins with an initial call with index.start and index.limit set to indicate the portion of text to be transliterated, and index.cursor == index.start. Thereafter, index can be used without modification in future calls, provided that all changes to text are made via this method.

This method assumes that future calls may be made that will insert new text into the buffer. As a result, it only performs unambiguous transliterations. After the last call to this method, there may be untransliterated text that is waiting for more input to resolve an ambiguity. In order to perform these pending transliterations, clients should call utrans_trans() with a start of index.start and a limit of index.end after the last call to this method has been made.

Details

Parameters

`trans`	the transliterator
`rep`	a pointer to the string. This will be passed to the repFunc functions.
`repFunc`	a set of function pointers that will be used to modify the string pointed to by rep.
`pos`	a struct containing the start and limit indices of the text to be read and the text to be transliterated
`status`	a pointer to the UErrorCode

utrans_transIncrementalUChars

U_CAPI void U_EXPORT2 utrans_transIncrementalUChars(
  const UTransliterator *trans,
  UChar *text,
  int32_t *textLength,
  int32_t textCapacity,
  UTransPosition *pos,
  UErrorCode *status
)

Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.

See utrans_transIncremental(). The string is passed in in a UChar* buffer. The string is modified in place. If the result is longer than textCapacity, it is truncated. The actual length of the result is returned in *textLength, if textLength is non-NULL. *textLength may be greater than textCapacity, but only textCapacity UChars will be written to *text, including the zero terminator. See utrans_transIncremental() for usage details.

See also: utrans_transIncremental

Details

Parameters

`trans`	the transliterator
`text`	a pointer to a buffer containing the text to be transliterated on input and the result text on output.
`textLength`	a pointer to the length of the string in text. If the length is -1 then the string is assumed to be zero-terminated. Upon return, the new length is stored in *textLength. If textLength is NULL then the string is assumed to be zero-terminated.
`textCapacity`	the length of the text buffer
`pos`	a struct containing the start and limit indices of the text to be read and the text to be transliterated
`status`	a pointer to the UErrorCode

utrans_transUChars

U_CAPI void U_EXPORT2 utrans_transUChars(
  const UTransliterator *trans,
  UChar *text,
  int32_t *textLength,
  int32_t textCapacity,
  int32_t start,
  int32_t *limit,
  UErrorCode *status
)

Transliterate a segment of a UChar* string.

The string is passed in in a UChar* buffer. The string is modified in place. If the result is longer than textCapacity, it is truncated. The actual length of the result is returned in *textLength, if textLength is non-NULL. *textLength may be greater than textCapacity, but only textCapacity UChars will be written to *text, including the zero terminator.

Details

Parameters

`trans`	the transliterator
`text`	a pointer to a buffer containing the text to be transliterated on input and the result text on output.
`textLength`	a pointer to the length of the string in text. If the length is -1 then the string is assumed to be zero-terminated. Upon return, the new length is stored in *textLength. If textLength is NULL then the string is assumed to be zero-terminated.
`textCapacity`	the length of the text buffer
`start`	the beginning index, inclusive; `0 <= start <= limit`.
`limit`	pointer to the ending index, exclusive; `start <= limit <= repFunc->length(rep)`. Upon return, limit will contain the new limit index. The text previously occupying `[start, limit)` has been transliterated, possibly to a string of a different length, at `[start, new-limit)`, where *new-limit* is the value returned in `*limit`.
`status`	a pointer to the UErrorCode

ICU4C

Summary

Enumerations

Typedefs

Variables

Functions

Structs

Enumerations

Anonymous Enum 124

UAcceptResult

UBidiPairedBracketType

UBlockCode

UBreakIteratorType

UCPMapRangeOption

UCharCategory

UCharDirection

UCharNameChoice

UColAttribute

UColAttributeValue

UColBoundMode

UColReorderCode

UCollationResult

UDecompositionType

UDialectHandling

UDisplayContext

UDisplayContextType

UEastAsianWidth

UErrorCode

UGraphemeClusterBreak

UHangulSyllableType

UIndicPositionalCategory

UIndicSyllabicCategory

UJoiningGroup

UJoiningType

ULayoutType

ULineBreak

ULineBreakTag

ULocAvailableType

ULocDataLocaleType

UNormalization2Mode

UNormalizationCheckResult

UNumericType

UProperty

UPropertyNameChoice

UScriptCode

UScriptUsage

USentenceBreak

USentenceBreakTag

UTransDirection

UVerticalOrientation

UWordBreak

UWordBreakValues

Typedefs

OldUChar

UBidiPairedBracketType

UBlockCode

UBool

UBreakIteratorType

UCPMap

UCPMapValueFilter

UChar

UChar32

UCharCategory

UCharDirection

UCharEnumTypeRange

UCharNameChoice

UColAttribute

UColAttributeValue

UColBoundMode

UColReorderCode

UCollationResult

UCollationStrength

UCollator

UDate

UDecompositionType

UDisplayContext

UDisplayContextType

UEastAsianWidth

UEnumCharNamesFn

UEnumeration