ICU4C
#include <parseerr.h>
#include <ptypes.h>
#include <putil.h>
#include <stringoptions.h>
#include <ubrk.h>
#include <uchar.h>
#include <ucol.h>
#include <ucpmap.h>
#include <udisplaycontext.h>
#include <uenum.h>
#include <uldnames.h>
#include <uloc.h>
#include <ulocdata.h>
#include <umachine.h>
#include <unorm2.h>
#include <urep.h>
#include <uscript.h>
#include <ustring.h>
#include <utext.h>
#include <utf.h>
#include <utf16.h>
#include <utf8.h>
#include <utrans.h>
#include <utypes.h>
#include <uversion.h>
Summary
Typedefs |
|
---|---|
OldUChar
|
OldUCharuint16_t
Default ICU 58 definition of UChar. |
UBidiPairedBracketType
|
typedef enum UBidiPairedBracketType
Bidi Paired Bracket Type constants. |
UBlockCode
|
typedefenum UBlockCode
|
UBool
|
typedef int8_t
The ICU boolean type, a signed-byte integer. |
UBreakIteratorType
|
typedefenum UBreakIteratorType
The possible types of text boundaries. |
UCPMap
|
typedefstruct UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values. |
UCPMapValueFilter(const void *context, uint32_t value)
|
typedef uint32_t U_CALLCONV
Callback function type: Modifies a map value. |
UChar
|
UCharuint16_t
The base type for UTF-16 code units and pointers. |
UChar32
|
typedef int32_t
Define UChar32 as a type for single Unicode code points. |
UCharCategory
|
typedefenum UCharCategory
Data for enumerated Unicode general category types. |
UCharDirection
|
typedefenum UCharDirection
This specifies the language directional property of a character set. |
UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
|
typedef UBool U_CALLCONV
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type"). |
UCharNameChoice
|
typedefenum UCharNameChoice
Selector constants for u_charName(). |
UColAttribute
|
typedefenum UColAttribute
Attributes that collation service understands. |
UColAttributeValue
|
typedefenum UColAttributeValue
Enum containing attribute values for controlling collation behavior. |
UColBoundMode
|
typedefenum UColBoundMode
Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum. |
UColReorderCode
|
typedefenum UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes. |
UCollationResult
|
typedefenum UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method. |
UCollationStrength
|
typedef UColAttributeValue
Base letter represents a primary difference. |
UCollator
|
typedefstruct UCollator
structure representing a collator object instance |
UDate
|
typedef double
Date and Time data type. |
UDecompositionType
|
typedefenum UDecompositionType
Decomposition Type constants. |
UDisplayContext
|
typedefenum UDisplayContext
|
UDisplayContextType
|
typedefenum UDisplayContextType
|
UEastAsianWidth
|
typedefenum UEastAsianWidth
East Asian Width constants. |
UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
|
typedef UBool U_CALLCONV
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name. |
UEnumeration
|
typedefstruct UEnumeration
structure representing an enumeration object instance |
UErrorCode
|
typedefenum UErrorCode
Standard ICU4C error code type, a substitute for exceptions. |
UGraphemeClusterBreak
|
typedef enum UGraphemeClusterBreak
Grapheme Cluster Break constants. |
UHangulSyllableType
|
typedefenum UHangulSyllableType
Hangul Syllable Type constants. |
UIndicPositionalCategory
|
typedef enum UIndicPositionalCategory
Indic Positional Category constants. |
UIndicSyllabicCategory
|
typedef enum UIndicSyllabicCategory
Indic Syllabic Category constants. |
UJoiningGroup
|
typedefenum UJoiningGroup
Joining Group constants. |
UJoiningType
|
typedefenum UJoiningType
Joining Type constants. |
ULineBreak
|
typedefenum ULineBreak
Line Break constants. |
ULineBreakTag
|
typedefenum ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus(). |
ULocAvailableType
|
typedefenum ULocAvailableType
Types for uloc_getAvailableByType and uloc_countAvailableByType. |
ULocaleData
|
typedefstruct ULocaleData
A locale data object. |
ULocaleDisplayNames
|
typedefstruct ULocaleDisplayNames
C typedef for struct ULocaleDisplayNames. |
UNormalizationCheckResult
|
typedef enum UNormalizationCheckResult
Result values for normalization quick check functions. |
UNormalizer2
|
typedefstruct UNormalizer2
C typedef for struct UNormalizer2. |
UNumericType
|
typedefenum UNumericType
Numeric Type constants. |
UParseError
|
typedefstruct UParseError
A UParseError struct is used to return detailed information about parsing errors. |
UProperty
|
typedefenum UProperty
Selection constants for Unicode properties. |
UPropertyNameChoice
|
typedefenum UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName(). |
UReplaceable
|
typedef void *
An opaque replaceable text object. |
UReplaceableCallbacks
|
typedefstruct UReplaceableCallbacks
A set of function pointers that transliterators use to manipulate a UReplaceable. |
UScriptCode
|
typedefenum UScriptCode
Constants for ISO 15924 script codes. |
UScriptUsage
|
typedefenum UScriptUsage
Script usage constants. |
USentenceBreak
|
typedefenum USentenceBreak
Sentence Break constants. |
USentenceBreakTag
|
typedefenum USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus(). |
UText
|
typedefstruct UText
C typedef for struct UText. |
UTransDirection
|
typedefenum UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator. |
UTransPosition
|
typedefstruct UTransPosition
Position structure for utrans_transIncremental() incremental transliteration. |
UTransliterator
|
typedef void *
An opaque transliterator for use in C. |
UVersionInfo[U_MAX_VERSION_LENGTH]
|
typedef uint8_t
The binary form of a version on ICU APIs is an array of 4 uint8_t. |
UVerticalOrientation
|
typedefenum UVerticalOrientation
Vertical Orientation constants. |
UWordBreak
|
typedefenum UWordBreak
Enum constants for the word break tags returned by getRuleStatus(). |
UWordBreakValues
|
typedefenum UWordBreakValues
Word Break constants. |
Variables |
|
---|---|
context
|
U_CDECL_BEGIN typedef void *
|
Functions |
|
---|---|
UChar(U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context)
|
U_CDECL_BEGIN typedef
Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.
|
u_charAge(UChar32 c, UVersionInfo versionArray)
|
U_CAPI void U_EXPORT2
Get the "age" of the code point.
|
u_charDigitValue(UChar32 c)
|
U_CAPI int32_t U_EXPORT2
Returns the decimal digit value of a decimal digit character.
|
u_charDirection(UChar32 c)
|
U_CAPI UCharDirection U_EXPORT2
Returns the bidirectional category value for the code point, which is used in the Unicode bidirectional algorithm (UAX #9 http://www.unicode.org/reports/tr9/).
|
u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
|
Find a Unicode character by its name and return its code point value.
|
u_charMirror(UChar32 c)
|
Maps the specified character to a "mirror-image" character.
|
u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Retrieve the name of a Unicode character.
|
u_charType(UChar32 c)
|
U_CAPI int8_t U_EXPORT2
Returns the general category value for the code point.
|
u_countChar32(const UChar *s, int32_t length)
|
U_CAPI int32_t U_EXPORT2
Count Unicode code points in the length UChar code units of the string.
|
u_digit(UChar32 ch, int8_t radix)
|
U_CAPI int32_t U_EXPORT2
Returns the decimal digit value of the code point in the specified radix.
|
u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
|
U_CAPI void U_EXPORT2
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.
|
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
|
U_CAPI void U_EXPORT2
Enumerate efficiently all code points with their Unicode general categories.
|
u_errorName(UErrorCode code)
|
U_CAPI const char *U_EXPORT2
Return a string for a UErrorCode value.
|
u_foldCase(UChar32 c, uint32_t options)
|
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
|
u_forDigit(int32_t digit, int8_t radix)
|
Determines the character representation for a specific digit in the specified radix.
|
u_getBidiPairedBracket(UChar32 c)
|
Maps the specified character to its paired bracket character.
|
u_getCombiningClass(UChar32 c)
|
U_CAPI uint8_t U_EXPORT2
Returns the combining class of the code point as specified in UnicodeData.txt.
|
u_getIntPropertyMaxValue(UProperty which)
|
U_CAPI int32_t U_EXPORT2
Get the maximum value for an enumerated/integer/binary Unicode property.
|
u_getIntPropertyMinValue(UProperty which)
|
U_CAPI int32_t U_EXPORT2
Get the minimum value for an enumerated/integer/binary Unicode property.
|
u_getIntPropertyValue(UChar32 c, UProperty which)
|
U_CAPI int32_t U_EXPORT2
Get the property value for an enumerated or integer Unicode property for a code point.
|
u_getNumericValue(UChar32 c)
|
U_CAPI double U_EXPORT2
Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
|
u_getPropertyEnum(const char *alias)
|
Return the UProperty enum for a given property name, as specified in the Unicode database file PropertyAliases.txt.
|
u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
|
U_CAPI const char *U_EXPORT2
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases.txt.
|
u_getPropertyValueEnum(UProperty property, const char *alias)
|
U_CAPI int32_t U_EXPORT2
Return the property value integer for a given value name, as specified in the Unicode database file PropertyValueAliases.txt.
|
u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
|
U_CAPI const char *U_EXPORT2
Return the Unicode name for a given property value, as given in the Unicode database file PropertyValueAliases.txt.
|
u_getUnicodeVersion(UVersionInfo versionArray)
|
U_CAPI void U_EXPORT2
Gets the Unicode version information.
|
u_getVersion(UVersionInfo versionArray)
|
U_CAPI void U_EXPORT2
Gets the ICU release version.
|
u_hasBinaryProperty(UChar32 c, UProperty which)
|
Check a binary Unicode property for a code point.
|
u_isIDIgnorable(UChar32 c)
|
Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.
|
u_isIDPart(UChar32 c)
|
Determines if the specified character is permissible in an identifier according to Java.
|
u_isIDStart(UChar32 c)
|
Determines if the specified character is permissible as the first character in an identifier according to Unicode (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
|
u_isISOControl(UChar32 c)
|
Determines whether the specified code point is an ISO control code.
|
u_isJavaIDPart(UChar32 c)
|
Determines if the specified character is permissible in a Java identifier.
|
u_isJavaIDStart(UChar32 c)
|
Determines if the specified character is permissible as the first character in a Java identifier.
|
u_isJavaSpaceChar(UChar32 c)
|
Determine if the specified code point is a space character according to Java.
|
u_isMirrored(UChar32 c)
|
Determines whether the code point has the Bidi_Mirrored property.
|
u_isUAlphabetic(UChar32 c)
|
Check if a code point has the Alphabetic Unicode property.
|
u_isULowercase(UChar32 c)
|
Check if a code point has the Lowercase Unicode property.
|
u_isUUppercase(UChar32 c)
|
Check if a code point has the Uppercase Unicode property.
|
u_isUWhiteSpace(UChar32 c)
|
Check if a code point has the White_Space Unicode property.
|
u_isWhitespace(UChar32 c)
|
Determines if the specified code point is a whitespace character according to Java/ICU.
|
u_isalnum(UChar32 c)
|
Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.
|
u_isalpha(UChar32 c)
|
Determines whether the specified code point is a letter character.
|
u_isbase(UChar32 c)
|
Non-standard: Determines whether the specified code point is a base character.
|
u_isblank(UChar32 c)
|
Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.
|
u_iscntrl(UChar32 c)
|
Determines whether the specified code point is a control character (as defined by this function).
|
u_isdefined(UChar32 c)
|
Determines whether the specified code point is "defined", which usually means that it is assigned a character.
|
u_isdigit(UChar32 c)
|
Determines whether the specified code point is a digit character according to Java.
|
u_isgraph(UChar32 c)
|
Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
|
u_islower(UChar32 c)
|
Determines whether the specified code point has the general category "Ll" (lowercase letter).
|
u_isprint(UChar32 c)
|
Determines whether the specified code point is a printable character.
|
u_ispunct(UChar32 c)
|
Determines whether the specified code point is a punctuation character.
|
u_isspace(UChar32 c)
|
Determines if the specified character is a space character or not.
|
u_istitle(UChar32 c)
|
Determines whether the specified code point is a titlecase letter.
|
u_isupper(UChar32 c)
|
Determines whether the specified code point has the general category "Lu" (uppercase letter).
|
u_isxdigit(UChar32 c)
|
Determines whether the specified code point is a hexadecimal digit.
|
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
|
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
|
u_memchr(const UChar *s, UChar c, int32_t count)
|
Find the first occurrence of a BMP code point in a string.
|
u_memchr32(const UChar *s, UChar32 c, int32_t count)
|
Find the first occurrence of a code point in a string.
|
u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)
|
U_CAPI int32_t U_EXPORT2
Compare the first `count` UChars of each buffer.
|
u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
|
u_memcpy(UChar *dest, const UChar *src, int32_t count)
|
Synonym for memcpy(), but with UChars only.
|
u_memmove(UChar *dest, const UChar *src, int32_t count)
|
Synonym for memmove(), but with UChars only.
|
u_memrchr(const UChar *s, UChar c, int32_t count)
|
Find the last occurrence of a BMP code point in a string.
|
u_memrchr32(const UChar *s, UChar32 c, int32_t count)
|
Find the last occurrence of a code point in a string.
|
u_memset(UChar *dest, UChar c, int32_t count)
|
Initialize `count` characters of `dest` to `c`.
|
u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
|
u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings (binary order).
|
u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
|
Find the first occurrence of a substring in a string.
|
u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
|
Find the last occurrence of a substring in a string.
|
u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Case-folds the characters in a string.
|
u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
|
Convert a UTF-32 string to UTF-16.
|
u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
|
Convert a UTF-32 string to UTF-16.
|
u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
|
Convert a UTF-8 string to UTF-16.
|
u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
|
Convert a UTF-8 string to UTF-16.
|
u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
|
Convert a UTF-8 string to UTF-16.
|
u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)
|
Check if the string contains more Unicode code points than a certain number.
|
u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Lowercase the characters in a string.
|
u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Titlecase a string.
|
u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
|
Convert a UTF-16 string to UTF-32.
|
u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
|
Convert a UTF-16 string to UTF-32.
|
u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
|
U_CAPI char *U_EXPORT2
Convert a UTF-16 string to UTF-8.
|
u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
|
U_CAPI char *U_EXPORT2
Convert a UTF-16 string to UTF-8.
|
u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
|
U_CDECL_END U_CAPI int32_t U_EXPORT2
Uppercase the characters in a string.
|
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)
|
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
|
u_strcat(UChar *dst, const UChar *src)
|
Concatenate two ustrings.
|
u_strchr(const UChar *s, UChar c)
|
Find the first occurrence of a BMP code point in a string.
|
u_strchr32(const UChar *s, UChar32 c)
|
Find the first occurrence of a code point in a string.
|
u_strcmp(const UChar *s1, const UChar *s2)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings for bitwise equality (code unit order).
|
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
|
u_strcpy(UChar *dst, const UChar *src)
|
Copy a ustring.
|
u_strcspn(const UChar *string, const UChar *matchSet)
|
U_CAPI int32_t U_EXPORT2
Returns the number of consecutive characters in `string`, beginning with the first, that do not occur somewhere in `matchSet`.
|
u_strlen(const UChar *s)
|
U_CAPI int32_t U_EXPORT2
Determine the length of an array of UChar.
|
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
|
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
|
u_strncat(UChar *dst, const UChar *src, int32_t n)
|
Concatenate two ustrings.
|
u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)
|
U_CAPI int32_t U_EXPORT2
Compare two ustrings for bitwise equality.
|
u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
|
u_strncpy(UChar *dst, const UChar *src, int32_t n)
|
Copy a ustring.
|
u_strpbrk(const UChar *string, const UChar *matchSet)
|
Locates the first occurrence in the string `string` of any of the characters in the string `matchSet`.
|
u_strrchr(const UChar *s, UChar c)
|
Find the last occurrence of a BMP code point in a string.
|
u_strrchr32(const UChar *s, UChar32 c)
|
Find the last occurrence of a code point in a string.
|
u_strrstr(const UChar *s, const UChar *substring)
|
Find the last occurrence of a substring in a string.
|
u_strspn(const UChar *string, const UChar *matchSet)
|
U_CAPI int32_t U_EXPORT2
Returns the number of consecutive characters in `string`, beginning with the first, that occur somewhere in `matchSet`.
|
u_strstr(const UChar *s, const UChar *substring)
|
Find the first occurrence of a substring in a string.
|
u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)
|
The string tokenizer API allows an application to break a string into tokens.
|
u_tolower(UChar32 c)
|
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
|
u_totitle(UChar32 c)
|
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
|
u_toupper(UChar32 c)
|
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
|
u_versionToString(const UVersionInfo versionArray, char *versionString)
|
U_CAPI void U_EXPORT2
Write a string with dotted-decimal version information according to the input UVersionInfo.
|
ubrk_clone(const UBreakIterator *bi, UErrorCode *status)
|
U_CAPI UBreakIterator *U_EXPORT2
Thread safe cloning operation.
|
ubrk_close(UBreakIterator *bi)
|
U_CAPI void U_EXPORT2
Close a UBreakIterator.
|
ubrk_countAvailable(void)
|
U_CAPI int32_t U_EXPORT2
Determine how many locales have text breaking information available.
|
ubrk_current(const UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Determine the most recently-returned text boundary.
|
ubrk_first(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Set the iterator position to zero, the start of the text being scanned.
|
ubrk_following(UBreakIterator *bi, int32_t offset)
|
U_CAPI int32_t U_EXPORT2
Advance the iterator to the first boundary following the specified offset.
|
ubrk_getAvailable(int32_t index)
|
U_CAPI const char *U_EXPORT2
Get a locale for which text breaking information is available.
|
ubrk_getRuleStatus(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Return the status from the break rule that determined the most recently returned break position.
|
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Get the statuses from the break rules that determined the most recently returned break position.
|
ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
|
Returns true if the specified position is a boundary position.
|
ubrk_last(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Set the iterator position to the index immediately beyond the last character in the text being scanned.
|
ubrk_next(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Advance the iterator to the boundary following the current boundary.
|
ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
|
U_CAPI UBreakIterator *U_EXPORT2
Open a new UBreakIterator for locating text boundaries for a specified locale.
|
ubrk_preceding(UBreakIterator *bi, int32_t offset)
|
U_CAPI int32_t U_EXPORT2
Set the iterator position to the first boundary preceding the specified offset.
|
ubrk_previous(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Set the iterator position to the boundary preceding the current boundary.
|
ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Sets an existing iterator to point to a new piece of text.
|
ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Sets an existing iterator to point to a new piece of text.
|
ucol_clone(const UCollator *coll, UErrorCode *status)
|
Thread safe cloning operation.
|
ucol_close(UCollator *coll)
|
U_CAPI void U_EXPORT2
Close a UCollator.
|
ucol_countAvailable(void)
|
U_CAPI int32_t U_EXPORT2
Determine how many locales have collation rules available.
|
ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
|
U_CAPI UColAttributeValue U_EXPORT2
Universal attribute getter.
|
ucol_getAvailable(int32_t localeIndex)
|
U_CAPI const char *U_EXPORT2
Get a locale for which collation rules are available.
|
ucol_getDisplayName(const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Get the display name for a UCollator.
|
ucol_getEquivalentReorderCodes(int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Retrieves the reorder codes that are grouped with the given reorder code.
|
ucol_getFunctionalEquivalent(char *result, int32_t resultCapacity, const char *keyword, const char *locale, UBool *isAvailable, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Return the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.
|
ucol_getKeywordValues(const char *keyword, UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Given a keyword, create a string enumeration of all values for that keyword that are currently in use.
|
ucol_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Given a key and a locale, returns an array of string values in a preferred order that would make a difference.
|
ucol_getKeywords(UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Create a string enumerator of all possible keywords that are relevant to collation.
|
ucol_getMaxVariable(const UCollator *coll)
|
U_CAPI UColReorderCode U_EXPORT2
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
|
ucol_getReorderCodes(const UCollator *coll, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Retrieves the reordering codes for this collator.
|
ucol_getSortKey(const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength)
|
U_CAPI int32_t U_EXPORT2
Get a sort key for a string from a UCollator.
|
ucol_getStrength(const UCollator *coll)
|
U_CAPI UCollationStrength U_EXPORT2
Get the collation strength used in a UCollator.
|
ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity)
|
U_CAPI int32_t U_EXPORT2
Merges two sort keys.
|
ucol_open(const char *loc, UErrorCode *status)
|
Open a UCollator for comparing strings.
|
ucol_openAvailableLocales(UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Create a string enumerator of all locales for which a valid collator may be opened.
|
ucol_openRules(const UChar *rules, int32_t rulesLength, UColAttributeValue normalizationMode, UCollationStrength strength, UParseError *parseError, UErrorCode *status)
|
Produce a UCollator instance according to the rules supplied.
|
ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Universal attribute setter.
|
ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode)
|
U_CAPI void U_EXPORT2
Sets the variable top to the top of the specified reordering group.
|
ucol_setReorderCodes(UCollator *coll, const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode *pErrorCode)
|
U_CAPI void U_EXPORT2
Sets the reordering codes for this collator.
|
ucol_setStrength(UCollator *coll, UCollationStrength strength)
|
U_CAPI void U_EXPORT2
Set the collation strength used in a UCollator.
|
ucol_strcoll(const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)
|
U_CAPI UCollationResult U_EXPORT2
Compare two strings.
|
ucol_strcollUTF8(const UCollator *coll, const char *source, int32_t sourceLength, const char *target, int32_t targetLength, UErrorCode *status)
|
U_CAPI UCollationResult U_EXPORT2
Compare two strings in UTF-8.
|
uenum_close(UEnumeration *en)
|
U_CAPI void U_EXPORT2
Disposes of resources in use by the iterator.
|
uenum_count(UEnumeration *en, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Returns the number of elements that the iterator traverses.
|
uenum_next(UEnumeration *en, int32_t *resultLength, UErrorCode *status)
|
U_CAPI const char *U_EXPORT2
Returns the next element in the iterator's list.
|
uenum_openCharStringsEnumeration(const char *const strings[], int32_t count, UErrorCode *ec)
|
U_CAPI UEnumeration *U_EXPORT2
Given an array of const char* strings (invariant chars only), return a UEnumeration.
|
uenum_openUCharStringsEnumeration(const UChar *const strings[], int32_t count, UErrorCode *ec)
|
U_CAPI UEnumeration *U_EXPORT2
Given an array of const UChar* strings, return a UEnumeration.
|
uenum_reset(UEnumeration *en, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Resets the iterator to the current list of service IDs.
|
uenum_unext(UEnumeration *en, int32_t *resultLength, UErrorCode *status)
|
Returns the next element in the iterator's list.
|
uldn_close(ULocaleDisplayNames *ldn)
|
U_CAPI void U_EXPORT2
Closes a ULocaleDisplayNames instance obtained from uldn_open().
|
uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type, UErrorCode *pErrorCode)
|
U_CAPI UDisplayContext U_EXPORT2
Returns the UDisplayContext value for the specified UDisplayContextType.
|
uldn_getDialectHandling(const ULocaleDisplayNames *ldn)
|
U_CAPI UDialectHandling U_EXPORT2
Returns the dialect handling used in the display names.
|
uldn_getLocale(const ULocaleDisplayNames *ldn)
|
U_CAPI const char *U_EXPORT2
Returns the locale used to determine the display names.
|
uldn_keyDisplayName(const ULocaleDisplayNames *ldn, const char *key, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided locale key.
|
uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, const char *key, const char *value, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided value (used with the provided key).
|
uldn_languageDisplayName(const ULocaleDisplayNames *ldn, const char *lang, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided language code.
|
uldn_localeDisplayName(const ULocaleDisplayNames *ldn, const char *locale, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided locale.
|
uldn_open(const char *locale, UDialectHandling dialectHandling, UErrorCode *pErrorCode)
|
U_CAPI ULocaleDisplayNames *U_EXPORT2
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided dialectHandling.
|
uldn_openForContext(const char *locale, UDisplayContext *contexts, int32_t length, UErrorCode *pErrorCode)
|
U_CAPI ULocaleDisplayNames *U_EXPORT2
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided UDisplayContext settings.
|
uldn_regionDisplayName(const ULocaleDisplayNames *ldn, const char *region, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided region code.
|
uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, UScriptCode scriptCode, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided script code.
|
uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, const char *script, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided script.
|
uldn_variantDisplayName(const ULocaleDisplayNames *ldn, const char *variant, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided variant.
|
uloc_acceptLanguage(char *result, int32_t resultAvailable, UAcceptResult *outResult, const char **acceptList, int32_t acceptListCount, UEnumeration *availableLocales, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Based on a list of available locales, determine an acceptable locale for the user.
|
uloc_addLikelySubtags(const char *localeID, char *maximizedLocaleID, int32_t maximizedLocaleIDCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Add the likely subtags for a provided locale ID, per the algorithm described in the CLDR technical report UTS #35, "Likely Subtags" (http://www.unicode.org/reports/tr35/#Likely_Subtags).
|
uloc_canonicalize(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale.
|
uloc_countAvailable(void)
|
U_CAPI int32_t U_EXPORT2
Gets the size of the list of all available locales.
|
uloc_forLanguageTag(const char *langtag, char *localeID, int32_t localeIDCapacity, int32_t *parsedLength, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Returns a locale ID for the specified BCP47 language tag string.
|
uloc_getAvailable(int32_t n)
|
U_CAPI const char *U_EXPORT2
Gets the specified locale from a list of available locales.
|
uloc_getBaseName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale, like uloc_getName(), but without keywords.
|
uloc_getCharacterOrientation(const char *localeId, UErrorCode *status)
|
U_CAPI ULayoutType U_EXPORT2
Get the layout character orientation for the specified locale.
|
uloc_getCountry(const char *localeID, char *country, int32_t countryCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the country code for the specified locale.
|
uloc_getDefault(void)
|
U_CAPI const char *U_EXPORT2
Gets ICU's default locale.
|
uloc_getDisplayCountry(const char *locale, const char *displayLocale, UChar *country, int32_t countryCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the country name suitable for display for the specified locale.
|
uloc_getDisplayKeyword(const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the keyword name suitable for display for the specified locale.
|
uloc_getDisplayKeywordValue(const char *locale, const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the value of the keyword suitable for display for the specified locale.
|
uloc_getDisplayLanguage(const char *locale, const char *displayLocale, UChar *language, int32_t languageCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the language name suitable for display for the specified locale.
|
uloc_getDisplayName(const char *localeID, const char *inLocaleID, UChar *result, int32_t maxResultSize, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the full name suitable for display for the specified locale.
|
uloc_getDisplayScript(const char *locale, const char *displayLocale, UChar *script, int32_t scriptCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the script name suitable for display for the specified locale.
|
uloc_getDisplayVariant(const char *locale, const char *displayLocale, UChar *variant, int32_t variantCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the variant name suitable for display for the specified locale.
|
uloc_getISO3Country(const char *localeID)
|
U_CAPI const char *U_EXPORT2
Gets the ISO country code for the specified locale.
|
uloc_getISO3Language(const char *localeID)
|
U_CAPI const char *U_EXPORT2
Gets the ISO language code for the specified locale.
|
uloc_getISOCountries(void)
|
U_CAPI const char *const *U_EXPORT2
Gets a list of all available 2-letter country codes defined in ISO 3166.
|
uloc_getISOLanguages(void)
|
U_CAPI const char *const *U_EXPORT2
Gets a list of all available 2-letter language codes defined in ISO 639, plus additional 3-letter codes determined to be useful for locale generation as defined by Unicode CLDR.
|
uloc_getKeywordValue(const char *localeID, const char *keywordName, char *buffer, int32_t bufferCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Get the value for a keyword.
|
uloc_getLanguage(const char *localeID, char *language, int32_t languageCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the language code for the specified locale.
|
uloc_getLineOrientation(const char *localeId, UErrorCode *status)
|
U_CAPI ULayoutType U_EXPORT2
Get the layout line orientation for the specified locale.
|
uloc_getName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale.
|
uloc_getScript(const char *localeID, char *script, int32_t scriptCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the script code for the specified locale.
|
uloc_getVariant(const char *localeID, char *variant, int32_t variantCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the variant code for the specified locale.
|
uloc_isRightToLeft(const char *locale)
|
Returns whether the locale's script is written right-to-left.
|
uloc_minimizeSubtags(const char *localeID, char *minimizedLocaleID, int32_t minimizedLocaleIDCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Minimize the subtags for a provided locale ID, per the algorithm described in the CLDR technical report UTS #35, "Likely Subtags" (http://www.unicode.org/reports/tr35/#Likely_Subtags).
|
uloc_openKeywords(const char *localeID, UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Gets an enumeration of keywords for the specified locale.
|
uloc_setKeywordValue(const char *keywordName, const char *keywordValue, char *buffer, int32_t bufferCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Sets or removes the value of the specified keyword.
|
uloc_toLanguageTag(const char *localeID, char *langtag, int32_t langtagCapacity, UBool strict, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Returns a well-formed language tag for this locale ID.
|
uloc_toLegacyKey(const char *keyword)
|
U_CAPI const char *U_EXPORT2
Converts the specified keyword (BCP 47 Unicode locale extension key, or legacy key) to the legacy key.
|
uloc_toLegacyType(const char *keyword, const char *value)
|
U_CAPI const char *U_EXPORT2
Converts the specified keyword value (BCP 47 Unicode locale extension type, or legacy type or type alias) to the canonical legacy type.
|
uloc_toUnicodeLocaleKey(const char *keyword)
|
U_CAPI const char *U_EXPORT2
Converts the specified keyword (legacy key, or BCP 47 Unicode locale extension key) to the equivalent BCP 47 Unicode locale extension key.
|
uloc_toUnicodeLocaleType(const char *keyword, const char *value)
|
U_CAPI const char *U_EXPORT2
Converts the specified keyword value (legacy type, or BCP 47 Unicode locale extension type) to the well-formed BCP 47 Unicode locale extension type for the specified keyword (category).
|
ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Return the current CLDR version used by the library.
|
unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.
|
unorm2_close(UNormalizer2 *norm2)
|
U_CAPI void U_EXPORT2
Closes a UNormalizer2 instance from unorm2_openFiltered().
|
unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
|
Performs pairwise composition of a & b and returns the composite if there is one.
|
unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
|
U_CAPI uint8_t U_EXPORT2
Gets the combining class of c.
|
unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Gets the decomposition mapping of c.
|
unorm2_getNFCInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFC normalization.
|
unorm2_getNFDInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFD normalization.
|
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
|
unorm2_getNFKCInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKC normalization.
|
unorm2_getNFKDInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKD normalization.
|
unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Gets the raw decomposition mapping of c.
|
unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
|
Tests if the character always has a normalization boundary after it, regardless of context.
|
unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
|
Tests if the character always has a normalization boundary before it, regardless of context.
|
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
|
Tests if the character is normalization-inert.
|
unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
|
Tests if the string is normalized.
|
unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.
|
unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.
|
unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
|
U_CAPI UNormalizationCheckResult U_EXPORT2
Tests if the string is normalized.
|
unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the end of the normalized substring of the input string.
|
uscript_breaksBetweenLetters(UScriptCode script)
|
Returns true if the script allows line breaks between letters (excluding hyphenation).
|
uscript_getCode(const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
|
uscript_getName(UScriptCode scriptCode)
|
U_CAPI const char *U_EXPORT2
Returns the long Unicode script name, if there is one.
|
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Writes the script sample character string.
|
uscript_getScript(UChar32 codepoint, UErrorCode *err)
|
U_CAPI UScriptCode U_EXPORT2
Gets the script code associated with the given codepoint.
|
uscript_getScriptExtensions(UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode)
|
U_CAPI int32_t U_EXPORT2
Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.
|
uscript_getShortName(UScriptCode scriptCode)
|
U_CAPI const char *U_EXPORT2
Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.
|
uscript_getUsage(UScriptCode script)
|
U_CAPI UScriptUsage U_EXPORT2
Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
|
uscript_hasScript(UChar32 c, UScriptCode sc)
|
Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.
|
uscript_isCased(UScriptCode script)
|
Returns true if in modern (or most recent) usage of the script case distinctions are customary.
|
uscript_isRightToLeft(UScriptCode script)
|
Returns true if the script is written right-to-left.
|
utext_char32At(UText *ut, int64_t nativeIndex)
|
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
|
utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
|
Clone a UText.
|
utext_close(UText *ut)
|
Close function for UText instances.
|
utext_current32(UText *ut)
|
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached the end of the input text.
|
utext_equals(const UText *a, const UText *b)
|
Compare two UText objects for equality.
|
utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Extract text from a UText into a UChar buffer.
|
utext_getNativeIndex(const UText *ut)
|
U_CAPI int64_t U_EXPORT2
Get the current iterator position, which can range from 0 to the length of the text.
|
utext_getPreviousNativeIndex(UText *ut)
|
U_CAPI int64_t U_EXPORT2
Get the native index of the character preceding the current position.
|
utext_moveIndex32(UText *ut, int32_t delta)
|
Move the iterator position by delta code points.
|
utext_nativeLength(UText *ut)
|
U_CAPI int64_t U_EXPORT2
Get the length of the text.
|
utext_next32(UText *ut)
|
Get the code point at the current iteration position of the UText, and advance the position to the first index following the character.
|
utext_next32From(UText *ut, int64_t nativeIndex)
|
Set the iteration index and return the code point at that index.
|
utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
|
Open a read-only UText for a UChar * string.
|
utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
|
Open a read-only UText implementation for UTF-8 strings.
|
utext_previous32(UText *ut)
|
Move the iterator position to the character (code point) whose index precedes the current position, and return that character.
|
utext_previous32From(UText *ut, int64_t nativeIndex)
|
Set the iteration index, and return the code point preceding the one specified by the initial index.
|
utext_setNativeIndex(UText *ut, int64_t nativeIndex)
|
U_CAPI void U_EXPORT2
Set the current iteration position to the nearest code point boundary at or preceding the specified index.
|
utrans_clone(const UTransliterator *trans, UErrorCode *status)
|
U_CAPI UTransliterator *U_EXPORT2
Create a copy of a transliterator.
|
utrans_close(UTransliterator *trans)
|
U_CAPI void U_EXPORT2
Close a transliterator.
|
utrans_openIDs(UErrorCode *pErrorCode)
|
U_CAPI UEnumeration *U_EXPORT2
Return a UEnumeration for the available transliterators.
|
utrans_openInverse(const UTransliterator *trans, UErrorCode *status)
|
U_CAPI UTransliterator *U_EXPORT2
Open an inverse of an existing transliterator.
|
utrans_openU(const UChar *id, int32_t idLength, UTransDirection dir, const UChar *rules, int32_t rulesLength, UParseError *parseError, UErrorCode *pErrorCode)
|
U_CAPI UTransliterator *U_EXPORT2
Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.
|
utrans_setFilter(UTransliterator *trans, const UChar *filterPattern, int32_t filterPatternLen, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Set the filter used by a transliterator.
|
utrans_toRules(const UTransliterator *trans, UBool escapeUnprintable, UChar *result, int32_t resultLength, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Create a rule string that can be passed to utrans_openU to recreate this transliterator.
|
utrans_trans(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, int32_t start, int32_t *limit, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Transliterate a segment of a UReplaceable string.
|
utrans_transIncremental(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, UTransPosition *pos, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.
|
utrans_transIncrementalUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, UTransPosition *pos, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.
|
utrans_transUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, int32_t start, int32_t *limit, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Transliterate a segment of a UChar* string.
|
Structs |
|
---|---|
UParseError |
A UParseError struct is used to return detailed information about parsing errors. |
UReplaceableCallbacks |
A set of function pointers that transliterators use to manipulate a UReplaceable. |
UTransPosition |
Position structure for utrans_transIncremental() incremental transliteration. |
Enumerations
Anonymous Enum 124
Anonymous Enum 124
The capacity of the context strings in UParseError.
Properties | |
---|---|
U_PARSE_CONTEXT_LEN
|
UAcceptResult
UAcceptResult
Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.
See also: uloc_acceptLanguageFromHTTP, uloc_acceptLanguage
UBidiPairedBracketType
UBidiPairedBracketType
Bidi Paired Bracket Type constants.
See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE
Properties | |
---|---|
U_BPT_CLOSE
|
Close paired bracket. |
U_BPT_COUNT
|
One more than the highest normal UBidiPairedBracketType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE). Deprecated since ICU 58: the numeric value may change over time, see ICU ticket #12420. |
U_BPT_NONE
|
Not a paired bracket. |
U_BPT_OPEN
|
Open paired bracket. |
UBlockCode
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
Properties | |
---|---|
UBLOCK_ADLAM
|
|
UBLOCK_AEGEAN_NUMBERS
|
|
UBLOCK_AHOM
|
|
UBLOCK_ALCHEMICAL_SYMBOLS
|
|
UBLOCK_ALPHABETIC_PRESENTATION_FORMS
|
|
UBLOCK_ANATOLIAN_HIEROGLYPHS
|
|
UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION
|
|
UBLOCK_ANCIENT_GREEK_NUMBERS
|
|
UBLOCK_ANCIENT_SYMBOLS
|
|
UBLOCK_ARABIC
|
|
UBLOCK_ARABIC_EXTENDED_A
|
|
UBLOCK_ARABIC_EXTENDED_B
|
|
UBLOCK_ARABIC_EXTENDED_C
|
|
UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS
|
|
UBLOCK_ARABIC_PRESENTATION_FORMS_A
|
|
UBLOCK_ARABIC_PRESENTATION_FORMS_B
|
|
UBLOCK_ARABIC_SUPPLEMENT
|
|
UBLOCK_ARMENIAN
|
|
UBLOCK_ARROWS
|
|
UBLOCK_AVESTAN
|
|
UBLOCK_BALINESE
|
|
UBLOCK_BAMUM
|
|
UBLOCK_BAMUM_SUPPLEMENT
|
|
UBLOCK_BASIC_LATIN
|
|
UBLOCK_BASSA_VAH
|
|
UBLOCK_BATAK
|
|
UBLOCK_BENGALI
|
|
UBLOCK_BHAIKSUKI
|
|
UBLOCK_BLOCK_ELEMENTS
|
|
UBLOCK_BOPOMOFO
|
|
UBLOCK_BOPOMOFO_EXTENDED
|
|
UBLOCK_BOX_DRAWING
|
|
UBLOCK_BRAHMI
|
|
UBLOCK_BRAILLE_PATTERNS
|
|
UBLOCK_BUGINESE
|
|
UBLOCK_BUHID
|
|
UBLOCK_BYZANTINE_MUSICAL_SYMBOLS
|
|
UBLOCK_CARIAN
|
|
UBLOCK_CAUCASIAN_ALBANIAN
|
|
UBLOCK_CHAKMA
|
|
UBLOCK_CHAM
|
|
UBLOCK_CHEROKEE
|
|
UBLOCK_CHEROKEE_SUPPLEMENT
|
|
UBLOCK_CHESS_SYMBOLS
|
|
UBLOCK_CHORASMIAN
|
|
UBLOCK_CJK_COMPATIBILITY
|
|
UBLOCK_CJK_COMPATIBILITY_FORMS
|
|
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
|
|
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
|
|
UBLOCK_CJK_RADICALS_SUPPLEMENT
|
|
UBLOCK_CJK_STROKES
|
|
UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H
|
|
UBLOCK_COMBINING_DIACRITICAL_MARKS
|
|
UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED
|
|
UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT
|
|
UBLOCK_COMBINING_HALF_MARKS
|
|
UBLOCK_COMBINING_MARKS_FOR_SYMBOLS
|
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". |
UBLOCK_COMMON_INDIC_NUMBER_FORMS
|
|
UBLOCK_CONTROL_PICTURES
|
|
UBLOCK_COPTIC
|
|
UBLOCK_COPTIC_EPACT_NUMBERS
|
|
UBLOCK_COUNT
|
One more than the highest normal UBlockCode value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK). Deprecated since ICU 58: the numeric value may change over time, see ICU ticket #12420. |
UBLOCK_COUNTING_ROD_NUMERALS
|
|
UBLOCK_CUNEIFORM
|
|
UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION
|
|
UBLOCK_CURRENCY_SYMBOLS
|
|
UBLOCK_CYPRIOT_SYLLABARY
|
|
UBLOCK_CYPRO_MINOAN
|
|
UBLOCK_CYRILLIC
|
|
UBLOCK_CYRILLIC_EXTENDED_A
|
|
UBLOCK_CYRILLIC_EXTENDED_B
|
|
UBLOCK_CYRILLIC_EXTENDED_C
|
|
UBLOCK_CYRILLIC_EXTENDED_D
|
|
UBLOCK_CYRILLIC_SUPPLEMENT
|
|
UBLOCK_CYRILLIC_SUPPLEMENTARY
|
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". |
UBLOCK_DESERET
|
|
UBLOCK_DEVANAGARI
|
|
UBLOCK_DEVANAGARI_EXTENDED
|
|
UBLOCK_DEVANAGARI_EXTENDED_A
|
|
UBLOCK_DINGBATS
|
|
UBLOCK_DIVES_AKURU
|
|
UBLOCK_DOGRA
|
|
UBLOCK_DOMINO_TILES
|
|
UBLOCK_DUPLOYAN
|
|
UBLOCK_EARLY_DYNASTIC_CUNEIFORM
|
|
UBLOCK_EGYPTIAN_HIEROGLYPHS
|
|
UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS
|
|
UBLOCK_ELBASAN
|
|
UBLOCK_ELYMAIC
|
|
UBLOCK_EMOTICONS
|
|
UBLOCK_ENCLOSED_ALPHANUMERICS
|
|
UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT
|
|
UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS
|
|
UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT
|
|
UBLOCK_ETHIOPIC
|
|
UBLOCK_ETHIOPIC_EXTENDED
|
|
UBLOCK_ETHIOPIC_EXTENDED_A
|
|
UBLOCK_ETHIOPIC_EXTENDED_B
|
|
UBLOCK_ETHIOPIC_SUPPLEMENT
|
|
UBLOCK_GENERAL_PUNCTUATION
|
|
UBLOCK_GEOMETRIC_SHAPES
|
|
UBLOCK_GEOMETRIC_SHAPES_EXTENDED
|
|
UBLOCK_GEORGIAN
|
|
UBLOCK_GEORGIAN_EXTENDED
|
|
UBLOCK_GEORGIAN_SUPPLEMENT
|
|
UBLOCK_GLAGOLITIC
|
|
UBLOCK_GLAGOLITIC_SUPPLEMENT
|
|
UBLOCK_GOTHIC
|
|
UBLOCK_GRANTHA
|
|
UBLOCK_GREEK
|
Unicode 3.2 renames this block to "Greek and Coptic". |
UBLOCK_GREEK_EXTENDED
|
|
UBLOCK_GUJARATI
|
|
UBLOCK_GUNJALA_GONDI
|
|
UBLOCK_GURMUKHI
|
|
UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
|
|
UBLOCK_HANGUL_COMPATIBILITY_JAMO
|
|
UBLOCK_HANGUL_JAMO
|
|
UBLOCK_HANGUL_JAMO_EXTENDED_A
|
|
UBLOCK_HANGUL_JAMO_EXTENDED_B
|
|
UBLOCK_HANGUL_SYLLABLES
|
|
UBLOCK_HANIFI_ROHINGYA
|
|
UBLOCK_HANUNOO
|
|
UBLOCK_HATRAN
|
|
UBLOCK_HEBREW
|
|
UBLOCK_HIGH_PRIVATE_USE_SURROGATES
|
|
UBLOCK_HIGH_SURROGATES
|
|
UBLOCK_HIRAGANA
|
|
UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS
|
|
UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION
|
|
UBLOCK_IMPERIAL_ARAMAIC
|
|
UBLOCK_INDIC_SIYAQ_NUMBERS
|
|
UBLOCK_INSCRIPTIONAL_PAHLAVI
|
|
UBLOCK_INSCRIPTIONAL_PARTHIAN
|
|
UBLOCK_INVALID_CODE
|
|
UBLOCK_IPA_EXTENSIONS
|
|
UBLOCK_JAVANESE
|
|
UBLOCK_KAITHI
|
|
UBLOCK_KAKTOVIK_NUMERALS
|
|
UBLOCK_KANA_EXTENDED_A
|
|
UBLOCK_KANA_EXTENDED_B
|
|
UBLOCK_KANA_SUPPLEMENT
|
|
UBLOCK_KANBUN
|
|
UBLOCK_KANGXI_RADICALS
|
|
UBLOCK_KANNADA
|
|
UBLOCK_KATAKANA
|
|
UBLOCK_KATAKANA_PHONETIC_EXTENSIONS
|
|
UBLOCK_KAWI
|
|
UBLOCK_KAYAH_LI
|
|
UBLOCK_KHAROSHTHI
|
|
UBLOCK_KHITAN_SMALL_SCRIPT
|
|
UBLOCK_KHMER
|
|
UBLOCK_KHMER_SYMBOLS
|
|
UBLOCK_KHOJKI
|
|
UBLOCK_KHUDAWADI
|
|
UBLOCK_LAO
|
|
UBLOCK_LATIN_1_SUPPLEMENT
|
|
UBLOCK_LATIN_EXTENDED_A
|
|
UBLOCK_LATIN_EXTENDED_ADDITIONAL
|
|
UBLOCK_LATIN_EXTENDED_B
|
|
UBLOCK_LATIN_EXTENDED_C
|
|
UBLOCK_LATIN_EXTENDED_D
|
|
UBLOCK_LATIN_EXTENDED_E
|
|
UBLOCK_LATIN_EXTENDED_F
|
|
UBLOCK_LATIN_EXTENDED_G
|
|
UBLOCK_LEPCHA
|
|
UBLOCK_LETTERLIKE_SYMBOLS
|
|
UBLOCK_LIMBU
|
|
UBLOCK_LINEAR_A
|
|
UBLOCK_LINEAR_B_IDEOGRAMS
|
|
UBLOCK_LINEAR_B_SYLLABARY
|
|
UBLOCK_LISU
|
|
UBLOCK_LISU_SUPPLEMENT
|
|
UBLOCK_LOW_SURROGATES
|
|
UBLOCK_LYCIAN
|
|
UBLOCK_LYDIAN
|
|
UBLOCK_MAHAJANI
|
|
UBLOCK_MAHJONG_TILES
|
|
UBLOCK_MAKASAR
|
|
UBLOCK_MALAYALAM
|
|
UBLOCK_MANDAIC
|
|
UBLOCK_MANICHAEAN
|
|
UBLOCK_MARCHEN
|
|
UBLOCK_MASARAM_GONDI
|
|
UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS
|
|
UBLOCK_MATHEMATICAL_OPERATORS
|
|
UBLOCK_MAYAN_NUMERALS
|
|
UBLOCK_MEDEFAIDRIN
|
|
UBLOCK_MEETEI_MAYEK
|
|
UBLOCK_MEETEI_MAYEK_EXTENSIONS
|
|
UBLOCK_MENDE_KIKAKUI
|
|
UBLOCK_MEROITIC_CURSIVE
|
|
UBLOCK_MEROITIC_HIEROGLYPHS
|
|
UBLOCK_MIAO
|
|
UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
|
|
UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
|
|
UBLOCK_MISCELLANEOUS_SYMBOLS
|
|
UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS
|
|
UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS
|
|
UBLOCK_MISCELLANEOUS_TECHNICAL
|
|
UBLOCK_MODI
|
|
UBLOCK_MODIFIER_TONE_LETTERS
|
|
UBLOCK_MONGOLIAN
|
|
UBLOCK_MONGOLIAN_SUPPLEMENT
|
|
UBLOCK_MRO
|
|
UBLOCK_MULTANI
|
|
UBLOCK_MUSICAL_SYMBOLS
|
|
UBLOCK_MYANMAR
|
|
UBLOCK_MYANMAR_EXTENDED_A
|
|
UBLOCK_MYANMAR_EXTENDED_B
|
|
UBLOCK_NABATAEAN
|
|
UBLOCK_NAG_MUNDARI
|
|
UBLOCK_NANDINAGARI
|
|
UBLOCK_NEWA
|
|
UBLOCK_NEW_TAI_LUE
|
|
UBLOCK_NKO
|
|
UBLOCK_NO_BLOCK
|
New No_Block value in Unicode 4. |
UBLOCK_NUMBER_FORMS
|
|
UBLOCK_NUSHU
|
|
UBLOCK_NYIAKENG_PUACHUE_HMONG
|
|
UBLOCK_OGHAM
|
|
UBLOCK_OLD_HUNGARIAN
|
|
UBLOCK_OLD_ITALIC
|
|
UBLOCK_OLD_NORTH_ARABIAN
|
|
UBLOCK_OLD_PERMIC
|
|
UBLOCK_OLD_PERSIAN
|
|
UBLOCK_OLD_SOGDIAN
|
|
UBLOCK_OLD_SOUTH_ARABIAN
|
|
UBLOCK_OLD_TURKIC
|
|
UBLOCK_OLD_UYGHUR
|
|
UBLOCK_OL_CHIKI
|
|
UBLOCK_OPTICAL_CHARACTER_RECOGNITION
|
|
UBLOCK_ORIYA
|
|
UBLOCK_ORNAMENTAL_DINGBATS
|
|
UBLOCK_OSAGE
|
|
UBLOCK_OSMANYA
|
|
UBLOCK_OTTOMAN_SIYAQ_NUMBERS
|
|
UBLOCK_PAHAWH_HMONG
|
|
UBLOCK_PALMYRENE
|
|
UBLOCK_PAU_CIN_HAU
|
|
UBLOCK_PHAGS_PA
|
|
UBLOCK_PHAISTOS_DISC
|
|
UBLOCK_PHOENICIAN
|
|
UBLOCK_PHONETIC_EXTENSIONS
|
|
UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT
|
|
UBLOCK_PLAYING_CARDS
|
|
UBLOCK_PRIVATE_USE
|
Same as UBLOCK_PRIVATE_USE_AREA. Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs. |
UBLOCK_PRIVATE_USE_AREA
|
Same as UBLOCK_PRIVATE_USE. Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs. |
UBLOCK_PSALTER_PAHLAVI
|
|
UBLOCK_REJANG
|
|
UBLOCK_RUMI_NUMERAL_SYMBOLS
|
|
UBLOCK_RUNIC
|
|
UBLOCK_SAMARITAN
|
|
UBLOCK_SAURASHTRA
|
|
UBLOCK_SHARADA
|
|
UBLOCK_SHAVIAN
|
|
UBLOCK_SHORTHAND_FORMAT_CONTROLS
|
|
UBLOCK_SIDDHAM
|
|
UBLOCK_SINHALA
|
|
UBLOCK_SINHALA_ARCHAIC_NUMBERS
|
|
UBLOCK_SMALL_FORM_VARIANTS
|
|
UBLOCK_SMALL_KANA_EXTENSION
|
|
UBLOCK_SOGDIAN
|
|
UBLOCK_SORA_SOMPENG
|
|
UBLOCK_SOYOMBO
|
|
UBLOCK_SPACING_MODIFIER_LETTERS
|
|
UBLOCK_SPECIALS
|
|
UBLOCK_SUNDANESE
|
|
UBLOCK_SUNDANESE_SUPPLEMENT
|
|
UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS
|
|
UBLOCK_SUPPLEMENTAL_ARROWS_A
|
|
UBLOCK_SUPPLEMENTAL_ARROWS_B
|
|
UBLOCK_SUPPLEMENTAL_ARROWS_C
|
|
UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS
|
|
UBLOCK_SUPPLEMENTAL_PUNCTUATION
|
|
UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS
|
|
UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A
|
|
UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B
|
|
UBLOCK_SUTTON_SIGNWRITING
|
|
UBLOCK_SYLOTI_NAGRI
|
|
UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A
|
|
UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING
|
|
UBLOCK_SYRIAC
|
|
UBLOCK_SYRIAC_SUPPLEMENT
|
|
UBLOCK_TAGALOG
|
|
UBLOCK_TAGBANWA
|
|
UBLOCK_TAGS
|
|
UBLOCK_TAI_LE
|
|
UBLOCK_TAI_THAM
|
|
UBLOCK_TAI_VIET
|
|
UBLOCK_TAI_XUAN_JING_SYMBOLS
|
|
UBLOCK_TAKRI
|
|
UBLOCK_TAMIL
|
|
UBLOCK_TAMIL_SUPPLEMENT
|
|
UBLOCK_TANGSA
|
|
UBLOCK_TANGUT
|
|
UBLOCK_TANGUT_COMPONENTS
|
|
UBLOCK_TANGUT_SUPPLEMENT
|
|
UBLOCK_TELUGU
|
|
UBLOCK_THAANA
|
|
UBLOCK_THAI
|
|
UBLOCK_TIBETAN
|
|
UBLOCK_TIFINAGH
|
|
UBLOCK_TIRHUTA
|
|
UBLOCK_TOTO
|
|
UBLOCK_TRANSPORT_AND_MAP_SYMBOLS
|
|
UBLOCK_UGARITIC
|
|
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
|
|
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
|
|
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A
|
|
UBLOCK_VAI
|
|
UBLOCK_VARIATION_SELECTORS
|
|
UBLOCK_VARIATION_SELECTORS_SUPPLEMENT
|
|
UBLOCK_VEDIC_EXTENSIONS
|
|
UBLOCK_VERTICAL_FORMS
|
|
UBLOCK_VITHKUQI
|
|
UBLOCK_WANCHO
|
|
UBLOCK_WARANG_CITI
|
|
UBLOCK_YEZIDI
|
|
UBLOCK_YIJING_HEXAGRAM_SYMBOLS
|
|
UBLOCK_YI_RADICALS
|
|
UBLOCK_YI_SYLLABLES
|
|
UBLOCK_ZANABAZAR_SQUARE
|
|
UBLOCK_ZNAMENNY_MUSICAL_NOTATION
|
UBreakIteratorType
UBreakIteratorType
The possible types of text boundaries.
Properties | |
---|---|
UBRK_CHARACTER
|
Character breaks. |
UBRK_COUNT
|
One more than the highest normal UBreakIteratorType value. Deprecated since ICU 58: the numeric value may change over time, see ICU ticket #12420. |
UBRK_LINE
|
Line breaks. |
UBRK_SENTENCE
|
Sentence breaks. |
UBRK_TITLE
|
Title Case breaks. The iterator created using this type locates title boundaries as described for Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, please use the Word Boundary iterator. Deprecated since ICU 2.8: use the word break iterator for titlecasing for Unicode 4 and later. |
UBRK_WORD
|
Word breaks. |
UCPMapRangeOption
UCPMapRangeOption
Selectors for how ucpmap_getRange() etc.
should report value ranges overlapping with surrogates. Most users should use UCPMAP_RANGE_NORMAL.
See also:ucpmap_getRange See also:ucptrie_getRange See also:umutablecptrie_getRange
Properties | |
---|---|
UCPMAP_RANGE_FIXED_ALL_SURROGATES
|
ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that all surrogates (U+D800..U+DFFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter. The surrogateValue is not transformed via filter(). See U_IS_SURROGATE(c). Most users should use UCPMAP_RANGE_NORMAL instead. This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the surrogate code points. |
UCPMAP_RANGE_FIXED_LEAD_SURROGATES
|
ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that lead surrogates (U+D800..U+DBFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter. The surrogateValue is not transformed via filter(). See U_IS_LEAD(c). Most users should use UCPMAP_RANGE_NORMAL instead. This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points. |
UCPMAP_RANGE_NORMAL
|
ucpmap_getRange() enumerates all same-value ranges as stored in the map. Most users should use this option. |
UCharCategory
UCharCategory
Data for enumerated Unicode general category types.
See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
Properties | |
---|---|
U_CHAR_CATEGORY_COUNT
|
One higher than the last enum UCharCategory constant. This numeric value is stable (will not change), see http://www.unicode.org/policies/stability_policy.html#Property_Value |
U_COMBINING_SPACING_MARK
|
Mc. |
U_CONNECTOR_PUNCTUATION
|
Pc. |
U_CONTROL_CHAR
|
Cc. |
U_CURRENCY_SYMBOL
|
Sc. |
U_DASH_PUNCTUATION
|
Pd. |
U_DECIMAL_DIGIT_NUMBER
|
Nd. |
U_ENCLOSING_MARK
|
Me. |
U_END_PUNCTUATION
|
Pe. |
U_FINAL_PUNCTUATION
|
Pf. |
U_FORMAT_CHAR
|
Cf. |
U_GENERAL_OTHER_TYPES
|
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) |
U_INITIAL_PUNCTUATION
|
Pi. |
U_LETTER_NUMBER
|
Nl. |
U_LINE_SEPARATOR
|
Zl. |
U_LOWERCASE_LETTER
|
Ll. |
U_MATH_SYMBOL
|
Sm. |
U_MODIFIER_LETTER
|
Lm. |
U_MODIFIER_SYMBOL
|
Sk. |
U_NON_SPACING_MARK
|
Mn. |
U_OTHER_LETTER
|
Lo. |
U_OTHER_NUMBER
|
No. |
U_OTHER_PUNCTUATION
|
Po. |
U_OTHER_SYMBOL
|
So. |
U_PARAGRAPH_SEPARATOR
|
Zp. |
U_PRIVATE_USE_CHAR
|
Co. |
U_SPACE_SEPARATOR
|
Zs. |
U_START_PUNCTUATION
|
Ps. |
U_SURROGATE
|
Cs. |
U_TITLECASE_LETTER
|
Lt. |
U_UNASSIGNED
|
Non-category for unassigned and non-character code points. |
U_UPPERCASE_LETTER
|
Lu. |
UCharDirection
UCharDirection
This specifies the language directional property of a character set.
Properties | |
---|---|
U_ARABIC_NUMBER
|
AN. |
U_BLOCK_SEPARATOR
|
B. |
U_BOUNDARY_NEUTRAL
|
BN. |
U_CHAR_DIRECTION_COUNT
|
One more than the highest UCharDirection value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_COMMON_NUMBER_SEPARATOR
|
CS. |
U_DIR_NON_SPACING_MARK
|
NSM. |
U_EUROPEAN_NUMBER
|
EN. |
U_EUROPEAN_NUMBER_SEPARATOR
|
ES. |
U_EUROPEAN_NUMBER_TERMINATOR
|
ET. |
U_FIRST_STRONG_ISOLATE
|
FSI. |
U_LEFT_TO_RIGHT
|
L. |
U_LEFT_TO_RIGHT_EMBEDDING
|
LRE. |
U_LEFT_TO_RIGHT_ISOLATE
|
LRI. |
U_LEFT_TO_RIGHT_OVERRIDE
|
LRO. |
U_OTHER_NEUTRAL
|
ON. |
U_POP_DIRECTIONAL_FORMAT
|
PDF. |
U_POP_DIRECTIONAL_ISOLATE
|
PDI. |
U_RIGHT_TO_LEFT
|
R. |
U_RIGHT_TO_LEFT_ARABIC
|
AL. |
U_RIGHT_TO_LEFT_EMBEDDING
|
RLE. |
U_RIGHT_TO_LEFT_ISOLATE
|
RLI. |
U_RIGHT_TO_LEFT_OVERRIDE
|
RLO. |
U_SEGMENT_SEPARATOR
|
S. |
U_WHITE_SPACE_NEUTRAL
|
WS. |
UCharNameChoice
UCharNameChoice
Selector constants for u_charName().
u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.
See also: u_charName
Properties | |
---|---|
U_CHAR_NAME_ALIAS
|
Corrected name from NameAliases.txt. |
U_CHAR_NAME_CHOICE_COUNT
|
One more than the highest normal UCharNameChoice value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_EXTENDED_CHAR_NAME
|
Standard or synthetic character name. |
U_UNICODE_10_CHAR_NAME
|
The Unicode_1_Name property value which is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this name choice. Deprecated. ICU 49 |
U_UNICODE_CHAR_NAME
|
Unicode character name (Name property). |
UColAttribute
UColAttribute
Attributes that collation service understands.
All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.
Properties | |
---|---|
UCOL_ALTERNATE_HANDLING
|
Attribute for handling variable elements. Acceptable values are UCOL_NON_IGNORABLE (default) which treats all the codepoints with non-ignorable primary weights in the same way, and UCOL_SHIFTED which causes codepoints with primary weights that are equal or below the variable top value to be ignored on primary level and moved to the quaternary level. |
UCOL_ATTRIBUTE_COUNT
|
One more than the highest normal UColAttribute value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCOL_CASE_FIRST
|
Controls the ordering of upper and lower case letters. Acceptable values are UCOL_OFF (default), which orders upper and lower case letters in accordance with their tertiary weights, UCOL_UPPER_FIRST which forces upper case letters to sort before lower case letters, and UCOL_LOWER_FIRST which does the opposite. |
UCOL_CASE_LEVEL
|
Controls whether an extra case level (positioned before the third level) is generated or not. Acceptable values are UCOL_OFF (default), when case level is not generated, and UCOL_ON which causes the case level to be generated. Contents of the case level are affected by the value of UCOL_CASE_FIRST attribute. A simple way to ignore accent differences in a string is to set the strength to UCOL_PRIMARY and enable case level. |
UCOL_DECOMPOSITION_MODE
|
An alias for UCOL_NORMALIZATION_MODE attribute. |
UCOL_FRENCH_COLLATION
|
Attribute for direction of secondary weights - used in Canadian French. Acceptable values are UCOL_ON, which results in secondary weights being considered backwards and UCOL_OFF which treats secondary weights in the order they appear. |
UCOL_HIRAGANA_QUATERNARY_MODE
|
When turned on, this attribute positions Hiragana before all non-ignorables on quaternary level. This is a sneaky way to produce JIS sort order. This attribute was an implementation detail of the CLDR Japanese tailoring. Since ICU 50, this attribute is not settable any more via API functions. Since CLDR 25/ICU 53, explicit quaternary relations are used to achieve the same Japanese sort order. Deprecated. ICU 50 Implementation detail, cannot be set via API, was removed from implementation. |
UCOL_NORMALIZATION_MODE
|
Controls whether the normalization check and necessary normalizations are performed. When set to UCOL_OFF (default) no normalization check is performed. The correctness of the result is guaranteed only if the input data is in so-called FCD form (see users manual for more info). When set to UCOL_ON, an incremental check is performed to see whether the input data is in the FCD form. If the data is not in the FCD form, incremental NFD normalization is performed. |
UCOL_NUMERIC_COLLATION
|
When turned on, this attribute makes substrings of digits sort according to their numeric values. This is a way to get '100' to sort AFTER '2'. Note that the longest digit substring that can be treated as a single unit is 254 digits (not counting leading zeros). If a digit substring is longer than that, the digits beyond the limit will be treated as a separate digit substring. A "digit" in this sense is a code point with General_Category=Nd, which does not include circled numbers, roman numerals, etc. Only a contiguous digit substring is considered, that is, non-negative integers without separators. There is no support for plus/minus signs, decimals, exponents, etc. |
UCOL_STRENGTH
|
The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength for most locales (except Japanese) is tertiary. Quaternary strength is useful when combined with shifted setting for alternate handling attribute and for JIS X 4061 collation, when it is used to distinguish between Katakana and Hiragana. Otherwise, quaternary level is affected only by the number of non-ignorable code points in the string. Identical strength is rarely useful, as it amounts to codepoints of the NFD form of the string. |
UColAttributeValue
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.
Properties | |
---|---|
UCOL_ATTRIBUTE_VALUE_COUNT
|
One more than the highest normal UColAttributeValue value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCOL_CE_STRENGTH_LIMIT
|
|
UCOL_DEFAULT
|
accepted by most attributes |
UCOL_DEFAULT_STRENGTH
|
Default collation strength. |
UCOL_IDENTICAL
|
Identical collation strength. |
UCOL_LOWER_FIRST
|
Valid for UCOL_CASE_FIRST - lower case sorts before upper case. |
UCOL_NON_IGNORABLE
|
Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable |
UCOL_OFF
|
Turn the feature off - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE. |
UCOL_ON
|
Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE. |
UCOL_PRIMARY
|
Primary collation strength. |
UCOL_QUATERNARY
|
Quaternary collation strength. |
UCOL_SECONDARY
|
Secondary collation strength. |
UCOL_SHIFTED
|
Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted |
UCOL_STRENGTH_LIMIT
|
|
UCOL_TERTIARY
|
Tertiary collation strength. |
UCOL_UPPER_FIRST
|
Valid for UCOL_CASE_FIRST - upper case sorts before lower case. |
UColBoundMode
UColBoundMode
Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.
Underlying code depends on them having these numbers.
Properties | |
---|---|
UCOL_BOUND_LOWER
|
lower bound |
UCOL_BOUND_UPPER
|
upper bound that will match strings of exact size |
UCOL_BOUND_UPPER_LONG
|
upper bound that will match all the strings that have the same initial substring as the given string |
UCOL_BOUND_VALUE_COUNT
|
One more than the highest normal UColBoundMode value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UColReorderCode
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes.
These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode
Properties | |
---|---|
UCOL_REORDER_CODE_CURRENCY
|
Characters with the currency property. This is equivalent to the rule value "currency". |
UCOL_REORDER_CODE_DEFAULT
|
A special reordering code that is used to specify the default reordering codes for a locale. |
UCOL_REORDER_CODE_DIGIT
|
Characters with the digit property. This is equivalent to the rule value "digit". |
UCOL_REORDER_CODE_FIRST
|
The first entry in the enumeration of reordering groups. This is intended for use in range checking and enumeration of the reorder codes. |
UCOL_REORDER_CODE_LIMIT
|
One more than the highest normal UColReorderCode value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCOL_REORDER_CODE_NONE
|
A special reordering code that is used to specify no reordering codes. |
UCOL_REORDER_CODE_OTHERS
|
A special reordering code that is used to specify all other codes used for reordering except for the codes listed as UColReorderCode values and those listed explicitly in a reordering. |
UCOL_REORDER_CODE_PUNCTUATION
|
Characters with the punctuation property. This is equivalent to the rule value "punct". |
UCOL_REORDER_CODE_SPACE
|
Characters with the space property. This is equivalent to the rule value "space". |
UCOL_REORDER_CODE_SYMBOL
|
Characters with the symbol property. This is equivalent to the rule value "symbol". |
UCollationResult
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.
UCOL_EQUAL is returned if source string is compared to be equal to target string in the ucol_strcoll() method. UCOL_GREATER is returned if source string is compared to be greater than target string in the ucol_strcoll() method. See also: ucol_strcoll(). Possible values for a comparison result.
Properties | |
---|---|
UCOL_EQUAL
|
string a == string b |
UCOL_GREATER
|
string a > string b |
UCOL_LESS
|
string a < string b |
UDecompositionType
UDecompositionType
Decomposition Type constants.
See also: UCHAR_DECOMPOSITION_TYPE
Properties | |
---|---|
U_DT_CANONICAL
|
|
U_DT_CIRCLE
|
|
U_DT_COMPAT
|
|
U_DT_COUNT
|
One more than the highest normal UDecompositionType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_DT_FINAL
|
|
U_DT_FONT
|
|
U_DT_FRACTION
|
|
U_DT_INITIAL
|
|
U_DT_ISOLATED
|
|
U_DT_MEDIAL
|
|
U_DT_NARROW
|
|
U_DT_NOBREAK
|
|
U_DT_NONE
|
|
U_DT_SMALL
|
|
U_DT_SQUARE
|
|
U_DT_SUB
|
|
U_DT_SUPER
|
|
U_DT_VERTICAL
|
|
U_DT_WIDE
|
UDialectHandling
UDialectHandling
UDisplayContext
UDisplayContext
Display context settings.
Note, the specific numeric values are internal and may change.
UDisplayContextType
UDisplayContextType
Display context types, for getting values of a particular setting.
Note, the specific numeric values are internal and may change.
UEastAsianWidth
UEastAsianWidth
East Asian Width constants.
See also: UCHAR_EAST_ASIAN_WIDTH See also: u_getIntPropertyValue
Properties | |
---|---|
U_EA_AMBIGUOUS
|
|
U_EA_COUNT
|
One more than the highest normal UEastAsianWidth value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_EA_FULLWIDTH
|
|
U_EA_HALFWIDTH
|
|
U_EA_NARROW
|
|
U_EA_NEUTRAL
|
|
U_EA_WIDE
|
UErrorCode
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():
```c
UErrorCode errorCode = U_ZERO_ERROR;
// call ICU API that needs an error code parameter.
if (U_FAILURE(errorCode)) {
    // An error occurred. Handle it here.
}
```
C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.
For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes
Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:
if (U_FAILURE(errorCode)) { return immediately; }
so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.
Properties | |
---|---|
U_AMBIGUOUS_ALIAS_WARNING
|
This converter alias can go to different converter implementations. |
U_ARGUMENT_TYPE_MISMATCH
|
Argument name and argument index mismatch in MessageFormat functions. |
U_BAD_VARIABLE_DEFINITION
|
Missing '$' or duplicate variable name. |
U_BRK_ASSIGN_ERROR
|
Syntax error in RBBI rule assignment statement. |
U_BRK_ERROR_LIMIT
|
One more than the highest normal BreakIterator error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_BRK_ERROR_START
|
Start of codes indicating Break Iterator failures. |
U_BRK_HEX_DIGITS_EXPECTED
|
Hex digits expected as part of an escaped char in a rule. |
U_BRK_INIT_ERROR
|
Initialization failure. Probable missing ICU Data. |
U_BRK_INTERNAL_ERROR
|
An internal error (bug) was detected. |
U_BRK_MALFORMED_RULE_TAG
|
The {nnn} tag on a rule is malformed. |
U_BRK_MISMATCHED_PAREN
|
Mis-matched parentheses in an RBBI rule. |
U_BRK_NEW_LINE_IN_QUOTED_STRING
|
Missing closing quote in an RBBI rule. |
U_BRK_RULE_EMPTY_SET
|
Rule contains an empty Unicode Set. |
U_BRK_RULE_SYNTAX
|
Syntax error in RBBI rule. |
U_BRK_SEMICOLON_EXPECTED
|
Missing ';' at the end of a RBBI rule. |
U_BRK_UNCLOSED_SET
|
A UnicodeSet written in an RBBI rule is missing a closing ']'. |
U_BRK_UNDEFINED_VARIABLE
|
Use of an undefined $Variable in an RBBI rule. |
U_BRK_UNRECOGNIZED_OPTION
|
!!option in RBBI rules not recognized. |
U_BRK_VARIABLE_REDFINITION
|
RBBI rule $Variable redefined. |
U_BUFFER_OVERFLOW_ERROR
|
A result would not fit in the supplied buffer. |
U_CE_NOT_FOUND_ERROR
|
Currently used only while setting variable top, but can be used generally. |
U_COLLATOR_VERSION_MISMATCH
|
Collator version is not compatible with the base version. |
U_DECIMAL_NUMBER_SYNTAX_ERROR
|
Decimal number syntax error. |
U_DEFAULT_KEYWORD_MISSING
|
Missing DEFAULT rule in plural rules. |
U_DIFFERENT_UCA_VERSION
|
ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function. |
U_DUPLICATE_KEYWORD
|
Duplicate keyword in PluralFormat. |
U_ENUM_OUT_OF_SYNC_ERROR
|
UEnumeration out of sync with underlying collection. |
U_ERROR_LIMIT
|
One more than the highest normal error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_ERROR_WARNING_LIMIT
|
One more than the highest normal UErrorCode warning value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_ERROR_WARNING_START
|
Start of information results (semantically successful) |
U_FILE_ACCESS_ERROR
|
The requested file cannot be found. |
U_FMT_PARSE_ERROR_LIMIT
|
One more than the highest normal formatting API error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_FMT_PARSE_ERROR_START
|
Start of format library errors. |
U_FORMAT_INEXACT_ERROR
|
Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY. |
U_IDNA_ACE_PREFIX_ERROR
|
|
U_IDNA_CHECK_BIDI_ERROR
|
|
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR
|
|
U_IDNA_ERROR_LIMIT
|
One more than the highest normal IDNA error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_IDNA_ERROR_START
|
|
U_IDNA_LABEL_TOO_LONG_ERROR
|
|
U_IDNA_PROHIBITED_ERROR
|
|
U_IDNA_STD3_ASCII_RULES_ERROR
|
|
U_IDNA_UNASSIGNED_ERROR
|
|
U_IDNA_VERIFICATION_ERROR
|
|
U_IDNA_ZERO_LENGTH_LABEL_ERROR
|
|
U_ILLEGAL_ARGUMENT_ERROR
|
Illegal argument. Start of codes indicating failure. |
U_ILLEGAL_CHARACTER
|
A special character is outside its allowed context. |
U_ILLEGAL_CHAR_FOUND
|
Character conversion: Illegal input sequence/combination of input units. |
U_ILLEGAL_CHAR_IN_SEGMENT
|
UNUSED as of ICU 2.4. |
U_ILLEGAL_ESCAPE_SEQUENCE
|
ISO-2022 illegal escape sequence. |
U_ILLEGAL_PAD_POSITION
|
Pad symbol misplaced in number pattern. |
U_INDEX_OUTOFBOUNDS_ERROR
|
Trying to access the index that is out of bounds. |
U_INPUT_TOO_LONG_ERROR
|
The input is impractically long for an operation. It is rejected because it may lead to problems such as excessive processing time, stack depth, or heap memory requirements. |
U_INTERNAL_PROGRAM_ERROR
|
Indicates a bug in the library code. |
U_INTERNAL_TRANSLITERATOR_ERROR
|
Internal transliterator system error. |
U_INVALID_CHAR_FOUND
|
Character conversion: Unmappable input sequence. In other APIs: Invalid character. |
U_INVALID_FORMAT_ERROR
|
Data format is not what is expected. |
U_INVALID_FUNCTION
|
A "&fn()" rule specifies an unknown transliterator. |
U_INVALID_ID
|
A "::id" rule specifies an unknown transliterator. |
U_INVALID_PROPERTY_PATTERN
|
UNUSED as of ICU 2.4. |
U_INVALID_RBT_SYNTAX
|
A "::id" rule was passed to the RuleBasedTransliterator parser. |
U_INVALID_STATE_ERROR
|
Requested operation can not be completed with ICU in its current state. |
U_INVALID_TABLE_FILE
|
Conversion table file not found. |
U_INVALID_TABLE_FORMAT
|
Conversion table file found, but corrupted. |
U_INVARIANT_CONVERSION_ERROR
|
Unable to convert a UChar* string to char* with the invariant converter. |
U_MALFORMED_EXPONENTIAL_PATTERN
|
Grouping symbol in exponent pattern. |
U_MALFORMED_PRAGMA
|
A 'use' pragma is invalid. |
U_MALFORMED_RULE
|
Elements of a rule are misplaced. |
U_MALFORMED_SET
|
A UnicodeSet pattern is invalid. |
U_MALFORMED_SYMBOL_REFERENCE
|
UNUSED as of ICU 2.4. |
U_MALFORMED_UNICODE_ESCAPE
|
A Unicode escape pattern is invalid. |
U_MALFORMED_VARIABLE_DEFINITION
|
A variable definition is invalid. |
U_MALFORMED_VARIABLE_REFERENCE
|
A variable reference is invalid. |
U_MEMORY_ALLOCATION_ERROR
|
Memory allocation error. |
U_MESSAGE_PARSE_ERROR
|
Unable to parse a message (message format) |
U_MISMATCHED_SEGMENT_DELIMITERS
|
UNUSED as of ICU 2.4. |
U_MISPLACED_ANCHOR_START
|
A start anchor appears at an illegal position. |
U_MISPLACED_COMPOUND_FILTER
|
A compound filter is in an invalid location. |
U_MISPLACED_CURSOR_OFFSET
|
A cursor offset occurs at an illegal position. |
U_MISPLACED_QUANTIFIER
|
A quantifier appears after a segment close delimiter. |
U_MISSING_OPERATOR
|
A rule contains no operator. |
U_MISSING_RESOURCE_ERROR
|
The requested resource cannot be found. |
U_MISSING_SEGMENT_CLOSE
|
UNUSED as of ICU 2.4. |
U_MULTIPLE_ANTE_CONTEXTS
|
More than one ante context. |
U_MULTIPLE_COMPOUND_FILTERS
|
More than one compound filter. |
U_MULTIPLE_CURSORS
|
More than one cursor. |
U_MULTIPLE_DECIMAL_SEPARATORS
|
More than one decimal separator in number pattern. |
U_MULTIPLE_DECIMAL_SEPERATORS
|
Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS |
U_MULTIPLE_EXPONENTIAL_SYMBOLS
|
More than one exponent symbol in number pattern. |
U_MULTIPLE_PAD_SPECIFIERS
|
More than one pad symbol in number pattern. |
U_MULTIPLE_PERCENT_SYMBOLS
|
More than one percent symbol in number pattern. |
U_MULTIPLE_PERMILL_SYMBOLS
|
More than one permill symbol in number pattern. |
U_MULTIPLE_POST_CONTEXTS
|
More than one post context. |
U_NO_SPACE_AVAILABLE
|
No space available for in-buffer expansion for Arabic shaping. |
U_NO_WRITE_PERMISSION
|
Attempt to modify read-only or constant data. |
U_NUMBER_ARG_OUTOFBOUNDS_ERROR
|
The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. |
U_NUMBER_SKELETON_SYNTAX_ERROR
|
The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. |
U_PARSE_ERROR
|
Equivalent to Java ParseException. |
U_PARSE_ERROR_LIMIT
|
One more than the highest normal Transliterator error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_PARSE_ERROR_START
|
Start of Transliterator errors. |
U_PATTERN_SYNTAX_ERROR
|
Syntax error in format pattern. |
U_PLUGIN_CHANGED_LEVEL_WARNING
|
A plugin caused a level change. May not be an error, but later plugins may not load. |
U_PLUGIN_DIDNT_SET_LEVEL
|
The plugin didn't call uplug_setPlugLevel in response to a QUERY. |
U_PLUGIN_ERROR_LIMIT
|
One more than the highest normal plug-in error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_PLUGIN_ERROR_START
|
Start of codes indicating plugin failures. |
U_PLUGIN_TOO_HIGH
|
The plugin's level is too high to be loaded right now. |
U_PRIMARY_TOO_LONG_ERROR
|
User tried to set variable top to a primary that is longer than two bytes. |
U_REGEX_BAD_ESCAPE_SEQUENCE
|
Unrecognized backslash escape sequence in pattern. |
U_REGEX_BAD_INTERVAL
|
Error in {min,max} interval. |
U_REGEX_ERROR_LIMIT
|
One more than the highest normal regular expression error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_REGEX_ERROR_START
|
Start of codes indicating Regexp failures. |
U_REGEX_INTERNAL_ERROR
|
An internal error (bug) was detected. |
U_REGEX_INVALID_BACK_REF
|
Back-reference to a non-existent capture group. |
U_REGEX_INVALID_CAPTURE_GROUP_NAME
|
Invalid capture group name. |
U_REGEX_INVALID_FLAG
|
Invalid value for match mode flags. |
U_REGEX_INVALID_RANGE
|
In a character range [x-y], x is greater than y. |
U_REGEX_INVALID_STATE
|
RegexMatcher in invalid state for requested operation. |
U_REGEX_LOOK_BEHIND_LIMIT
|
Look-Behind pattern matches must have a bounded maximum length. |
U_REGEX_MAX_LT_MIN
|
In {min,max}, max is less than min. |
U_REGEX_MISMATCHED_PAREN
|
Incorrectly nested parentheses in regexp pattern. |
U_REGEX_MISSING_CLOSE_BRACKET
|
Missing closing bracket on a bracket expression. |
U_REGEX_NUMBER_TOO_BIG
|
Decimal number is too large. |
U_REGEX_OCTAL_TOO_BIG
|
Octal character constants must be <= 0377. Deprecated. ICU 54. This error cannot occur. |
U_REGEX_PATTERN_TOO_BIG
|
Pattern exceeds limits on size or complexity. |
U_REGEX_PROPERTY_SYNTAX
|
Incorrect Unicode property. |
U_REGEX_RULE_SYNTAX
|
Syntax error in regexp pattern. |
U_REGEX_SET_CONTAINS_STRING
|
Regexps cannot have UnicodeSets containing strings. |
U_REGEX_STACK_OVERFLOW
|
Regular expression backtrack stack overflow. |
U_REGEX_STOPPED_BY_CALLER
|
Matching operation aborted by user callback fn. |
U_REGEX_TIME_OUT
|
Maximum allowed match time exceeded. |
U_REGEX_UNIMPLEMENTED
|
Use of regexp feature that is not yet implemented. |
U_RESOURCE_TYPE_MISMATCH
|
An operation is requested over a resource that does not support it. |
U_RULE_MASK_ERROR
|
A rule is hidden by an earlier more general rule. |
U_SAFECLONE_ALLOCATED_WARNING
|
A SafeClone operation required allocating memory (informational only) |
U_SORT_KEY_TOO_SHORT_WARNING
|
Number of levels requested in getBound is higher than the number of levels in the sort key. |
U_STANDARD_ERROR_LIMIT
|
One more than the highest standard error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_STATE_OLD_WARNING
|
ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading. |
U_STATE_TOO_OLD_ERROR
|
ICU cannot construct a service from this state, as it is no longer supported. |
U_STRINGPREP_CHECK_BIDI_ERROR
|
|
U_STRINGPREP_PROHIBITED_ERROR
|
|
U_STRINGPREP_UNASSIGNED_ERROR
|
|
U_STRING_NOT_TERMINATED_WARNING
|
An output string could not be NUL-terminated because output length==destCapacity. |
U_TOO_MANY_ALIASES_ERROR
|
There are too many aliases in the path to the requested resource. It is very possible that a circular alias definition has occurred. |
U_TRAILING_BACKSLASH
|
A dangling backslash. |
U_TRUNCATED_CHAR_FOUND
|
Character conversion: Incomplete input sequence. |
U_UNCLOSED_SEGMENT
|
A closing ')' is missing. |
U_UNDEFINED_KEYWORD
|
Undefined Plural keyword. |
U_UNDEFINED_SEGMENT_REFERENCE
|
A segment reference does not correspond to a defined segment. |
U_UNDEFINED_VARIABLE
|
A variable reference does not correspond to a defined variable. |
U_UNEXPECTED_TOKEN
|
Syntax error in format pattern. |
U_UNMATCHED_BRACES
|
Braces do not match in message pattern. |
U_UNQUOTED_SPECIAL
|
A special character was not quoted or escaped. |
U_UNSUPPORTED_ATTRIBUTE
|
UNUSED as of ICU 2.4. |
U_UNSUPPORTED_ERROR
|
Requested operation not supported in current context. |
U_UNSUPPORTED_ESCAPE_SEQUENCE
|
ISO-2022 unsupported escape sequence. |
U_UNSUPPORTED_PROPERTY
|
UNUSED as of ICU 2.4. |
U_UNTERMINATED_QUOTE
|
A closing single quote is missing. |
U_USELESS_COLLATOR_ERROR
|
Collator is options only and no base is specified. |
U_USING_DEFAULT_WARNING
|
A resource bundle lookup returned a result from the root locale (not an error) |
U_USING_FALLBACK_WARNING
|
A resource bundle lookup returned a fallback result (not an error) |
U_VARIABLE_RANGE_EXHAUSTED
|
Too many stand-ins generated for the given variable range. |
U_VARIABLE_RANGE_OVERLAP
|
The variable range overlaps characters used in rules. |
U_ZERO_ERROR
|
No error, no warning. |
UGraphemeClusterBreak
UGraphemeClusterBreak
Grapheme Cluster Break constants.
See also: UCHAR_GRAPHEME_CLUSTER_BREAK
Properties | |
---|---|
U_GCB_CONTROL
|
|
U_GCB_COUNT
|
One more than the highest normal UGraphemeClusterBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_GCB_CR
|
|
U_GCB_EXTEND
|
|
U_GCB_E_BASE
|
|
U_GCB_E_BASE_GAZ
|
|
U_GCB_E_MODIFIER
|
|
U_GCB_GLUE_AFTER_ZWJ
|
|
U_GCB_L
|
|
U_GCB_LF
|
|
U_GCB_LV
|
|
U_GCB_LVT
|
|
U_GCB_OTHER
|
|
U_GCB_PREPEND
|
|
U_GCB_REGIONAL_INDICATOR
|
|
U_GCB_SPACING_MARK
|
|
U_GCB_T
|
|
U_GCB_V
|
|
U_GCB_ZWJ
|
UHangulSyllableType
UHangulSyllableType
Hangul Syllable Type constants.
See also: UCHAR_HANGUL_SYLLABLE_TYPE
Properties | |
---|---|
U_HST_COUNT
|
One more than the highest normal UHangulSyllableType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_HST_LEADING_JAMO
|
|
U_HST_LVT_SYLLABLE
|
|
U_HST_LV_SYLLABLE
|
|
U_HST_NOT_APPLICABLE
|
|
U_HST_TRAILING_JAMO
|
|
U_HST_VOWEL_JAMO
|
UIndicPositionalCategory
UIndicPositionalCategory
Indic Positional Category constants.
See also: UCHAR_INDIC_POSITIONAL_CATEGORY
UIndicSyllabicCategory
UIndicSyllabicCategory
Indic Syllabic Category constants.
See also: UCHAR_INDIC_SYLLABIC_CATEGORY
UJoiningGroup
UJoiningGroup
Joining Group constants.
See also: UCHAR_JOINING_GROUP
Properties | |
---|---|
U_JG_AFRICAN_FEH
|
|
U_JG_AFRICAN_NOON
|
|
U_JG_AFRICAN_QAF
|
|
U_JG_AIN
|
|
U_JG_ALAPH
|
|
U_JG_ALEF
|
|
U_JG_BEH
|
|
U_JG_BETH
|
|
U_JG_BURUSHASKI_YEH_BARREE
|
|
U_JG_COUNT
|
One more than the highest normal UJoiningGroup value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_JG_DAL
|
|
U_JG_DALATH_RISH
|
|
U_JG_E
|
|
U_JG_FARSI_YEH
|
|
U_JG_FE
|
|
U_JG_FEH
|
|
U_JG_FINAL_SEMKATH
|
|
U_JG_GAF
|
|
U_JG_GAMAL
|
|
U_JG_HAH
|
|
U_JG_HAMZA_ON_HEH_GOAL
|
|
U_JG_HANIFI_ROHINGYA_KINNA_YA
|
|
U_JG_HANIFI_ROHINGYA_PA
|
|
U_JG_HE
|
|
U_JG_HEH
|
|
U_JG_HEH_GOAL
|
|
U_JG_HETH
|
|
U_JG_KAF
|
|
U_JG_KAPH
|
|
U_JG_KHAPH
|
|
U_JG_KNOTTED_HEH
|
|
U_JG_LAM
|
|
U_JG_LAMADH
|
|
U_JG_MALAYALAM_BHA
|
|
U_JG_MALAYALAM_JA
|
|
U_JG_MALAYALAM_LLA
|
|
U_JG_MALAYALAM_LLLA
|
|
U_JG_MALAYALAM_NGA
|
|
U_JG_MALAYALAM_NNA
|
|
U_JG_MALAYALAM_NNNA
|
|
U_JG_MALAYALAM_NYA
|
|
U_JG_MALAYALAM_RA
|
|
U_JG_MALAYALAM_SSA
|
|
U_JG_MALAYALAM_TTA
|
|
U_JG_MANICHAEAN_ALEPH
|
|
U_JG_MANICHAEAN_AYIN
|
|
U_JG_MANICHAEAN_BETH
|
|
U_JG_MANICHAEAN_DALETH
|
|
U_JG_MANICHAEAN_DHAMEDH
|
|
U_JG_MANICHAEAN_FIVE
|
|
U_JG_MANICHAEAN_GIMEL
|
|
U_JG_MANICHAEAN_HETH
|
|
U_JG_MANICHAEAN_HUNDRED
|
|
U_JG_MANICHAEAN_KAPH
|
|
U_JG_MANICHAEAN_LAMEDH
|
|
U_JG_MANICHAEAN_MEM
|
|
U_JG_MANICHAEAN_NUN
|
|
U_JG_MANICHAEAN_ONE
|
|
U_JG_MANICHAEAN_PE
|
|
U_JG_MANICHAEAN_QOPH
|
|
U_JG_MANICHAEAN_RESH
|
|
U_JG_MANICHAEAN_SADHE
|
|
U_JG_MANICHAEAN_SAMEKH
|
|
U_JG_MANICHAEAN_TAW
|
|
U_JG_MANICHAEAN_TEN
|
|
U_JG_MANICHAEAN_TETH
|
|
U_JG_MANICHAEAN_THAMEDH
|
|
U_JG_MANICHAEAN_TWENTY
|
|
U_JG_MANICHAEAN_WAW
|
|
U_JG_MANICHAEAN_YODH
|
|
U_JG_MANICHAEAN_ZAYIN
|
|
U_JG_MEEM
|
|
U_JG_MIM
|
|
U_JG_NOON
|
|
U_JG_NO_JOINING_GROUP
|
|
U_JG_NUN
|
|
U_JG_NYA
|
|
U_JG_PE
|
|
U_JG_QAF
|
|
U_JG_QAPH
|
|
U_JG_REH
|
|
U_JG_REVERSED_PE
|
|
U_JG_ROHINGYA_YEH
|
|
U_JG_SAD
|
|
U_JG_SADHE
|
|
U_JG_SEEN
|
|
U_JG_SEMKATH
|
|
U_JG_SHIN
|
|
U_JG_STRAIGHT_WAW
|
|
U_JG_SWASH_KAF
|
|
U_JG_SYRIAC_WAW
|
|
U_JG_TAH
|
|
U_JG_TAW
|
|
U_JG_TEH_MARBUTA
|
|
U_JG_TEH_MARBUTA_GOAL
|
|
U_JG_TETH
|
|
U_JG_THIN_YEH
|
|
U_JG_VERTICAL_TAIL
|
|
U_JG_WAW
|
|
U_JG_YEH
|
|
U_JG_YEH_BARREE
|
|
U_JG_YEH_WITH_TAIL
|
|
U_JG_YUDH
|
|
U_JG_YUDH_HE
|
|
U_JG_ZAIN
|
|
U_JG_ZHAIN
|
UJoiningType
UJoiningType
Joining Type constants.
See also: UCHAR_JOINING_TYPE
Properties | |
---|---|
U_JT_COUNT
|
One more than the highest normal UJoiningType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_JT_DUAL_JOINING
|
|
U_JT_JOIN_CAUSING
|
|
U_JT_LEFT_JOINING
|
|
U_JT_NON_JOINING
|
|
U_JT_RIGHT_JOINING
|
|
U_JT_TRANSPARENT
|
ULayoutType
ULayoutType
ULineBreak
ULineBreak
Line Break constants.
See also: UCHAR_LINE_BREAK
Properties | |
---|---|
U_LB_ALPHABETIC
|
|
U_LB_AMBIGUOUS
|
|
U_LB_BREAK_AFTER
|
|
U_LB_BREAK_BEFORE
|
|
U_LB_BREAK_BOTH
|
|
U_LB_BREAK_SYMBOLS
|
|
U_LB_CARRIAGE_RETURN
|
|
U_LB_CLOSE_PARENTHESIS
|
|
U_LB_CLOSE_PUNCTUATION
|
|
U_LB_COMBINING_MARK
|
|
U_LB_COMPLEX_CONTEXT
|
|
U_LB_CONDITIONAL_JAPANESE_STARTER
|
|
U_LB_CONTINGENT_BREAK
|
|
U_LB_COUNT
|
One more than the highest normal ULineBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_LB_EXCLAMATION
|
|
U_LB_E_BASE
|
|
U_LB_E_MODIFIER
|
|
U_LB_GLUE
|
|
U_LB_H2
|
|
U_LB_H3
|
|
U_LB_HEBREW_LETTER
|
|
U_LB_HYPHEN
|
|
U_LB_IDEOGRAPHIC
|
|
U_LB_INFIX_NUMERIC
|
|
U_LB_INSEPARABLE
|
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0. |
U_LB_INSEPERABLE
|
|
U_LB_JL
|
|
U_LB_JT
|
|
U_LB_JV
|
|
U_LB_LINE_FEED
|
|
U_LB_MANDATORY_BREAK
|
|
U_LB_NEXT_LINE
|
|
U_LB_NONSTARTER
|
|
U_LB_NUMERIC
|
|
U_LB_OPEN_PUNCTUATION
|
|
U_LB_POSTFIX_NUMERIC
|
|
U_LB_PREFIX_NUMERIC
|
|
U_LB_QUOTATION
|
|
U_LB_REGIONAL_INDICATOR
|
|
U_LB_SPACE
|
|
U_LB_SURROGATE
|
|
U_LB_UNKNOWN
|
|
U_LB_WORD_JOINER
|
|
U_LB_ZWJ
|
|
U_LB_ZWSPACE
|
ULineBreakTag
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
A range of values is defined for each category of line break, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.
The numeric values of all of these constants are stable (will not change).
ULocAvailableType
ULocAvailableType
Types for uloc_getAvailableByType and uloc_countAvailableByType.
ULocDataLocaleType
ULocDataLocaleType
Constants for *_getLocale(). Allow the user to select whether they want information on the requested, valid, or actual locale.
For example, a collator for "en_US_CALIFORNIA" was requested. In the current state of ICU (2.0), the requested locale is "en_US_CALIFORNIA", the valid locale is "en_US" (most specific locale supported by ICU) and the actual locale is "root" (the collation data comes unmodified from the UCA). The locale is considered supported by ICU if there is a core ICU bundle for that locale (although it may be empty).
Properties | |
---|---|
ULOC_ACTUAL_LOCALE
|
This is the locale the data actually comes from. |
ULOC_DATA_LOCALE_TYPE_LIMIT
|
One more than the highest normal ULocDataLocaleType value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
ULOC_REQUESTED_LOCALE
|
This is the requested locale. Deprecated. ICU 2.8 |
ULOC_VALID_LOCALE
|
This is the most specific locale supported by ICU. |
UNormalization2Mode
UNormalization2Mode
Constants for normalization modes.
For details about standard Unicode normalization forms and about the algorithms which are also used with custom mapping tables see http://www.unicode.org/unicode/reports/tr15/
Properties | |
---|---|
UNORM2_COMPOSE
|
Decomposition followed by composition. Same as standard NFC when using an "nfc" instance. Same as standard NFKC when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/ |
UNORM2_COMPOSE_CONTIGUOUS
|
Compose only contiguously. Also known as "FCC" or "Fast C Contiguous". The result will often but not always be in NFC. The result will conform to FCD which is useful for processing. Not a standard Unicode normalization form. For details see http://www.unicode.org/notes/tn5/#FCC |
UNORM2_DECOMPOSE
|
Map, and reorder canonically. Same as standard NFD when using an "nfc" instance. Same as standard NFKD when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/ |
UNORM2_FCD
|
"Fast C or D" form. If a string is in this form, then further decomposition without reordering would yield the same form as DECOMPOSE. Text in "Fast C or D" form can be processed efficiently with data tables that are "canonically closed", that is, that provide equivalent data for equivalent text, without having to be fully normalized. Not a standard Unicode normalization form. Not a unique form: Different FCD strings can be canonically equivalent. For details see http://www.unicode.org/notes/tn5/#FCD |
UNormalizationCheckResult
UNormalizationCheckResult
Result values for normalization quick check functions.
For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
UNumericType
UNumericType
Numeric Type constants.
See also: UCHAR_NUMERIC_TYPE
Properties | |
---|---|
U_NT_COUNT
|
One more than the highest normal UNumericType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_NT_DECIMAL
|
|
U_NT_DIGIT
|
|
U_NT_NONE
|
|
U_NT_NUMERIC
|
UProperty
UProperty
Selection constants for Unicode properties.
These constants are used in functions like u_hasBinaryProperty to select one of the Unicode properties.
The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
For details about the properties see UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
Important: If ICU is built with UCD files from a Unicode version earlier than, e.g., 3.2, then properties marked with "new in Unicode 3.2" are not available or not fully available. Check u_getUnicodeVersion to be sure.
See also: u_hasBinaryProperty See also: u_getIntPropertyValue See also: u_getUnicodeVersion
Properties | |
---|---|
UCHAR_AGE
|
String property Age. Corresponds to u_charAge. |
UCHAR_ALPHABETIC
|
Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic |
UCHAR_ASCII_HEX_DIGIT
|
Binary property ASCII_Hex_Digit. 0-9 A-F a-f |
UCHAR_BASIC_EMOJI
|
Binary property of strings Basic_Emoji. |
UCHAR_BIDI_CLASS
|
Enumerated property Bidi_Class. Same as u_charDirection, returns UCharDirection values. |
UCHAR_BIDI_CONTROL
|
Binary property Bidi_Control. Format controls which have specific functions in the Bidi Algorithm. |
UCHAR_BIDI_MIRRORED
|
Binary property Bidi_Mirrored. Characters that may change display in RTL text. Same as u_isMirrored. See Bidi Algorithm, UTR 9. |
UCHAR_BIDI_MIRRORING_GLYPH
|
String property Bidi_Mirroring_Glyph. Corresponds to u_charMirror. |
UCHAR_BIDI_PAIRED_BRACKET
|
String property Bidi_Paired_Bracket (new in Unicode 6.3). Corresponds to u_getBidiPairedBracket. |
UCHAR_BIDI_PAIRED_BRACKET_TYPE
|
Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). Used in UAX #9: Unicode Bidirectional Algorithm (http://www.unicode.org/reports/tr9/). Returns UBidiPairedBracketType values. |
UCHAR_BINARY_LIMIT
|
One more than the last constant for binary Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_BINARY_START
|
First constant for binary Unicode properties. |
UCHAR_BLOCK
|
Enumerated property Block. Same as ublock_getCode, returns UBlockCode values. |
UCHAR_CANONICAL_COMBINING_CLASS
|
Enumerated property Canonical_Combining_Class. Same as u_getCombiningClass, returns 8-bit numeric values. |
UCHAR_CASED
|
Binary property Cased. For Lowercase, Uppercase and Titlecase characters. |
UCHAR_CASE_FOLDING
|
String property Case_Folding. Corresponds to u_strFoldCase in ustring.h. |
UCHAR_CASE_IGNORABLE
|
Binary property Case_Ignorable. Used in context-sensitive case mappings. |
UCHAR_CASE_SENSITIVE
|
Binary property Case_Sensitive. Either the source of a case mapping or in the target of a case mapping. Not the same as the general category Cased_Letter. |
UCHAR_CHANGES_WHEN_CASEFOLDED
|
Binary property Changes_When_Casefolded. |
UCHAR_CHANGES_WHEN_CASEMAPPED
|
Binary property Changes_When_Casemapped. |
UCHAR_CHANGES_WHEN_LOWERCASED
|
Binary property Changes_When_Lowercased. |
UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
|
Binary property Changes_When_NFKC_Casefolded. |
UCHAR_CHANGES_WHEN_TITLECASED
|
Binary property Changes_When_Titlecased. |
UCHAR_CHANGES_WHEN_UPPERCASED
|
Binary property Changes_When_Uppercased. |
UCHAR_DASH
|
Binary property Dash. Variations of dashes. |
UCHAR_DECOMPOSITION_TYPE
|
Enumerated property Decomposition_Type. Returns UDecompositionType values. |
UCHAR_DEFAULT_IGNORABLE_CODE_POINT
|
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). Ignorable in most processing. <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) |
UCHAR_DEPRECATED
|
Binary property Deprecated (new in Unicode 3.2). The usage of deprecated characters is strongly discouraged. |
UCHAR_DIACRITIC
|
Binary property Diacritic. Characters that linguistically modify the meaning of another character to which they apply. |
UCHAR_DOUBLE_LIMIT
|
One more than the last constant for double Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_DOUBLE_START
|
First constant for double Unicode properties. |
UCHAR_EAST_ASIAN_WIDTH
|
Enumerated property East_Asian_Width. See http://www.unicode.org/reports/tr11/ Returns UEastAsianWidth values. |
UCHAR_EMOJI
|
Binary property Emoji. |
UCHAR_EMOJI_COMPONENT
|
Binary property Emoji_Component. |
UCHAR_EMOJI_KEYCAP_SEQUENCE
|
Binary property of strings Emoji_Keycap_Sequence. |
UCHAR_EMOJI_MODIFIER
|
Binary property Emoji_Modifier. |
UCHAR_EMOJI_MODIFIER_BASE
|
Binary property Emoji_Modifier_Base. |
UCHAR_EMOJI_PRESENTATION
|
Binary property Emoji_Presentation. |
UCHAR_EXTENDED_PICTOGRAPHIC
|
Binary property Extended_Pictographic. |
UCHAR_EXTENDER
|
Binary property Extender. Extend the value or shape of a preceding alphabetic character, e.g., length and iteration marks. |
UCHAR_FULL_COMPOSITION_EXCLUSION
|
Binary property Full_Composition_Exclusion. CompositionExclusions.txt+Singleton Decompositions+ Non-Starter Decompositions. |
UCHAR_GENERAL_CATEGORY
|
Enumerated property General_Category. Same as u_charType, returns UCharCategory values. |
UCHAR_GENERAL_CATEGORY_MASK
|
Bitmask property General_Category_Mask. This is the General_Category property returned as a bit mask. When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), returns bit masks for UCharCategory values where exactly one bit is set. When used with u_getPropertyValueName() and u_getPropertyValueEnum(), a multi-bit mask is used for sets of categories like "Letters". Mask values should be cast to uint32_t. |
UCHAR_GRAPHEME_BASE
|
Binary property Grapheme_Base (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ |
UCHAR_GRAPHEME_CLUSTER_BREAK
|
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/). Returns UGraphemeClusterBreak values. |
UCHAR_GRAPHEME_EXTEND
|
Binary property Grapheme_Extend (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ |
UCHAR_GRAPHEME_LINK
|
Binary property Grapheme_Link (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. |
UCHAR_HANGUL_SYLLABLE_TYPE
|
Enumerated property Hangul_Syllable_Type, new in Unicode 4. Returns UHangulSyllableType values. |
UCHAR_HEX_DIGIT
|
Binary property Hex_Digit. Characters commonly used for hexadecimal numbers. |
UCHAR_HYPHEN
|
Binary property Hyphen. Dashes used to mark connections between pieces of words, plus the Katakana middle dot. |
UCHAR_IDEOGRAPHIC
|
Binary property Ideographic. CJKV ideographs. |
UCHAR_IDS_BINARY_OPERATOR
|
Binary property IDS_Binary_Operator (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences. |
UCHAR_IDS_TRINARY_OPERATOR
|
Binary property IDS_Trinary_Operator (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences. |
UCHAR_ID_CONTINUE
|
Binary property ID_Continue. Characters that can continue an identifier. DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." ID_Start+Mn+Mc+Nd+Pc |
UCHAR_ID_START
|
Binary property ID_Start. Characters that can start an identifier. Lu+Ll+Lt+Lm+Lo+Nl |
UCHAR_INDIC_POSITIONAL_CATEGORY
|
Enumerated property Indic_Positional_Category. New in Unicode 6.0 as provisional property Indic_Matra_Category; renamed and changed to informative in Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt |
UCHAR_INDIC_SYLLABIC_CATEGORY
|
Enumerated property Indic_Syllabic_Category. New in Unicode 6.0 as provisional; informative since Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt |
UCHAR_INT_LIMIT
|
One more than the last constant for enumerated/integer Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_INT_START
|
First constant for enumerated/integer Unicode properties. |
UCHAR_INVALID_CODE
|
Represents a nonexistent or invalid property or property value. |
UCHAR_ISO_COMMENT
|
Deprecated string property ISO_Comment. Corresponds to u_getISOComment. Deprecated. ICU 49 |
UCHAR_JOINING_GROUP
|
Enumerated property Joining_Group. Returns UJoiningGroup values. |
UCHAR_JOINING_TYPE
|
Enumerated property Joining_Type. Returns UJoiningType values. |
UCHAR_JOIN_CONTROL
|
Binary property Join_Control. Format controls for cursive joining and ligation. |
UCHAR_LEAD_CANONICAL_COMBINING_CLASS
|
Enumerated property Lead_Canonical_Combining_Class. ICU-specific property for the ccc of the first code point of the decomposition, or lccc(c)=ccc(NFD(c)[0]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. |
UCHAR_LINE_BREAK
|
Enumerated property Line_Break. Returns ULineBreak values. |
UCHAR_LOGICAL_ORDER_EXCEPTION
|
Binary property Logical_Order_Exception (new in Unicode 3.2). Characters that do not use logical order and require special handling in most processing. |
UCHAR_LOWERCASE
|
Binary property Lowercase. Same as u_isULowercase, different from u_islower. Ll+Other_Lowercase |
UCHAR_LOWERCASE_MAPPING
|
String property Lowercase_Mapping. Corresponds to u_strToLower in ustring.h. |
UCHAR_MASK_LIMIT
|
One more than the last constant for bit-mask Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_MASK_START
|
First constant for bit-mask Unicode properties. |
UCHAR_MATH
|
Binary property Math. Sm+Other_Math |
UCHAR_NAME
|
String property Name. Corresponds to u_charName. |
UCHAR_NFC_INERT
|
Binary property NFC_Inert. ICU-specific property for characters that are inert under NFC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method. |
UCHAR_NFC_QUICK_CHECK
|
Enumerated property NFC_Quick_Check. Returns UNormalizationCheckResult values. |
UCHAR_NFD_INERT
|
Binary property NFD_Inert. ICU-specific property for characters that are inert under NFD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method. |
UCHAR_NFD_QUICK_CHECK
|
Enumerated property NFD_Quick_Check. Returns UNormalizationCheckResult values. |
UCHAR_NFKC_INERT
|
Binary property NFKC_Inert. ICU-specific property for characters that are inert under NFKC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method. |
UCHAR_NFKC_QUICK_CHECK
|
Enumerated property NFKC_Quick_Check. Returns UNormalizationCheckResult values. |
UCHAR_NFKD_INERT
|
Binary property NFKD_Inert. ICU-specific property for characters that are inert under NFKD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method. |
UCHAR_NFKD_QUICK_CHECK
|
Enumerated property NFKD_Quick_Check. Returns UNormalizationCheckResult values. |
UCHAR_NONCHARACTER_CODE_POINT
|
Binary property Noncharacter_Code_Point. Code points that are explicitly defined as illegal for the encoding of characters. |
UCHAR_NUMERIC_TYPE
|
Enumerated property Numeric_Type. Returns UNumericType values. |
UCHAR_NUMERIC_VALUE
|
Double property Numeric_Value. Corresponds to u_getNumericValue. |
UCHAR_OTHER_PROPERTY_LIMIT
|
One more than the last constant for Unicode properties with unusual value types. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_OTHER_PROPERTY_START
|
First constant for Unicode properties with unusual value types. |
UCHAR_PATTERN_SYNTAX
|
Binary property Pattern_Syntax (new in Unicode 4.1). See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/) |
UCHAR_PATTERN_WHITE_SPACE
|
Binary property Pattern_White_Space (new in Unicode 4.1). See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/) |
UCHAR_POSIX_ALNUM
|
Binary property alnum (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_POSIX_BLANK
|
Binary property blank (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_POSIX_GRAPH
|
Binary property graph (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_POSIX_PRINT
|
Binary property print (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_POSIX_XDIGIT
|
Binary property xdigit (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_PREPENDED_CONCATENATION_MARK
|
Binary property Prepended_Concatenation_Mark. |
UCHAR_QUOTATION_MARK
|
Binary property Quotation_Mark. |
UCHAR_RADICAL
|
Binary property Radical (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences. |
UCHAR_REGIONAL_INDICATOR
|
Binary property Regional_Indicator. |
UCHAR_RGI_EMOJI
|
Binary property of strings RGI_Emoji. |
UCHAR_RGI_EMOJI_FLAG_SEQUENCE
|
Binary property of strings RGI_Emoji_Flag_Sequence. |
UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE
|
Binary property of strings RGI_Emoji_Modifier_Sequence. |
UCHAR_RGI_EMOJI_TAG_SEQUENCE
|
Binary property of strings RGI_Emoji_Tag_Sequence. |
UCHAR_RGI_EMOJI_ZWJ_SEQUENCE
|
Binary property of strings RGI_Emoji_ZWJ_Sequence. |
UCHAR_SCRIPT
|
Enumerated property Script. Same as uscript_getScript, returns UScriptCode values. |
UCHAR_SCRIPT_EXTENSIONS
|
Miscellaneous property Script_Extensions (new in Unicode 6.0). Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/. Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. |
UCHAR_SEGMENT_STARTER
|
Binary property Segment_Starter. ICU-specific property for characters that are starters in terms of Unicode normalization and combining character sequences. They have ccc=0 and do not occur in non-initial position of the canonical decomposition of any character (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). ICU uses this property for segmenting a string for generating a set of canonically equivalent strings, e.g. for canonical closure while processing collation tailoring rules. |
UCHAR_SENTENCE_BREAK
|
Enumerated property Sentence_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/). Returns USentenceBreak values. |
UCHAR_SIMPLE_CASE_FOLDING
|
String property Simple_Case_Folding. Corresponds to u_foldCase. |
UCHAR_SIMPLE_LOWERCASE_MAPPING
|
String property Simple_Lowercase_Mapping. Corresponds to u_tolower. |
UCHAR_SIMPLE_TITLECASE_MAPPING
|
String property Simple_Titlecase_Mapping. Corresponds to u_totitle. |
UCHAR_SIMPLE_UPPERCASE_MAPPING
|
String property Simple_Uppercase_Mapping. Corresponds to u_toupper. |
UCHAR_SOFT_DOTTED
|
Binary property Soft_Dotted (new in Unicode 3.2). Characters with a "soft dot", like i or j. An accent placed on these characters causes the dot to disappear. |
UCHAR_STRING_LIMIT
|
One more than the last constant for string Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_STRING_START
|
First constant for string Unicode properties. |
UCHAR_S_TERM
|
Binary property STerm (new in Unicode 4.0.1). Sentence Terminal. Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) |
UCHAR_TERMINAL_PUNCTUATION
|
Binary property Terminal_Punctuation. Punctuation characters that generally mark the end of textual units. |
UCHAR_TITLECASE_MAPPING
|
String property Titlecase_Mapping. Corresponds to u_strToTitle in ustring.h. |
UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
|
Enumerated property Trail_Canonical_Combining_Class. ICU-specific property for the ccc of the last code point of the decomposition, or tccc(c)=ccc(NFD(c)[last]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. |
UCHAR_UNICODE_1_NAME
|
String property Unicode_1_Name. This property is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this property. Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). Deprecated. ICU 49 |
UCHAR_UNIFIED_IDEOGRAPH
|
Binary property Unified_Ideograph (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences. |
UCHAR_UPPERCASE
|
Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. Lu+Other_Uppercase |
UCHAR_UPPERCASE_MAPPING
|
String property Uppercase_Mapping. Corresponds to u_strToUpper in ustring.h. |
UCHAR_VARIATION_SELECTOR
|
Binary property Variation_Selector (new in Unicode 4.0.1). Indicates all those characters that qualify as Variation Selectors. For details on the behavior of these characters, see StandardizedVariants.html and 15.6 Variation Selectors. |
UCHAR_VERTICAL_ORIENTATION
|
Enumerated property Vertical_Orientation. Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). New as a UCD property in Unicode 10.0. |
UCHAR_WHITE_SPACE
|
Binary property White_Space. Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. Space characters+TAB+CR+LF-ZWSP-ZWNBSP |
UCHAR_WORD_BREAK
|
Enumerated property Word_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/). Returns UWordBreakValues values. |
UCHAR_XID_CONTINUE
|
Binary property XID_Continue. ID_Continue modified to allow closure under normalization forms NFKC and NFKD. |
UCHAR_XID_START
|
Binary property XID_Start. ID_Start modified to allow closure under normalization forms NFKC and NFKD. |
UPropertyNameChoice
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
These selectors are used to choose which name is returned for a given property or value. All properties and values have a long name. Most have a short name, but some do not. Unicode allows for additional names, beyond the long and short name, which would be indicated by U_LONG_PROPERTY_NAME + i, where i=1, 2,...
See also: u_getPropertyName() See also: u_getPropertyValueName()
Properties | |
---|---|
U_LONG_PROPERTY_NAME
|
|
U_PROPERTY_NAME_CHOICE_COUNT
|
One more than the highest normal UPropertyNameChoice value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_SHORT_PROPERTY_NAME
|
UScriptCode
UScriptCode
Constants for ISO 15924 script codes.
The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.
Properties | |
---|---|
USCRIPT_ADLAM
|
|
USCRIPT_AFAKA
|
|
USCRIPT_AHOM
|
|
USCRIPT_ANATOLIAN_HIEROGLYPHS
|
|
USCRIPT_ARABIC
|
|
USCRIPT_ARMENIAN
|
|
USCRIPT_AVESTAN
|
|
USCRIPT_BALINESE
|
|
USCRIPT_BAMUM
|
|
USCRIPT_BASSA_VAH
|
|
USCRIPT_BATAK
|
|
USCRIPT_BENGALI
|
|
USCRIPT_BHAIKSUKI
|
|
USCRIPT_BLISSYMBOLS
|
|
USCRIPT_BOOK_PAHLAVI
|
|
USCRIPT_BOPOMOFO
|
|
USCRIPT_BRAHMI
|
|
USCRIPT_BRAILLE
|
|
USCRIPT_BUGINESE
|
|
USCRIPT_BUHID
|
|
USCRIPT_CANADIAN_ABORIGINAL
|
Canadian_Aboriginal script. |
USCRIPT_CARIAN
|
|
USCRIPT_CAUCASIAN_ALBANIAN
|
|
USCRIPT_CHAKMA
|
|
USCRIPT_CHAM
|
|
USCRIPT_CHEROKEE
|
|
USCRIPT_CHORASMIAN
|
|
USCRIPT_CIRTH
|
|
USCRIPT_CODE_LIMIT
|
One more than the highest normal UScriptCode value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
USCRIPT_COMMON
|
|
USCRIPT_COPTIC
|
|
USCRIPT_CUNEIFORM
|
|
USCRIPT_CYPRIOT
|
|
USCRIPT_CYPRO_MINOAN
|
|
USCRIPT_CYRILLIC
|
|
USCRIPT_DEMOTIC_EGYPTIAN
|
|
USCRIPT_DESERET
|
|
USCRIPT_DEVANAGARI
|
|
USCRIPT_DIVES_AKURU
|
|
USCRIPT_DOGRA
|
|
USCRIPT_DUPLOYAN
|
|
USCRIPT_DUPLOYAN_SHORTAND
|
Deprecated. ICU 54 Typo, use USCRIPT_DUPLOYAN |
USCRIPT_EASTERN_SYRIAC
|
|
USCRIPT_EGYPTIAN_HIEROGLYPHS
|
|
USCRIPT_ELBASAN
|
|
USCRIPT_ELYMAIC
|
|
USCRIPT_ESTRANGELO_SYRIAC
|
|
USCRIPT_ETHIOPIC
|
|
USCRIPT_GEORGIAN
|
|
USCRIPT_GLAGOLITIC
|
|
USCRIPT_GOTHIC
|
|
USCRIPT_GRANTHA
|
|
USCRIPT_GREEK
|
|
USCRIPT_GUJARATI
|
|
USCRIPT_GUNJALA_GONDI
|
|
USCRIPT_GURMUKHI
|
|
USCRIPT_HAN
|
|
USCRIPT_HANGUL
|
|
USCRIPT_HANIFI_ROHINGYA
|
|
USCRIPT_HANUNOO
|
|
USCRIPT_HAN_WITH_BOPOMOFO
|
|
USCRIPT_HARAPPAN_INDUS
|
|
USCRIPT_HATRAN
|
|
USCRIPT_HEBREW
|
|
USCRIPT_HIERATIC_EGYPTIAN
|
|
USCRIPT_HIRAGANA
|
|
USCRIPT_IMPERIAL_ARAMAIC
|
|
USCRIPT_INHERITED
|
|
USCRIPT_INSCRIPTIONAL_PAHLAVI
|
|
USCRIPT_INSCRIPTIONAL_PARTHIAN
|
|
USCRIPT_INVALID_CODE
|
|
USCRIPT_JAMO
|
|
USCRIPT_JAPANESE
|
|
USCRIPT_JAVANESE
|
|
USCRIPT_JURCHEN
|
|
USCRIPT_KAITHI
|
|
USCRIPT_KANNADA
|
|
USCRIPT_KATAKANA
|
|
USCRIPT_KATAKANA_OR_HIRAGANA
|
New script code in Unicode 4.0.1. |
USCRIPT_KAWI
|
|
USCRIPT_KAYAH_LI
|
|
USCRIPT_KHAROSHTHI
|
|
USCRIPT_KHITAN_SMALL_SCRIPT
|
|
USCRIPT_KHMER
|
|
USCRIPT_KHOJKI
|
|
USCRIPT_KHUDAWADI
|
|
USCRIPT_KHUTSURI
|
|
USCRIPT_KOREAN
|
|
USCRIPT_KPELLE
|
|
USCRIPT_LANNA
|
|
USCRIPT_LAO
|
|
USCRIPT_LATIN
|
|
USCRIPT_LATIN_FRAKTUR
|
|
USCRIPT_LATIN_GAELIC
|
|
USCRIPT_LEPCHA
|
|
USCRIPT_LIMBU
|
|
USCRIPT_LINEAR_A
|
|
USCRIPT_LINEAR_B
|
|
USCRIPT_LISU
|
|
USCRIPT_LOMA
|
|
USCRIPT_LYCIAN
|
|
USCRIPT_LYDIAN
|
|
USCRIPT_MAHAJANI
|
|
USCRIPT_MAKASAR
|
|
USCRIPT_MALAYALAM
|
|
USCRIPT_MANDAEAN
|
|
USCRIPT_MANDAIC
|
|
USCRIPT_MANICHAEAN
|
|
USCRIPT_MARCHEN
|
|
USCRIPT_MASARAM_GONDI
|
|
USCRIPT_MATHEMATICAL_NOTATION
|
|
USCRIPT_MAYAN_HIEROGLYPHS
|
|
USCRIPT_MEDEFAIDRIN
|
|
USCRIPT_MEITEI_MAYEK
|
|
USCRIPT_MENDE
|
Mende Kikakui. |
USCRIPT_MEROITIC
|
|
USCRIPT_MEROITIC_CURSIVE
|
|
USCRIPT_MEROITIC_HIEROGLYPHS
|
|
USCRIPT_MIAO
|
|
USCRIPT_MODI
|
|
USCRIPT_MONGOLIAN
|
|
USCRIPT_MOON
|
|
USCRIPT_MRO
|
|
USCRIPT_MULTANI
|
|
USCRIPT_MYANMAR
|
|
USCRIPT_NABATAEAN
|
|
USCRIPT_NAG_MUNDARI
|
|
USCRIPT_NAKHI_GEBA
|
|
USCRIPT_NANDINAGARI
|
|
USCRIPT_NEWA
|
|
USCRIPT_NEW_TAI_LUE
|
|
USCRIPT_NKO
|
|
USCRIPT_NUSHU
|
|
USCRIPT_NYIAKENG_PUACHUE_HMONG
|
|
USCRIPT_OGHAM
|
|
USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC
|
|
USCRIPT_OLD_HUNGARIAN
|
|
USCRIPT_OLD_ITALIC
|
|
USCRIPT_OLD_NORTH_ARABIAN
|
|
USCRIPT_OLD_PERMIC
|
|
USCRIPT_OLD_PERSIAN
|
|
USCRIPT_OLD_SOGDIAN
|
|
USCRIPT_OLD_SOUTH_ARABIAN
|
|
USCRIPT_OLD_UYGHUR
|
|
USCRIPT_OL_CHIKI
|
|
USCRIPT_ORIYA
|
|
USCRIPT_ORKHON
|
|
USCRIPT_OSAGE
|
|
USCRIPT_OSMANYA
|
|
USCRIPT_PAHAWH_HMONG
|
|
USCRIPT_PALMYRENE
|
|
USCRIPT_PAU_CIN_HAU
|
|
USCRIPT_PHAGS_PA
|
|
USCRIPT_PHOENICIAN
|
|
USCRIPT_PHONETIC_POLLARD
|
|
USCRIPT_PSALTER_PAHLAVI
|
|
USCRIPT_REJANG
|
|
USCRIPT_RONGORONGO
|
|
USCRIPT_RUNIC
|
|
USCRIPT_SAMARITAN
|
|
USCRIPT_SARATI
|
|
USCRIPT_SAURASHTRA
|
|
USCRIPT_SHARADA
|
|
USCRIPT_SHAVIAN
|
|
USCRIPT_SIDDHAM
|
|
USCRIPT_SIGN_WRITING
|
Sutton SignWriting. |
USCRIPT_SIMPLIFIED_HAN
|
|
USCRIPT_SINDHI
|
|
USCRIPT_SINHALA
|
|
USCRIPT_SOGDIAN
|
|
USCRIPT_SORA_SOMPENG
|
|
USCRIPT_SOYOMBO
|
|
USCRIPT_SUNDANESE
|
|
USCRIPT_SYLOTI_NAGRI
|
|
USCRIPT_SYMBOLS
|
|
USCRIPT_SYMBOLS_EMOJI
|
|
USCRIPT_SYRIAC
|
|
USCRIPT_TAGALOG
|
|
USCRIPT_TAGBANWA
|
|
USCRIPT_TAI_LE
|
|
USCRIPT_TAI_VIET
|
|
USCRIPT_TAKRI
|
|
USCRIPT_TAMIL
|
|
USCRIPT_TANGSA
|
|
USCRIPT_TANGUT
|
|
USCRIPT_TELUGU
|
|
USCRIPT_TENGWAR
|
|
USCRIPT_THAANA
|
|
USCRIPT_THAI
|
|
USCRIPT_TIBETAN
|
|
USCRIPT_TIFINAGH
|
|
USCRIPT_TIRHUTA
|
|
USCRIPT_TOTO
|
|
USCRIPT_TRADITIONAL_HAN
|
|
USCRIPT_UCAS
|
Canadian_Aboriginal script (alias). |
USCRIPT_UGARITIC
|
|
USCRIPT_UNKNOWN
|
|
USCRIPT_UNWRITTEN_LANGUAGES
|
|
USCRIPT_VAI
|
|
USCRIPT_VISIBLE_SPEECH
|
|
USCRIPT_VITHKUQI
|
|
USCRIPT_WANCHO
|
|
USCRIPT_WARANG_CITI
|
|
USCRIPT_WESTERN_SYRIAC
|
|
USCRIPT_WOLEAI
|
|
USCRIPT_YEZIDI
|
|
USCRIPT_YI
|
|
USCRIPT_ZANABAZAR_SQUARE
|
UScriptUsage
UScriptUsage
Script usage constants.
See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
USentenceBreak
USentenceBreak
Sentence Break constants.
See also: UCHAR_SENTENCE_BREAK
Properties | |
---|---|
U_SB_ATERM
|
|
U_SB_CLOSE
|
|
U_SB_COUNT
|
One more than the highest normal USentenceBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_SB_CR
|
|
U_SB_EXTEND
|
|
U_SB_FORMAT
|
|
U_SB_LF
|
|
U_SB_LOWER
|
|
U_SB_NUMERIC
|
|
U_SB_OLETTER
|
|
U_SB_OTHER
|
|
U_SB_SCONTINUE
|
|
U_SB_SEP
|
|
U_SB_SP
|
|
U_SB_STERM
|
|
U_SB_UPPER
|
USentenceBreakTag
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
A range of values is defined for each category of sentence, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.
The numeric values of all of these constants are stable (will not change).
Properties | |
---|---|
UBRK_SENTENCE_SEP
|
Tag value for for sentences that do not contain an ending sen |