C API: Unicode Script Information. More...
#include "unicode/utypes.h"
Go to the source code of this file.
Enumerations | |
enum | UScriptCode { USCRIPT_INVALID_CODE = -1, USCRIPT_COMMON = 0, USCRIPT_INHERITED = 1, USCRIPT_ARABIC = 2, USCRIPT_ARMENIAN = 3, USCRIPT_BENGALI = 4, USCRIPT_BOPOMOFO = 5, USCRIPT_CHEROKEE = 6, USCRIPT_COPTIC = 7, USCRIPT_CYRILLIC = 8, USCRIPT_DESERET = 9, USCRIPT_DEVANAGARI = 10, USCRIPT_ETHIOPIC = 11, USCRIPT_GEORGIAN = 12, USCRIPT_GOTHIC = 13, USCRIPT_GREEK = 14, USCRIPT_GUJARATI = 15, USCRIPT_GURMUKHI = 16, USCRIPT_HAN = 17, USCRIPT_HANGUL = 18, USCRIPT_HEBREW = 19, USCRIPT_HIRAGANA = 20, USCRIPT_KANNADA = 21, USCRIPT_KATAKANA = 22, USCRIPT_KHMER = 23, USCRIPT_LAO = 24, USCRIPT_LATIN = 25, USCRIPT_MALAYALAM = 26, USCRIPT_MONGOLIAN = 27, USCRIPT_MYANMAR = 28, USCRIPT_OGHAM = 29, USCRIPT_OLD_ITALIC = 30, USCRIPT_ORIYA = 31, USCRIPT_RUNIC = 32, USCRIPT_SINHALA = 33, USCRIPT_SYRIAC = 34, USCRIPT_TAMIL = 35, USCRIPT_TELUGU = 36, USCRIPT_THAANA = 37, USCRIPT_THAI = 38, USCRIPT_TIBETAN = 39, USCRIPT_CANADIAN_ABORIGINAL = 40, USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, USCRIPT_YI = 41, USCRIPT_TAGALOG = 42, USCRIPT_HANUNOO = 43, USCRIPT_BUHID = 44, USCRIPT_TAGBANWA = 45, USCRIPT_BRAILLE = 46, USCRIPT_CYPRIOT = 47, USCRIPT_LIMBU = 48, USCRIPT_LINEAR_B = 49, USCRIPT_OSMANYA = 50, USCRIPT_SHAVIAN = 51, USCRIPT_TAI_LE = 52, USCRIPT_UGARITIC = 53, USCRIPT_KATAKANA_OR_HIRAGANA = 54, USCRIPT_BUGINESE = 55, USCRIPT_GLAGOLITIC = 56, USCRIPT_KHAROSHTHI = 57, USCRIPT_SYLOTI_NAGRI = 58, USCRIPT_NEW_TAI_LUE = 59, USCRIPT_TIFINAGH = 60, USCRIPT_OLD_PERSIAN = 61, USCRIPT_BALINESE = 62, USCRIPT_BATAK = 63, USCRIPT_BLISSYMBOLS = 64, USCRIPT_BRAHMI = 65, USCRIPT_CHAM = 66, USCRIPT_CIRTH = 67, USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, USCRIPT_DEMOTIC_EGYPTIAN = 69, USCRIPT_HIERATIC_EGYPTIAN = 70, USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, USCRIPT_KHUTSURI = 72, USCRIPT_SIMPLIFIED_HAN = 73, USCRIPT_TRADITIONAL_HAN = 74, USCRIPT_PAHAWH_HMONG = 75, USCRIPT_OLD_HUNGARIAN = 76, USCRIPT_HARAPPAN_INDUS = 77, USCRIPT_JAVANESE = 78, USCRIPT_KAYAH_LI = 79, USCRIPT_LATIN_FRAKTUR = 80, 
USCRIPT_LATIN_GAELIC = 81, USCRIPT_LEPCHA = 82, USCRIPT_LINEAR_A = 83, USCRIPT_MANDAEAN = 84, USCRIPT_MAYAN_HIEROGLYPHS = 85, USCRIPT_MEROITIC = 86, USCRIPT_NKO = 87, USCRIPT_ORKHON = 88, USCRIPT_OLD_PERMIC = 89, USCRIPT_PHAGS_PA = 90, USCRIPT_PHOENICIAN = 91, USCRIPT_PHONETIC_POLLARD = 92, USCRIPT_RONGORONGO = 93, USCRIPT_SARATI = 94, USCRIPT_ESTRANGELO_SYRIAC = 95, USCRIPT_WESTERN_SYRIAC = 96, USCRIPT_EASTERN_SYRIAC = 97, USCRIPT_TENGWAR = 98, USCRIPT_VAI = 99, USCRIPT_VISIBLE_SPEECH = 100, USCRIPT_CUNEIFORM = 101, USCRIPT_UNWRITTEN_LANGUAGES = 102, USCRIPT_UNKNOWN = 103, USCRIPT_CARIAN = 104, USCRIPT_JAPANESE = 105, USCRIPT_LANNA = 106, USCRIPT_LYCIAN = 107, USCRIPT_LYDIAN = 108, USCRIPT_OL_CHIKI = 109, USCRIPT_REJANG = 110, USCRIPT_SAURASHTRA = 111, USCRIPT_SIGN_WRITING = 112, USCRIPT_SUNDANESE = 113, USCRIPT_MOON = 114, USCRIPT_MEITEI_MAYEK = 115, USCRIPT_IMPERIAL_ARAMAIC = 116, USCRIPT_AVESTAN = 117, USCRIPT_CHAKMA = 118, USCRIPT_KOREAN = 119, USCRIPT_KAITHI = 120, USCRIPT_MANICHAEAN = 121, USCRIPT_INSCRIPTIONAL_PAHLAVI = 122, USCRIPT_PSALTER_PAHLAVI = 123, USCRIPT_BOOK_PAHLAVI = 124, USCRIPT_INSCRIPTIONAL_PARTHIAN = 125, USCRIPT_SAMARITAN = 126, USCRIPT_TAI_VIET = 127, USCRIPT_MATHEMATICAL_NOTATION = 128, USCRIPT_SYMBOLS = 129, USCRIPT_CODE_LIMIT = 130 } |
Constants for ISO 15924 script codes. More... | |
Functions | |
int32_t | uscript_getCode (const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err) |
Gets script codes associated with the given locale or ISO 15924 abbreviation or name. | |
const char * | uscript_getName (UScriptCode scriptCode) |
Gets a script name associated with the given script code. | |
const char * | uscript_getShortName (UScriptCode scriptCode) |
Gets the short script name (ISO 15924 abbreviation) associated with the given script code. | |
UScriptCode | uscript_getScript (UChar32 codepoint, UErrorCode *err) |
Gets the script code associated with the given codepoint. |
C API: Unicode Script Information.
Definition in file uscript.h.
enum UScriptCode |
Constants for ISO 15924 script codes.
Many of these script codes - those from Unicode's ScriptNames.txt - are character property values for Unicode's Script property. See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).
Starting with ICU 3.6, constants for most ISO 15924 script codes are included (currently excluding private-use codes Qaaa..Qabx). For scripts for which there are codes in ISO 15924 but which are not used in the Unicode Character Database (UCD), there are no Unicode characters associated with those scripts.
For example, there are no characters that have a UCD script code of Hans or Hant. All Han ideographs have the Hani script code. The Hans and Hant script codes are used with CLDR data.
ISO 15924 script codes are included for use with CLDR and similar.
int32_t uscript_getCode | ( | const char * | nameOrAbbrOrLocale, | |
UScriptCode * | fillIn, | |||
int32_t | capacity, | |||
UErrorCode * | err | |||
) |
Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". Fills in USCRIPT_LATIN given "en" OR "en_US". If the required capacity is greater than the capacity of the destination buffer, then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
Note: To search by short or long script alias only, use u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does a fast lookup with no access of the locale data.
nameOrAbbrOrLocale | name of the script, as given in PropertyValueAliases.txt, or ISO 15924 code or locale | |
fillIn | the UScriptCode buffer to fill in the script code | |
capacity | the capacity (size) of the UScriptCode buffer passed in. | |
err | the error status code. |
const char* uscript_getName | ( | UScriptCode | scriptCode | ) |
Gets a script name associated with the given script code.
Returns "Malayalam" given USCRIPT_MALAYALAM
scriptCode | UScriptCode enum |
UScriptCode uscript_getScript | ( | UChar32 | codepoint, | |
UErrorCode * | err | |||
) |
Gets the script code associated with the given codepoint.
Returns USCRIPT_MALAYALAM given 0x0D02
codepoint | UChar32 codepoint | |
err | the error status code. |
const char* uscript_getShortName | ( | UScriptCode | scriptCode | ) |
Gets the short script name (ISO 15924 abbreviation) associated with the given script code.
Returns "Mlym" given USCRIPT_MALAYALAM
scriptCode | UScriptCode enum |