Software: Apache/2.0.54 (Fedora). PHP/5.0.4 uname -a: Linux mina-info.me 2.6.17-1.2142_FC4smp #1 SMP Tue Jul 11 22:57:02 EDT 2006 i686 uid=48(apache) gid=48(apache) groups=48(apache) Safe-mode: OFF (not secure) /usr/share/gtk-doc/html/glib/ drwxr-xr-x |
Viewing file: Select action/file-type:
Synopsis#include <glib.h> typedef gunichar; typedef gunichar2; gboolean g_unichar_validate (gunichar ch); gboolean g_unichar_isalnum (gunichar c); gboolean g_unichar_isalpha (gunichar c); gboolean g_unichar_iscntrl (gunichar c); gboolean g_unichar_isdigit (gunichar c); gboolean g_unichar_isgraph (gunichar c); gboolean g_unichar_islower (gunichar c); gboolean g_unichar_isprint (gunichar c); gboolean g_unichar_ispunct (gunichar c); gboolean g_unichar_isspace (gunichar c); gboolean g_unichar_isupper (gunichar c); gboolean g_unichar_isxdigit (gunichar c); gboolean g_unichar_istitle (gunichar c); gboolean g_unichar_isdefined (gunichar c); gboolean g_unichar_iswide (gunichar c); gunichar g_unichar_toupper (gunichar c); gunichar g_unichar_tolower (gunichar c); gunichar g_unichar_totitle (gunichar c); gint g_unichar_digit_value (gunichar c); gint g_unichar_xdigit_value (gunichar c); enum GUnicodeType; GUnicodeType g_unichar_type (gunichar c); enum GUnicodeBreakType; GUnicodeBreakType g_unichar_break_type (gunichar c); void g_unicode_canonical_ordering (gunichar *string, gsize len); gunichar* g_unicode_canonical_decomposition (gunichar ch, gsize *result_len); gboolean g_unichar_get_mirror_char (gunichar ch, gunichar *mirrored_ch); #define g_utf8_next_char (p) gunichar g_utf8_get_char (const gchar *p); gunichar g_utf8_get_char_validated (const gchar *p, gssize max_len); gchar* g_utf8_offset_to_pointer (const gchar *str, glong offset); glong g_utf8_pointer_to_offset (const gchar *str, const gchar *pos); gchar* g_utf8_prev_char (const gchar *p); gchar* g_utf8_find_next_char (const gchar *p, const gchar *end); gchar* g_utf8_find_prev_char (const gchar *str, const gchar *p); glong g_utf8_strlen (const gchar *p, gssize max); gchar* g_utf8_strncpy (gchar *dest, const gchar *src, gsize n); gchar* g_utf8_strchr (const gchar *p, gssize len, gunichar c); gchar* g_utf8_strrchr (const gchar *p, gssize len, gunichar c); gchar* g_utf8_strreverse (const gchar *str, gssize len); gboolean 
g_utf8_validate (const gchar *str, gssize max_len, const gchar **end); gchar* g_utf8_strup (const gchar *str, gssize len); gchar* g_utf8_strdown (const gchar *str, gssize len); gchar* g_utf8_casefold (const gchar *str, gssize len); gchar* g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode); enum GNormalizeMode; gint g_utf8_collate (const gchar *str1, const gchar *str2); gchar* g_utf8_collate_key (const gchar *str, gssize len); gunichar2* g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error); gunichar* g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error); gunichar* g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written); gunichar* g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error); gchar* g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error); gunichar2* g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error); gchar* g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error); gint g_unichar_to_utf8 (gunichar c, gchar *outbuf); Description
This section describes a number of functions for dealing with
Unicode characters and strings. There are analogues of the
traditional Detailsg_unichar_validate ()gboolean g_unichar_validate (gunichar ch);
Checks whether
g_unichar_isalnum ()gboolean g_unichar_isalnum (gunichar c);
Determines whether a character is alphanumeric.
Given some UTF-8 text, obtain a character value
with g_utf8_get_char().
g_unichar_isalpha ()gboolean g_unichar_isalpha (gunichar c);
Determines whether a character is alphabetic (i.e. a letter).
Given some UTF-8 text, obtain a character value with g_utf8_get_char().
g_unichar_iscntrl ()gboolean g_unichar_iscntrl (gunichar c);
Determines whether a character is a control character.
Given some UTF-8 text, obtain a character value with g_utf8_get_char().
g_unichar_isdigit ()gboolean g_unichar_isdigit (gunichar c);
Determines whether a character is numeric (i.e. a digit). This
covers ASCII 0-9 and also digits in other languages/scripts. Given
some UTF-8 text, obtain a character value with g_utf8_get_char().
g_unichar_isgraph ()gboolean g_unichar_isgraph (gunichar c);
Determines whether a character is printable and not a space
(returns
g_unichar_islower ()gboolean g_unichar_islower (gunichar c);
Determines whether a character is a lowercase letter.
Given some UTF-8 text, obtain a character value with g_utf8_get_char().
g_unichar_isprint ()gboolean g_unichar_isprint (gunichar c);
Determines whether a character is printable.
Unlike
g_unichar_ispunct ()gboolean g_unichar_ispunct (gunichar c);
Determines whether a character is punctuation or a symbol.
Given some UTF-8 text, obtain a character value with g_utf8_get_char().
g_unichar_isspace ()gboolean g_unichar_isspace (gunichar c);
Determines whether a character is a space, tab, or line separator
(newline, carriage return, etc.). Given some UTF-8 text, obtain a
character value with g_utf8_get_char(). (Note: don't use this to do word breaking; you have to use Pango or equivalent to get word breaking right, the algorithm is fairly complex.)
g_unichar_isupper ()gboolean g_unichar_isupper (gunichar c); Determines if a character is uppercase.
g_unichar_isxdigit ()gboolean g_unichar_isxdigit (gunichar c); Determines if a character is a hexadecimal digit.
g_unichar_istitle ()gboolean g_unichar_istitle (gunichar c); Determines if a character is titlecase. Some characters in Unicode which are composites, such as the DZ digraph have three case variants instead of just two. The titlecase form is used at the beginning of a word where only the first letter is capitalized. The titlecase form of the DZ digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
g_unichar_isdefined ()gboolean g_unichar_isdefined (gunichar c); Determines if a given character is assigned in the Unicode standard.
g_unichar_iswide ()gboolean g_unichar_iswide (gunichar c); Determines if a character is typically rendered in a double-width cell.
g_unichar_toupper ()gunichar g_unichar_toupper (gunichar c); Converts a character to uppercase.
g_unichar_tolower ()gunichar g_unichar_tolower (gunichar c); Converts a character to lower case.
g_unichar_totitle ()gunichar g_unichar_totitle (gunichar c); Converts a character to the titlecase.
g_unichar_digit_value ()gint g_unichar_digit_value (gunichar c); Determines the numeric value of a character as a decimal digit.
g_unichar_xdigit_value ()gint g_unichar_xdigit_value (gunichar c); Determines the numeric value of a character as a hexadecimal digit.
enum GUnicodeTypetypedef enum { G_UNICODE_CONTROL, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_PRIVATE_USE, G_UNICODE_SURROGATE, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LINE_SEPARATOR, G_UNICODE_PARAGRAPH_SEPARATOR, G_UNICODE_SPACE_SEPARATOR } GUnicodeType; These are the possible character classifications. See http://www.unicode.org/Public/UNIDATA/UnicodeData.html. g_unichar_type ()GUnicodeType g_unichar_type (gunichar c); Classifies a Unicode character by type.
enum GUnicodeBreakTypetypedef enum { G_UNICODE_BREAK_MANDATORY, G_UNICODE_BREAK_CARRIAGE_RETURN, G_UNICODE_BREAK_LINE_FEED, G_UNICODE_BREAK_COMBINING_MARK, G_UNICODE_BREAK_SURROGATE, G_UNICODE_BREAK_ZERO_WIDTH_SPACE, G_UNICODE_BREAK_INSEPARABLE, G_UNICODE_BREAK_NON_BREAKING_GLUE, G_UNICODE_BREAK_CONTINGENT, G_UNICODE_BREAK_SPACE, G_UNICODE_BREAK_AFTER, G_UNICODE_BREAK_BEFORE, G_UNICODE_BREAK_BEFORE_AND_AFTER, G_UNICODE_BREAK_HYPHEN, G_UNICODE_BREAK_NON_STARTER, G_UNICODE_BREAK_OPEN_PUNCTUATION, G_UNICODE_BREAK_CLOSE_PUNCTUATION, G_UNICODE_BREAK_QUOTATION, G_UNICODE_BREAK_EXCLAMATION, G_UNICODE_BREAK_IDEOGRAPHIC, G_UNICODE_BREAK_NUMERIC, G_UNICODE_BREAK_INFIX_SEPARATOR, G_UNICODE_BREAK_SYMBOL, G_UNICODE_BREAK_ALPHABETIC, G_UNICODE_BREAK_PREFIX, G_UNICODE_BREAK_POSTFIX, G_UNICODE_BREAK_COMPLEX_CONTEXT, G_UNICODE_BREAK_AMBIGUOUS, G_UNICODE_BREAK_UNKNOWN, G_UNICODE_BREAK_NEXT_LINE, G_UNICODE_BREAK_WORD_JOINER } GUnicodeBreakType; These are the possible line break classifications. See http://www.unicode.org/unicode/reports/tr14/. g_unichar_break_type ()GUnicodeBreakType g_unichar_break_type (gunichar c);
Determines the break type of
g_unicode_canonical_ordering ()void g_unicode_canonical_ordering (gunichar *string, gsize len); Computes the canonical ordering of a string in-place. This rearranges decomposed characters in the string according to their combining classes. See the Unicode manual for more information.
g_unicode_canonical_decomposition ()gunichar* g_unicode_canonical_decomposition (gunichar ch, gsize *result_len); Computes the canonical decomposition of a Unicode character.
g_unichar_get_mirror_char ()gboolean g_unichar_get_mirror_char (gunichar ch, gunichar *mirrored_ch); In Unicode, some characters are mirrored. This means that their images are mirrored horizontally in text that is laid out from right to left. For instance, "(" would become its mirror image, ")", in right-to-left text.
If
Since 2.4 g_utf8_next_char()#define g_utf8_next_char(p)
Skips to the next character in a UTF-8 string. The string must be
valid; this macro is as fast as possible, and has no error-checking.
You would use this macro to iterate over a string character by
character. The macro returns the start of the next UTF-8 character.
Before using this macro, use
g_utf8_get_char ()gunichar g_utf8_get_char (const gchar *p);
Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
If
g_utf8_get_char_validated ()gunichar g_utf8_get_char_validated (const gchar *p, gssize max_len); Convert a sequence of bytes encoded as UTF-8 to a Unicode character. This function checks for incomplete characters, for invalid characters such as characters that are out of the range of Unicode, and for overlong encodings of valid characters.
g_utf8_offset_to_pointer ()gchar* g_utf8_offset_to_pointer (const gchar *str, glong offset); Converts from an integer character offset to a pointer to a position within the string.
g_utf8_pointer_to_offset ()glong g_utf8_pointer_to_offset (const gchar *str, const gchar *pos); Converts from a pointer to a position within a string to an integer character offset.
g_utf8_prev_char ()gchar* g_utf8_prev_char (const gchar *p);
Finds the previous UTF-8 character in the string before
g_utf8_find_next_char ()gchar* g_utf8_find_next_char (const gchar *p, const gchar *end);
Finds the start of the next UTF-8 character in the string after
g_utf8_find_prev_char ()gchar* g_utf8_find_prev_char (const gchar *str, const gchar *p);
Given a position
g_utf8_strlen ()glong g_utf8_strlen (const gchar *p, gssize max); Returns the length of the string in characters.
g_utf8_strncpy ()gchar* g_utf8_strncpy (gchar *dest, const gchar *src, gsize n);
Like the standard C
g_utf8_strchr ()gchar* g_utf8_strchr (const gchar *p, gssize len, gunichar c);
Finds the leftmost occurrence of the given ISO10646 character
in a UTF-8 encoded string, while limiting the search to
g_utf8_strrchr ()gchar* g_utf8_strrchr (const gchar *p, gssize len, gunichar c);
Find the rightmost occurrence of the given ISO10646 character
in a UTF-8 encoded string, while limiting the search to
g_utf8_strreverse ()gchar* g_utf8_strreverse (const gchar *str, gssize len);
Reverses a UTF-8 string.
Note that unlike
Since 2.2 g_utf8_validate ()gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end);
Validates UTF-8 encoded text.
Note that
Returns
g_utf8_strup ()gchar* g_utf8_strup (const gchar *str, gssize len); Converts all Unicode characters in the string that have a case to uppercase. The exact manner that this is done depends on the current locale, and may result in the number of characters in the string increasing. (For instance, the German ess-zet will be changed to SS.)
g_utf8_strdown ()gchar* g_utf8_strdown (const gchar *str, gssize len); Converts all Unicode characters in the string that have a case to lowercase. The exact manner that this is done depends on the current locale, and may result in the number of characters in the string changing.
g_utf8_casefold ()gchar* g_utf8_casefold (const gchar *str, gssize len);
Converts a string into a form that is independent of case. The
result will not correspond to any particular case, but can be
compared for equality or ordered with the results of calling
Note that calling
g_utf8_normalize ()gchar* g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode);
Converts a string into canonical form, standardizing
such issues as whether a character with an accent
is represented as a base character and combining
accent or as a single precomposed character. You
should generally call
The normalization mode
enum GNormalizeModetypedef enum { G_NORMALIZE_DEFAULT, G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT, G_NORMALIZE_DEFAULT_COMPOSE, G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE, G_NORMALIZE_ALL, G_NORMALIZE_NFKD = G_NORMALIZE_ALL, G_NORMALIZE_ALL_COMPOSE, G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE } GNormalizeMode; Defines how a Unicode string is transformed in a canonical form, standardizing such issues as whether a character with an accent is represented as a base character and combining accent or as a single precomposed character. Unicode strings should generally be normalized before comparing them.
g_utf8_collate ()gint g_utf8_collate (const gchar *str1, const gchar *str2);
Compares two strings for ordering using the linguistically
correct rules for the current locale. When sorting a large
number of strings, it will be significantly faster to
obtain collation keys with
g_utf8_collate_key ()gchar* g_utf8_collate_key (const gchar *str, gssize len);
Converts a string into a collation key that can be compared
with other collation keys using
g_utf8_to_utf16 ()gunichar2* g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error); Convert a string from UTF-8 to UTF-16. A 0 word will be added to the result after the converted text.
g_utf8_to_ucs4 ()gunichar* g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error); Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4. A trailing 0 will be added to the string after the converted text.
g_utf8_to_ucs4_fast ()gunichar* g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written);
Convert a string from UTF-8 to a 32-bit fixed width
representation as UCS-4, assuming valid UTF-8 input.
This function is roughly twice as fast as
g_utf16_to_ucs4 ()gunichar* g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error); Convert a string from UTF-16 to UCS-4. The result will be terminated with a 0 character.
g_utf16_to_utf8 ()gchar* g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error); Convert a string from UTF-16 to UTF-8. The result will be terminated with a 0 byte.
Note that the input is expected to be already in native endianness,
an initial byte-order-mark character is not handled specially.
g_ucs4_to_utf16 ()gunichar2* g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error); Convert a string from UCS-4 to UTF-16. A 0 word will be added to the result after the converted text.
g_ucs4_to_utf8 ()gchar* g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error); Convert a string from a 32-bit fixed width representation as UCS-4 to UTF-8. The result will be terminated with a 0 byte.
g_unichar_to_utf8 ()gint g_unichar_to_utf8 (gunichar c, gchar *outbuf); Converts a single character to UTF-8.
See Also
|
:: Command execute :: | |
--[ c99shell v. 1.0 pre-release build #16 powered by Captain Crunch Security Team | http://ccteam.ru | Generation time: 0.0038 ]-- |