ICU 71.1  71.1
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 
35 #if U_SHOW_CPLUSPLUS_API
36 #include "unicode/localpointer.h"
37 #endif // U_SHOW_CPLUSPLUS_API
38 
39 #ifndef USET_DEFINED
40 
41 #ifndef U_IN_DOXYGEN
42 #define USET_DEFINED
43 #endif
44 
50 typedef struct USet USet;
51 #endif
52 
58 enum {
64 
92 
102 };
103 
159 typedef enum USetSpanCondition {
208 #ifndef U_HIDE_DEPRECATED_API
209 
214 #endif // U_HIDE_DEPRECATED_API
216 
217 enum {
225 };
226 
232 typedef struct USerializedSet {
237  const uint16_t *array;
242  int32_t bmpLength;
247  int32_t length;
252  uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
253 } USerializedSet;
254 
255 /*********************************************************************
256  * USet API
257  *********************************************************************/
258 
266 U_CAPI USet* U_EXPORT2
267 uset_openEmpty(void);
268 
279 U_CAPI USet* U_EXPORT2
280 uset_open(UChar32 start, UChar32 end);
281 
291 U_CAPI USet* U_EXPORT2
292 uset_openPattern(const UChar* pattern, int32_t patternLength,
293  UErrorCode* ec);
294 
306 U_CAPI USet* U_EXPORT2
307 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
308  uint32_t options,
309  UErrorCode* ec);
310 
317 U_CAPI void U_EXPORT2
318 uset_close(USet* set);
319 
320 #if U_SHOW_CPLUSPLUS_API
321 
322 U_NAMESPACE_BEGIN
323 
334 
335 U_NAMESPACE_END
336 
337 #endif
338 
348 U_CAPI USet * U_EXPORT2
349 uset_clone(const USet *set);
350 
360 U_CAPI UBool U_EXPORT2
361 uset_isFrozen(const USet *set);
362 
377 U_CAPI void U_EXPORT2
378 uset_freeze(USet *set);
379 
390 U_CAPI USet * U_EXPORT2
391 uset_cloneAsThawed(const USet *set);
392 
402 U_CAPI void U_EXPORT2
403 uset_set(USet* set,
404  UChar32 start, UChar32 end);
405 
427 U_CAPI int32_t U_EXPORT2
428 uset_applyPattern(USet *set,
429  const UChar *pattern, int32_t patternLength,
430  uint32_t options,
431  UErrorCode *status);
432 
455 U_CAPI void U_EXPORT2
456 uset_applyIntPropertyValue(USet* set,
457  UProperty prop, int32_t value, UErrorCode* ec);
458 
494 U_CAPI void U_EXPORT2
495 uset_applyPropertyAlias(USet* set,
496  const UChar *prop, int32_t propLength,
497  const UChar *value, int32_t valueLength,
498  UErrorCode* ec);
499 
509 U_CAPI UBool U_EXPORT2
510 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
511  int32_t pos);
512 
528 U_CAPI int32_t U_EXPORT2
529 uset_toPattern(const USet* set,
530  UChar* result, int32_t resultCapacity,
531  UBool escapeUnprintable,
532  UErrorCode* ec);
533 
542 U_CAPI void U_EXPORT2
543 uset_add(USet* set, UChar32 c);
544 
557 U_CAPI void U_EXPORT2
558 uset_addAll(USet* set, const USet *additionalSet);
559 
569 U_CAPI void U_EXPORT2
570 uset_addRange(USet* set, UChar32 start, UChar32 end);
571 
581 U_CAPI void U_EXPORT2
582 uset_addString(USet* set, const UChar* str, int32_t strLen);
583 
593 U_CAPI void U_EXPORT2
594 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
595 
604 U_CAPI void U_EXPORT2
605 uset_remove(USet* set, UChar32 c);
606 
616 U_CAPI void U_EXPORT2
617 uset_removeRange(USet* set, UChar32 start, UChar32 end);
618 
628 U_CAPI void U_EXPORT2
629 uset_removeString(USet* set, const UChar* str, int32_t strLen);
630 
640 U_CAPI void U_EXPORT2
641 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
642 
654 U_CAPI void U_EXPORT2
655 uset_removeAll(USet* set, const USet* removeSet);
656 
669 U_CAPI void U_EXPORT2
670 uset_retain(USet* set, UChar32 start, UChar32 end);
671 
683 U_CAPI void U_EXPORT2
684 uset_retainString(USet *set, const UChar *str, int32_t length);
685 
695 U_CAPI void U_EXPORT2
696 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
697 
710 U_CAPI void U_EXPORT2
711 uset_retainAll(USet* set, const USet* retain);
712 
721 U_CAPI void U_EXPORT2
722 uset_compact(USet* set);
723 
737 U_CAPI void U_EXPORT2
738 uset_complement(USet* set);
739 
753 U_CAPI void U_EXPORT2
754 uset_complementRange(USet *set, UChar32 start, UChar32 end);
755 
766 U_CAPI void U_EXPORT2
767 uset_complementString(USet *set, const UChar *str, int32_t length);
768 
778 U_CAPI void U_EXPORT2
779 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
780 
792 U_CAPI void U_EXPORT2
793 uset_complementAll(USet* set, const USet* complement);
794 
802 U_CAPI void U_EXPORT2
803 uset_clear(USet* set);
804 
831 U_CAPI void U_EXPORT2
832 uset_closeOver(USet* set, int32_t attributes);
833 
840 U_CAPI void U_EXPORT2
841 uset_removeAllStrings(USet* set);
842 
850 U_CAPI UBool U_EXPORT2
851 uset_isEmpty(const USet* set);
852 
853 #ifndef U_HIDE_DRAFT_API
854 
859 U_CAPI UBool U_EXPORT2
860 uset_hasStrings(const USet *set);
861 #endif // U_HIDE_DRAFT_API
862 
871 U_CAPI UBool U_EXPORT2
872 uset_contains(const USet* set, UChar32 c);
873 
883 U_CAPI UBool U_EXPORT2
884 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
885 
894 U_CAPI UBool U_EXPORT2
895 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
896 
907 U_CAPI int32_t U_EXPORT2
908 uset_indexOf(const USet* set, UChar32 c);
909 
925 U_CAPI UChar32 U_EXPORT2
926 uset_charAt(const USet* set, int32_t charIndex);
927 
941 U_CAPI int32_t U_EXPORT2
942 uset_size(const USet* set);
943 
944 #ifndef U_HIDE_DRAFT_API
945 
953 U_CAPI int32_t U_EXPORT2
954 uset_getRangeCount(const USet *set);
955 #endif // U_HIDE_DRAFT_API
956 
965 U_CAPI int32_t U_EXPORT2
966 uset_getItemCount(const USet* set);
967 
996 U_CAPI int32_t U_EXPORT2
997 uset_getItem(const USet* set, int32_t itemIndex,
998  UChar32* start, UChar32* end,
999  UChar* str, int32_t strCapacity,
1000  UErrorCode* ec);
1001 
1010 U_CAPI UBool U_EXPORT2
1011 uset_containsAll(const USet* set1, const USet* set2);
1012 
1023 U_CAPI UBool U_EXPORT2
1024 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1025 
1034 U_CAPI UBool U_EXPORT2
1035 uset_containsNone(const USet* set1, const USet* set2);
1036 
1045 U_CAPI UBool U_EXPORT2
1046 uset_containsSome(const USet* set1, const USet* set2);
1047 
1067 U_CAPI int32_t U_EXPORT2
1068 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1069 
1088 U_CAPI int32_t U_EXPORT2
1089 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1090 
1110 U_CAPI int32_t U_EXPORT2
1111 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1112 
1131 U_CAPI int32_t U_EXPORT2
1132 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1133 
1142 U_CAPI UBool U_EXPORT2
1143 uset_equals(const USet* set1, const USet* set2);
1144 
1145 /*********************************************************************
1146  * Serialized set API
1147  *********************************************************************/
1148 
1198 U_CAPI int32_t U_EXPORT2
1199 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1200 
1209 U_CAPI UBool U_EXPORT2
1210 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1211 
1219 U_CAPI void U_EXPORT2
1220 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1221 
1230 U_CAPI UBool U_EXPORT2
1231 uset_serializedContains(const USerializedSet* set, UChar32 c);
1232 
1242 U_CAPI int32_t U_EXPORT2
1243 uset_getSerializedRangeCount(const USerializedSet* set);
1244 
1258 U_CAPI UBool U_EXPORT2
1259 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1260  UChar32* pStart, UChar32* pEnd);
1261 
1262 #endif
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:252
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
U_CAPI int32_t uset_getRangeCount(const USet *set)
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
Spans the longest substring that is a concatenation of set elements (characters or strings)...
Definition: uset.h:187
One more than the last span condition.
Definition: uset.h:213
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:237
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
"Smart pointer" class, closes a USet via uset_close().
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:63
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end...
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:467
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:159
C API: Unicode Properties.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:418
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
int32_t length
The total length of the array.
Definition: uset.h:247
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:195
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:50
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
Capacity of USerializedSet::staticArray.
Definition: uset.h:224
Enable case insensitive matching.
Definition: uset.h:91
Enable case insensitive matching.
Definition: uset.h:101
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:242
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
A serialized form of a Unicode set.
Definition: uset.h:232
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
Continues a span() while there is a set element at the current position.
Definition: uset.h:207
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
U_CAPI UBool uset_hasStrings(const USet *set)
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
Continues a span() while there is no set element at the current position.
Definition: uset.h:172
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269