MagickCore  6.9.12-56
Convert, Edit, Or Compose Bitmap Images
 All Data Structures
token.c
1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 % %
4 % %
5 % %
6 % TTTTT OOO K K EEEEE N N %
7 % T O O K K E NN N %
8 % T O O KKK EEE N N N %
9 % T O O K K E N NN %
10 % T OOO K K EEEEE N N %
11 % %
12 % %
13 % MagickCore Token Methods %
14 % %
15 % Software Design %
16 % Cristy %
17 % January 1993 %
18 % %
19 % %
20 % Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization %
21 % dedicated to making software imaging solutions freely available. %
22 % %
23 % You may not use this file except in compliance with the License. You may %
24 % obtain a copy of the License at %
25 % %
26 % https://imagemagick.org/script/license.php %
27 % %
28 % Unless required by applicable law or agreed to in writing, software %
29 % distributed under the License is distributed on an "AS IS" BASIS, %
30 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31 % See the License for the specific language governing permissions and %
32 % limitations under the License. %
33 % %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 
40 /*
41  Include declarations.
42 */
43 #include "magick/studio.h"
44 #include "magick/exception.h"
45 #include "magick/exception-private.h"
46 #include "magick/image.h"
47 #include "magick/image-private.h"
48 #include "magick/locale-private.h"
49 #include "magick/memory_.h"
50 #include "magick/string_.h"
51 #include "magick/string-private.h"
52 #include "magick/token.h"
53 #include "magick/token-private.h"
54 #include "magick/utility.h"
55 
56 /*
57  Typedef declarations.
58 */
59 struct _TokenInfo
60 {
61  int
62  state;
63 
64  MagickStatusType
65  flag;
66 
67  ssize_t
68  offset;
69 
70  char
71  quote;
72 
73  size_t
74  signature;
75 };
76 
77 /*
78 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
79 % %
80 % %
81 % %
82 % A c q u i r e T o k e n I n f o %
83 % %
84 % %
85 % %
86 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
87 %
88 % AcquireTokenInfo() allocates the TokenInfo structure.
89 %
90 % The format of the AcquireTokenInfo method is:
91 %
92 % TokenInfo *AcquireTokenInfo()
93 %
94 */
95 MagickExport TokenInfo *AcquireTokenInfo(void)
96 {
97  TokenInfo
98  *token_info;
99 
100  token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
101  if (token_info == (TokenInfo *) NULL)
102  ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
103  token_info->signature=MagickCoreSignature;
104  return(token_info);
105 }
106 
107 /*
108 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109 % %
110 % %
111 % %
112 % D e s t r o y T o k e n I n f o %
113 % %
114 % %
115 % %
116 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117 %
118 % DestroyTokenInfo() deallocates memory associated with a TokenInfo
119 % structure.
120 %
121 % The format of the DestroyTokenInfo method is:
122 %
123 % TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
124 %
125 % A description of each parameter follows:
126 %
127 % o token_info: Specifies a pointer to a TokenInfo structure.
128 %
129 */
130 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
131 {
132  assert(token_info != (TokenInfo *) NULL);
133  assert(token_info->signature == MagickCoreSignature);
134  if (IsEventLogging() != MagickFalse)
135  (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
136  token_info->signature=(~MagickCoreSignature);
137  token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
138  return(token_info);
139 }
140 
141 /*
142 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
143 % %
144 % %
145 % %
146 + G e t N e x t T o k e n %
147 % %
148 % %
149 % %
150 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
151 %
152 % GetNextToken() gets a token from the token stream. A token is defined as
153 % a sequence of characters delimited by whitespace (e.g. clip-path), a
154 % sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
155 % parenthesis (e.g. rgb(0,0,0)). GetNextToken() also recognizes these
156 % separator characters: ':', '=', ',', and ';'. GetNextToken() returns the
157 % length of the consumed token.
158 %
159 % The format of the GetNextToken method is:
160 %
161 % size_t GetNextToken(const char *magick_restrict start,
162 % const char **magick_restrict end,const size_t extent,
163 % char *magick_restrict token)
164 %
165 % A description of each parameter follows:
166 %
167 % o start: the start of the token sequence.
168 %
169 % o end: point to the end of the token sequence.
170 %
171 % o extent: maximum extent of the token.
172 %
173 % o token: copy the token to this buffer.
174 %
175 */
176 MagickExport magick_hot_spot size_t GetNextToken(
177  const char *magick_restrict start,const char **magick_restrict end,
178  const size_t extent,char *magick_restrict token)
179 {
180  double
181  value;
182 
183  char
184  *magick_restrict q;
185 
186  const char
187  *magick_restrict p;
188 
189  ssize_t
190  i;
191 
192  assert(start != (const char *) NULL);
193  assert(token != (char *) NULL);
194  i=0;
195  p=start;
196  while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
197  p++;
198  switch (*p)
199  {
200  case '\0':
201  break;
202  case '"':
203  case '\'':
204  case '`':
205  case '{':
206  {
207  char
208  escape;
209 
210  switch (*p)
211  {
212  case '"': escape='"'; break;
213  case '\'': escape='\''; break;
214  case '`': escape='\''; break;
215  case '{': escape='}'; break;
216  default: escape=(*p); break;
217  }
218  for (p++; *p != '\0'; p++)
219  {
220  if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
221  p++;
222  else
223  if (*p == escape)
224  {
225  p++;
226  break;
227  }
228  if (i < (ssize_t) (extent-1))
229  token[i++]=(*p);
230  if ((size_t) (p-start) >= (extent-1))
231  break;
232  }
233  break;
234  }
235  case '/':
236  {
237  if (i < (ssize_t) (extent-1))
238  token[i++]=(*p);
239  p++;
240  if ((*p == '>') || (*p == '/'))
241  {
242  if (i < (ssize_t) (extent-1))
243  token[i++]=(*p);
244  p++;
245  }
246  break;
247  }
248  default:
249  {
250  char
251  *q;
252 
253  value=StringToDouble(p,&q);
254  (void) value;
255  if ((p != q) && (*p != ','))
256  {
257  for ( ; (p < q) && (*p != ','); p++)
258  {
259  if (i < (ssize_t) (extent-1))
260  token[i++]=(*p);
261  if ((size_t) (p-start) >= (extent-1))
262  break;
263  }
264  if (*p == '%')
265  {
266  if (i < (ssize_t) (extent-1))
267  token[i++]=(*p);
268  p++;
269  }
270  break;
271  }
272  if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
273  (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
274  {
275  if (i < (ssize_t) (extent-1))
276  token[i++]=(*p);
277  p++;
278  break;
279  }
280  for ( ; *p != '\0'; p++)
281  {
282  if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
283  (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
284  break;
285  if ((i > 0) && (*p == '<'))
286  break;
287  if (i < (ssize_t) (extent-1))
288  token[i++]=(*p);
289  if (*p == '>')
290  break;
291  if (*p == '(')
292  {
293  for (p++; *p != '\0'; p++)
294  {
295  if (i < (ssize_t) (extent-1))
296  token[i++]=(*p);
297  if ((*p == ')') && (*(p-1) != '\\'))
298  break;
299  if ((size_t) (p-start) >= (extent-1))
300  break;
301  }
302  if (*p == '\0')
303  break;
304  }
305  if ((size_t) (p-start) >= (extent-1))
306  break;
307  }
308  break;
309  }
310  }
311  token[i]='\0';
312  if (LocaleNCompare(token,"url(#",5) == 0)
313  {
314  q=strrchr(token,')');
315  if (q != (char *) NULL)
316  {
317  *q='\0';
318  (void) memmove(token,token+5,(size_t) (q-token-4));
319  }
320  }
321  while (isspace((int) ((unsigned char) *p)) != 0)
322  p++;
323  if (end != (const char **) NULL)
324  *end=(const char *) p;
325  return(p-start+1);
326 }
327 
328 /*
329 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
330 % %
331 % %
332 % %
333 % G l o b E x p r e s s i o n %
334 % %
335 % %
336 % %
337 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
338 %
339 % GlobExpression() returns MagickTrue if the expression matches the pattern.
340 %
341 % The format of the GlobExpression function is:
342 %
343 % MagickBooleanType GlobExpression(const char *magick_restrict expression,
344 % const char *magick_restrict pattern,
345 % const MagickBooleanType case_insensitive)
346 %
347 % A description of each parameter follows:
348 %
349 % o expression: Specifies a pointer to a text string containing a file name.
350 %
351 % o pattern: Specifies a pointer to a text string containing a pattern.
352 %
353 % o case_insensitive: set to MagickTrue to ignore the case when matching
354 % an expression.
355 %
356 */
357 MagickExport MagickBooleanType GlobExpression(
358  const char *magick_restrict expression,const char *magick_restrict pattern,
359  const MagickBooleanType case_insensitive)
360 {
361  MagickBooleanType
362  done,
363  match;
364 
365  const char
366  *magick_restrict p;
367 
368  /*
369  Return on empty pattern or '*'.
370  */
371  if (pattern == (char *) NULL)
372  return(MagickTrue);
373  if (GetUTFCode(pattern) == 0)
374  return(MagickTrue);
375  if (LocaleCompare(pattern,"*") == 0)
376  return(MagickTrue);
377  p=pattern+strlen(pattern)-1;
378  if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
379  {
381  *exception;
382 
383  ImageInfo
384  *image_info;
385 
386  /*
387  Determine if pattern is a scene, i.e. img0001.pcd[2].
388  */
389  image_info=AcquireImageInfo();
390  (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
391  exception=AcquireExceptionInfo();
392  (void) SetImageInfo(image_info,0,exception);
393  exception=DestroyExceptionInfo(exception);
394  if (LocaleCompare(image_info->filename,pattern) != 0)
395  {
396  image_info=DestroyImageInfo(image_info);
397  return(MagickFalse);
398  }
399  image_info=DestroyImageInfo(image_info);
400  }
401  /*
402  Evaluate glob expression.
403  */
404  done=MagickFalse;
405  while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
406  {
407  if (GetUTFCode(expression) == 0)
408  if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
409  break;
410  switch (GetUTFCode(pattern))
411  {
412  case '*':
413  {
414  MagickBooleanType
415  status;
416 
417  status=MagickFalse;
418  while (GetUTFCode(pattern) == '*')
419  pattern+=GetUTFOctets(pattern);
420  while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
421  {
422  status=GlobExpression(expression,pattern,case_insensitive);
423  expression+=GetUTFOctets(expression);
424  }
425  if (status != MagickFalse)
426  {
427  while (GetUTFCode(expression) != 0)
428  expression+=GetUTFOctets(expression);
429  while (GetUTFCode(pattern) != 0)
430  pattern+=GetUTFOctets(pattern);
431  }
432  break;
433  }
434  case '[':
435  {
436  int
437  c;
438 
439  pattern+=GetUTFOctets(pattern);
440  for ( ; ; )
441  {
442  if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
443  {
444  done=MagickTrue;
445  break;
446  }
447  if (GetUTFCode(pattern) == '\\')
448  {
449  pattern+=GetUTFOctets(pattern);
450  if (GetUTFCode(pattern) == 0)
451  {
452  done=MagickTrue;
453  break;
454  }
455  }
456  if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
457  {
458  c=GetUTFCode(pattern);
459  pattern+=GetUTFOctets(pattern);
460  pattern+=GetUTFOctets(pattern);
461  if (GetUTFCode(pattern) == ']')
462  {
463  done=MagickTrue;
464  break;
465  }
466  if (GetUTFCode(pattern) == '\\')
467  {
468  pattern+=GetUTFOctets(pattern);
469  if (GetUTFCode(pattern) == 0)
470  {
471  done=MagickTrue;
472  break;
473  }
474  }
475  if ((GetUTFCode(expression) < c) ||
476  (GetUTFCode(expression) > GetUTFCode(pattern)))
477  {
478  pattern+=GetUTFOctets(pattern);
479  continue;
480  }
481  }
482  else
483  if (GetUTFCode(pattern) != GetUTFCode(expression))
484  {
485  pattern+=GetUTFOctets(pattern);
486  continue;
487  }
488  pattern+=GetUTFOctets(pattern);
489  while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
490  {
491  if ((GetUTFCode(pattern) == '\\') &&
492  (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
493  pattern+=GetUTFOctets(pattern);
494  pattern+=GetUTFOctets(pattern);
495  }
496  if (GetUTFCode(pattern) != 0)
497  {
498  pattern+=GetUTFOctets(pattern);
499  expression+=GetUTFOctets(expression);
500  }
501  break;
502  }
503  break;
504  }
505  case '?':
506  {
507  pattern+=GetUTFOctets(pattern);
508  expression+=GetUTFOctets(expression);
509  break;
510  }
511  case '{':
512  {
513  char
514  *target;
515 
516  char
517  *p;
518 
519  target=AcquireString(pattern);
520  p=target;
521  pattern++;
522  while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
523  {
524  *p++=(*pattern++);
525  if ((GetUTFCode(pattern) == ',') || (GetUTFCode(pattern) == '}'))
526  {
527  *p='\0';
528  match=GlobExpression(expression,target,case_insensitive);
529  if (match != MagickFalse)
530  {
531  expression+=MagickMin(strlen(expression),strlen(target));
532  break;
533  }
534  p=target;
535  pattern+=GetUTFOctets(pattern);
536  }
537  }
538  while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
539  pattern+=GetUTFOctets(pattern);
540  if (GetUTFCode(pattern) != 0)
541  pattern+=GetUTFOctets(pattern);
542  target=DestroyString(target);
543  break;
544  }
545  case '\\':
546  {
547  pattern+=GetUTFOctets(pattern);
548  if (GetUTFCode(pattern) == 0)
549  break;
550  }
551  default:
552  {
553  if (case_insensitive != MagickFalse)
554  {
555  if (LocaleToLowercase((int) GetUTFCode(expression)) != LocaleToLowercase((int) GetUTFCode(pattern)))
556  {
557  done=MagickTrue;
558  break;
559  }
560  }
561  else
562  if (GetUTFCode(expression) != GetUTFCode(pattern))
563  {
564  done=MagickTrue;
565  break;
566  }
567  expression+=GetUTFOctets(expression);
568  pattern+=GetUTFOctets(pattern);
569  }
570  }
571  }
572  while (GetUTFCode(pattern) == '*')
573  pattern+=GetUTFOctets(pattern);
574  match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
575  MagickTrue : MagickFalse;
576  return(match);
577 }
578 
579 /*
580 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
581 % %
582 % %
583 % %
584 + I s G l o b %
585 % %
586 % %
587 % %
588 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
589 %
590 % IsGlob() returns MagickTrue if the path specification contains a globbing
591 % pattern.
592 %
593 % The format of the IsGlob method is:
594 %
595 % MagickBooleanType IsGlob(const char *path)
596 %
597 % A description of each parameter follows:
598 %
599 % o path: the path.
600 %
601 */
602 MagickExport MagickBooleanType IsGlob(const char *path)
603 {
604  MagickBooleanType
605  status = MagickFalse;
606 
607  const char
608  *p;
609 
610  if (IsPathAccessible(path) != MagickFalse)
611  return(MagickFalse);
612  for (p=path; *p != '\0'; p++)
613  {
614  switch (*p)
615  {
616  case '*':
617  case '?':
618  case '{':
619  case '}':
620  case '[':
621  case ']':
622  {
623  status=MagickTrue;
624  break;
625  }
626  default:
627  break;
628  }
629  }
630  return(status);
631 }
632 
633 /*
634 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
635 % %
636 % %
637 % %
638 % I s M a g i c k T r u e %
639 % %
640 % %
641 % %
642 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
643 %
644 % IsMagickTrue() returns MagickTrue if the value is "true", "on", "yes" or
645 % "1".
646 %
647 % The format of the IsMagickTrue method is:
648 %
649 % MagickBooleanType IsMagickTrue(const char *value)
650 %
651 % A description of each parameter follows:
652 %
653 % o option: either MagickTrue or MagickFalse depending on the value
654 % parameter.
655 %
656 % o value: Specifies a pointer to a character array.
657 %
658 */
659 MagickExport MagickBooleanType IsMagickTrue(const char *value)
660 {
661  if (value == (const char *) NULL)
662  return(MagickFalse);
663  if (LocaleCompare(value,"true") == 0)
664  return(MagickTrue);
665  if (LocaleCompare(value,"on") == 0)
666  return(MagickTrue);
667  if (LocaleCompare(value,"yes") == 0)
668  return(MagickTrue);
669  if (LocaleCompare(value,"1") == 0)
670  return(MagickTrue);
671  return(MagickFalse);
672 }
673 
674 /*
675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
676 % %
677 % %
678 % %
679 % T o k e n i z e r %
680 % %
681 % %
682 % %
683 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
684 %
685 % Tokenizer() is a generalized, finite state token parser. It extracts tokens
686 % one at a time from a string of characters. The characters used for white
687 % space, for break characters, and for quotes can be specified. Also,
688 % characters in the string can be preceded by a specifiable escape character
689 % which removes any special meaning the character may have.
690 %
691 % Here is some terminology:
692 %
693 % o token: A single unit of information in the form of a group of
694 % characters.
695 %
696 % o white space: Space that gets ignored (except within quotes or when
697 % escaped), like blanks and tabs. In addition, white space terminates a
698 % non-quoted token.
699 %
700 % o break set: One or more characters that separates non-quoted tokens.
701 % Commas are a common break character. The usage of break characters to
702 % signal the end of a token is the same as that of white space, except
703 % multiple break characters with nothing or only white space between
704 % generate a null token for each two break characters together.
705 %
706 % For example, if blank is set to be the white space and comma is set to
707 % be the break character, the line
708 %
709 % A, B, C , , DEF
710 %
711 % ... consists of 5 tokens:
712 %
713 % 1) "A"
714 % 2) "B"
715 % 3) "C"
716 % 4) "" (the null string)
717 % 5) "DEF"
718 %
719 % o Quote character: A character that, when surrounding a group of other
720 % characters, causes the group of characters to be treated as a single
721 % token, no matter how many white spaces or break characters exist in
722 % the group. Also, a token always terminates after the closing quote.
723 % For example, if ' is the quote character, blank is white space, and
724 % comma is the break character, the following string
725 %
726 % A, ' B, CD'EF GHI
727 %
728 % ... consists of 4 tokens:
729 %
730 % 1) "A"
731 % 2) " B, CD" (note the blanks & comma)
732 % 3) "EF"
733 % 4) "GHI"
734 %
735 % The quote characters themselves do not appear in the resultant
736 % tokens. The double quotes are delimiters i use here for
737 % documentation purposes only.
738 %
739 % o Escape character: A character which itself is ignored but which
740 % causes the next character to be used as is. ^ and \ are often used
741 % as escape characters. An escape in the last position of the string
742 % gets treated as a "normal" (i.e., non-quote, non-white, non-break,
743 % and non-escape) character. For example, assume white space, break
744 % character, and quote are the same as in the above examples, and
745 % further, assume that ^ is the escape character. Then, in the string
746 %
747 % ABC, ' DEF ^' GH' I ^ J K^ L ^
748 %
749 % ... there are 7 tokens:
750 %
751 % 1) "ABC"
752 % 2) " DEF ' GH"
753 % 3) "I"
754 % 4) " " (a lone blank)
755 % 5) "J"
756 % 6) "K L"
757 % 7) "^" (passed as is at end of line)
758 %
759 % The format of the Tokenizer method is:
760 %
761 % int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
762 % const size_t max_token_length,const char *line,const char *white,
763 % const char *break_set,const char *quote,const char escape,
764 % char *breaker,int *next,char *quoted)
765 %
766 % A description of each parameter follows:
767 %
768 % o flag: right now, only the low order 2 bits are used.
769 %
770 % 1 => convert non-quoted tokens to upper case
771 % 2 => convert non-quoted tokens to lower case
772 % 0 => do not convert non-quoted tokens
773 %
774 % o token: a character string containing the returned next token
775 %
776 % o max_token_length: the maximum size of "token". Characters beyond
777 % "max_token_length" are truncated.
778 %
779 % o line: the string to be parsed.
780 %
781 % o white: a string of the valid white spaces. example:
782 %
783 % char whitesp[]={" \t"};
784 %
785 % blank and tab will be valid white space.
786 %
787 % o break: a string of the valid break characters. example:
788 %
789 % char breakch[]={";,"};
790 %
791 % semicolon and comma will be valid break characters.
792 %
793 % o quote: a string of the valid quote characters. An example would be
794 %
795 % char quote[]={"'\""};
796 %
797 % (this causes single and double quotes to be valid) Note that a
798 % token starting with one of these characters needs the same quote
799 % character to terminate it.
800 %
801 % for example:
802 %
803 % "ABC '
804 %
805 % is unterminated, but
806 %
807 % "DEF" and 'GHI'
808 %
809 % are properly terminated. Note that different quote characters
810 % can appear on the same line; only for a given token do the quote
811 % characters have to be the same.
812 %
813 % o escape: the escape character (NOT a string ... only one
814 % allowed). Use zero if none is desired.
815 %
816 % o breaker: the break character used to terminate the current
817 % token. If the token was quoted, this will be the quote used. If
818 % the token is the last one on the line, this will be zero.
819 %
820 % o next: this variable points to the first character of the
821 % next token. it gets reset by "tokenizer" as it steps through the
822 % string. Set it to 0 upon initialization, and leave it alone
823 % after that. You can change it if you want to jump around in the
824 % string or re-parse from the beginning, but be careful.
825 %
826 % o quoted: set to MagickTrue if the token was quoted and MagickFalse
827 % if not. You may need this information (for example: in C, a
828 % string with quotes around it is a character string, while one
829 % without is an identifier).
830 %
831 % o result: 0 if we haven't reached EOS (end of string), and 1
832 % if we have.
833 %
834 */
835 
/*
  Tokenizer() parser states.
*/
#define IN_WHITE  0  /* before a token: white space is being skipped */
#define IN_TOKEN  1  /* inside an unquoted token */
#define IN_QUOTE  2  /* inside a quoted token */
#define IN_OZONE  3  /* token text complete; waiting for a break or the next token */
840 
/*
  sindex() returns the index of the first occurrence of c in string, or -1
  when c does not occur.  The terminating NUL is never matched, and a NULL
  string is treated as an empty character set.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    i;

  if (string == (const char *) NULL)
    return(-1);
  for (i=0; string[i] != '\0'; i++)
    if (c == (int) string[i])
      return(i);
  return(-1);
}
851 
852 static void StoreToken(TokenInfo *token_info,char *string,
853  size_t max_token_length,int c)
854 {
855  ssize_t
856  i;
857 
858  if ((token_info->offset < 0) ||
859  ((size_t) token_info->offset >= (max_token_length-1)))
860  return;
861  i=token_info->offset++;
862  string[i]=(char) c;
863  if (token_info->state == IN_QUOTE)
864  return;
865  switch (token_info->flag & 0x03)
866  {
867  case 1:
868  {
869  string[i]=(char) LocaleToUppercase(c);
870  break;
871  }
872  case 2:
873  {
874  string[i]=(char) LocaleToLowercase(c);
875  break;
876  }
877  default:
878  break;
879  }
880 }
881 
882 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
883  char *token,const size_t max_token_length,const char *line,const char *white,
884  const char *break_set,const char *quote,const char escape,char *breaker,
885  int *next,char *quoted)
886 {
887  int
888  c;
889 
890  ssize_t
891  i;
892 
893  *breaker='\0';
894  *quoted='\0';
895  if (line[*next] == '\0')
896  return(1);
897  token_info->state=IN_WHITE;
898  token_info->quote=(char) MagickFalse;
899  token_info->flag=flag;
900  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
901  {
902  c=(int) line[*next];
903  i=sindex(c,break_set);
904  if (i >= 0)
905  {
906  switch (token_info->state)
907  {
908  case IN_WHITE:
909  case IN_TOKEN:
910  case IN_OZONE:
911  {
912  (*next)++;
913  *breaker=break_set[i];
914  token[token_info->offset]='\0';
915  return(0);
916  }
917  case IN_QUOTE:
918  {
919  StoreToken(token_info,token,max_token_length,c);
920  break;
921  }
922  }
923  continue;
924  }
925  i=sindex(c,quote);
926  if (i >= 0)
927  {
928  switch (token_info->state)
929  {
930  case IN_WHITE:
931  {
932  token_info->state=IN_QUOTE;
933  token_info->quote=quote[i];
934  *quoted=(char) MagickTrue;
935  break;
936  }
937  case IN_QUOTE:
938  {
939  if (quote[i] != token_info->quote)
940  StoreToken(token_info,token,max_token_length,c);
941  else
942  {
943  token_info->state=IN_OZONE;
944  token_info->quote='\0';
945  }
946  break;
947  }
948  case IN_TOKEN:
949  case IN_OZONE:
950  {
951  *breaker=(char) c;
952  token[token_info->offset]='\0';
953  return(0);
954  }
955  }
956  continue;
957  }
958  i=sindex(c,white);
959  if (i >= 0)
960  {
961  switch (token_info->state)
962  {
963  case IN_WHITE:
964  case IN_OZONE:
965  break;
966  case IN_TOKEN:
967  {
968  token_info->state=IN_OZONE;
969  break;
970  }
971  case IN_QUOTE:
972  {
973  StoreToken(token_info,token,max_token_length,c);
974  break;
975  }
976  }
977  continue;
978  }
979  if (c == (int) escape)
980  {
981  if (line[(*next)+1] == '\0')
982  {
983  *breaker='\0';
984  StoreToken(token_info,token,max_token_length,c);
985  (*next)++;
986  token[token_info->offset]='\0';
987  return(0);
988  }
989  switch (token_info->state)
990  {
991  case IN_WHITE:
992  {
993  (*next)--;
994  token_info->state=IN_TOKEN;
995  break;
996  }
997  case IN_TOKEN:
998  case IN_QUOTE:
999  {
1000  (*next)++;
1001  c=(int) line[*next];
1002  StoreToken(token_info,token,max_token_length,c);
1003  break;
1004  }
1005  case IN_OZONE:
1006  {
1007  token[token_info->offset]='\0';
1008  return(0);
1009  }
1010  }
1011  continue;
1012  }
1013  switch (token_info->state)
1014  {
1015  case IN_WHITE:
1016  {
1017  token_info->state=IN_TOKEN;
1018  StoreToken(token_info,token,max_token_length,c);
1019  break;
1020  }
1021  case IN_TOKEN:
1022  case IN_QUOTE:
1023  {
1024  StoreToken(token_info,token,max_token_length,c);
1025  break;
1026  }
1027  case IN_OZONE:
1028  {
1029  token[token_info->offset]='\0';
1030  return(0);
1031  }
1032  }
1033  }
1034  token[token_info->offset]='\0';
1035  return(0);
1036 }