Squid Web Cache master
Loading...
Searching...
No Matches
testTokenizer.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#include "squid.h"
10#include "base/CharacterSet.h"
11#include "compat/cppunit.h"
12#include "parser/Tokenizer.h"
13#include "unitTestMain.h"
14
33
34SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
35 "Host: resource.com\r\n"
36 "Cookie: laijkpk3422r j1noin \r\n"
37 "\r\n");
38const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
39const CharacterSet whitespace("whitespace"," \r\n");
40const CharacterSet crlf("crlf","\r\n");
41const CharacterSet tab("tab","\t");
42const CharacterSet numbers("numbers","0123456789");
43
44void
46{
47 const SBuf canary("This text should not be changed.");
48
50 SBuf s;
51
53 all += alpha;
54 all += crlf;
55 all += numbers;
56 all.add(':').add('.').add('/');
57
58 // an empty prefix should return false (the full output buffer case)
59 s = canary;
60 const SBuf before = t.remaining();
61 CPPUNIT_ASSERT(!t.prefix(s, all, 0));
62 // ... and a false return value means no parameter changes
63 CPPUNIT_ASSERT_EQUAL(canary, s);
64 // ... and a false return value means no input buffer changes
65 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
66
67 // successful prefix tokenization
68 CPPUNIT_ASSERT(t.prefix(s,alpha));
69 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
70 CPPUNIT_ASSERT(t.prefix(s,whitespace));
71 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
72
73 //no match (first char is not in the prefix set)
74 CPPUNIT_ASSERT(!t.prefix(s,whitespace));
75 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
76
77 // one more match to set S to something meaningful
78 CPPUNIT_ASSERT(t.prefix(s,alpha));
79 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
80
81 //no match (no characters from the character set in the prefix)
82 CPPUNIT_ASSERT(!t.prefix(s,tab));
83 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
84
85 // match until the end of the sample
86 CPPUNIT_ASSERT(t.prefix(s,all));
87 CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());
88
89 // empty prefix should return false (the empty input buffer case)
90 s = canary;
91 CPPUNIT_ASSERT(!t.prefix(s, all));
92 // ... and a false return value means no parameter changes
93 CPPUNIT_ASSERT_EQUAL(canary, s);
94}
95
96void
98{
100 SBuf s;
101
102 // first scenario: patterns match
103 // prep for test
104 CPPUNIT_ASSERT(t.prefix(s,alpha));
105 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
106
107 // test skipping one character from a character set
108 CPPUNIT_ASSERT(t.skipOne(whitespace));
109 // check that skip was right
110 CPPUNIT_ASSERT(t.prefix(s,alpha));
111 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
112
113 //check skip prefix
114 CPPUNIT_ASSERT(t.skip(SBuf("://")));
115 // verify
116 CPPUNIT_ASSERT(t.prefix(s,alpha));
117 CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);
118
119 // no skip
120 CPPUNIT_ASSERT(!t.skipOne(alpha));
121 CPPUNIT_ASSERT(!t.skip(SBuf("://")));
122 CPPUNIT_ASSERT(!t.skip('a'));
123
124 // test skipping all characters from a character set while looking at .com
125 CPPUNIT_ASSERT(t.skip('.'));
126 CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha));
127 CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path")));
128}
129
130void
132{
134 SBuf s;
135
136 // first scenario: patterns match
137 CPPUNIT_ASSERT(t.token(s,whitespace));
138 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
139 CPPUNIT_ASSERT(t.token(s,whitespace));
140 CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);
141 CPPUNIT_ASSERT(t.token(s,whitespace));
142 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);
143 CPPUNIT_ASSERT(t.token(s,whitespace));
144 CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);
145
146}
147
148void
150{
151 const SBuf canary("This text should not be changed.");
152
154 SBuf s;
155
157 all += alpha;
158 all += crlf;
159 all += numbers;
160 all.add(':').add('.').add('/');
161
162 // an empty suffix should return false (the full output buffer case)
163 s = canary;
164 const SBuf before = t.remaining();
165 CPPUNIT_ASSERT(!t.suffix(s, all, 0));
166 // ... and a false return value means no parameter changes
167 CPPUNIT_ASSERT_EQUAL(canary, s);
168 // ... and a false return value means no input buffer changes
169 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
170
171 // consume suffix until the last CRLF, including that last CRLF
172 SBuf::size_type remaining = t.remaining().length();
173 while (t.remaining().findLastOf(crlf) != SBuf::npos) {
174 CPPUNIT_ASSERT(t.remaining().length() > 0);
175 CPPUNIT_ASSERT(t.skipOneTrailing(all));
176 // ensure steady progress
177 CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1);
178 --remaining;
179 }
180
181 // no match (last char is not in the suffix set)
182 CPPUNIT_ASSERT(!t.suffix(s, crlf));
183 CPPUNIT_ASSERT(!t.suffix(s, whitespace));
184
185 // successful suffix tokenization
186 CPPUNIT_ASSERT(t.suffix(s, numbers));
187 CPPUNIT_ASSERT_EQUAL(SBuf("1"), s);
188 CPPUNIT_ASSERT(t.skipSuffix(SBuf("1.")));
189 CPPUNIT_ASSERT(t.skipSuffix(SBuf("/")));
190 CPPUNIT_ASSERT(t.suffix(s, alpha));
191 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s);
192 CPPUNIT_ASSERT(t.suffix(s, whitespace));
193 CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);
194
195 // match until the end of the sample
196 CPPUNIT_ASSERT(t.suffix(s, all));
197 CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining());
198
199 // an empty buffer does not end with a token
200 s = canary;
201 CPPUNIT_ASSERT(!t.suffix(s, all));
202 CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes
203
204 // we cannot skip an empty suffix, even in an empty buffer
205 CPPUNIT_ASSERT(!t.skipSuffix(SBuf()));
206}
207
208void
210{
211 // successful parse in base 10
212 {
213 int64_t rv;
214 Parser::Tokenizer t(SBuf("1234"));
215 const int64_t benchmark = 1234;
216 CPPUNIT_ASSERT(t.int64(rv, 10));
217 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
218 CPPUNIT_ASSERT(t.buf().isEmpty());
219 }
220
221 // successful parse, autodetect base
222 {
223 int64_t rv;
224 Parser::Tokenizer t(SBuf("1234"));
225 const int64_t benchmark = 1234;
226 CPPUNIT_ASSERT(t.int64(rv));
227 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
228 CPPUNIT_ASSERT(t.buf().isEmpty());
229 }
230
231 // successful parse, autodetect base
232 {
233 int64_t rv;
234 Parser::Tokenizer t(SBuf("01234"));
235 const int64_t benchmark = 01234;
236 CPPUNIT_ASSERT(t.int64(rv));
237 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
238 CPPUNIT_ASSERT(t.buf().isEmpty());
239 }
240
241 // successful parse, autodetect base
242 {
243 int64_t rv;
244 Parser::Tokenizer t(SBuf("0x12f4"));
245 const int64_t benchmark = 0x12f4;
246 CPPUNIT_ASSERT(t.int64(rv));
247 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
248 CPPUNIT_ASSERT(t.buf().isEmpty());
249 }
250
251 // When interpreting octal numbers, standard strtol() and Tokenizer::int64()
252 // treat leading zero as a part of sequence of digits rather than a
253 // character used _exclusively_ as base indicator. Thus, it is not possible
254 // to create an invalid octal number with an explicit octal base -- the
255 // first invalid character after the base will be successfully ignored. This
256 // treatment also makes it difficult to define "shortest valid octal input".
257 // Here, we are just enumerating interesting "short input" octal cases in
258 // four dimensions:
259 // 1. int64(base) argument: forced or auto-detected;
260 // 2. base character ("0") in input: absent or present;
261 // 3. post-base digits in input: absent, valid, or invalid;
262 // 4. input length limits via int64(length) argument: unlimited or limited.
263
264 // forced base; input: no base, no post-base digits, unlimited
265 {
266 int64_t rv;
267 Parser::Tokenizer t(SBuf(""));
268 CPPUNIT_ASSERT(!t.int64(rv, 8));
269 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
270 }
271
272 // forced base; input: no base, no post-base digits, limited
273 {
274 int64_t rv;
275 Parser::Tokenizer t(SBuf("7"));
276 CPPUNIT_ASSERT(!t.int64(rv, 8, false, 0));
277 CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
278 }
279
280 // forced base; input: no base, one valid post-base digit, unlimited
281 {
282 int64_t rv;
283 Parser::Tokenizer t(SBuf("4"));
284 const int64_t benchmark = 04;
285 CPPUNIT_ASSERT(t.int64(rv, 8));
286 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
287 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
288 }
289
290 // forced base; input: no base, one valid post-base digit, limited
291 {
292 int64_t rv;
293 Parser::Tokenizer t(SBuf("46"));
294 const int64_t benchmark = 04;
295 CPPUNIT_ASSERT(t.int64(rv, 8, false, 1));
296 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
297 CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
298 }
299
300 // forced base; input: no base, one invalid post-base digit, unlimited
301 {
302 int64_t rv;
303 Parser::Tokenizer t(SBuf("8"));
304 CPPUNIT_ASSERT(!t.int64(rv, 8));
305 CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
306 }
307
308 // forced base; input: no base, one invalid post-base digit, limited
309 {
310 int64_t rv;
311 Parser::Tokenizer t(SBuf("80"));
312 CPPUNIT_ASSERT(!t.int64(rv, 8, false, 1));
313 CPPUNIT_ASSERT_EQUAL(SBuf("80"), t.buf());
314 }
315
316 // repeat the above six octal cases, but now with base character in input
317
318 // forced base; input: base, no post-base digits, unlimited
319 {
320 int64_t rv;
321 Parser::Tokenizer t(SBuf("0"));
322 const int64_t benchmark = 0;
323 CPPUNIT_ASSERT(t.int64(rv, 8));
324 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
325 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
326 }
327
328 // forced base; input: base, no post-base digits, limited
329 {
330 int64_t rv;
331 Parser::Tokenizer t(SBuf("07"));
332 const int64_t benchmark = 0;
333 CPPUNIT_ASSERT(t.int64(rv, 8, false, 1));
334 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
335 CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
336 }
337
338 // forced base; input: base, one valid post-base digit, unlimited
339 {
340 int64_t rv;
341 Parser::Tokenizer t(SBuf("04"));
342 const int64_t benchmark = 04;
343 CPPUNIT_ASSERT(t.int64(rv, 8));
344 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
345 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
346 }
347
348 // forced base; input: base, one valid post-base digit, limited
349 {
350 int64_t rv;
351 Parser::Tokenizer t(SBuf("046"));
352 const int64_t benchmark = 04;
353 CPPUNIT_ASSERT(t.int64(rv, 8, false, 2));
354 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
355 CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
356 }
357
358 // forced base; input: base, one invalid post-base digit, unlimited
359 {
360 int64_t rv;
361 Parser::Tokenizer t(SBuf("08"));
362 const int64_t benchmark = 00;
363 CPPUNIT_ASSERT(t.int64(rv, 8));
364 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
365 CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
366 }
367
368 // forced base; input: base, one invalid post-base digit, limited
369 {
370 int64_t rv;
371 Parser::Tokenizer t(SBuf("08"));
372 const int64_t benchmark = 00;
373 CPPUNIT_ASSERT(t.int64(rv, 8, false, 2));
374 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
375 CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
376 }
377
378 // And now repeat six "with base character in input" octal cases but with
379 // auto-detected base. When octal cases below say "auto-detected base", they
380 // describe int64() base=0 parameter value. Current int64() implementation
381 // does auto-detect base as octal in all of these cases, but that might
382 // change, and some of these cases (e.g., "0") can also be viewed as a
383 // non-octal input case as well. These cases do not attempt to test base
384 // detection. They focus on other potential problems.
385
386 // auto-detected base; input: base, no post-base digits, unlimited
387 {
388 int64_t rv;
389 Parser::Tokenizer t(SBuf("0"));
390 const int64_t benchmark = 00;
391 CPPUNIT_ASSERT(t.int64(rv, 0));
392 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
393 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
394 }
395
396 // auto-detected base; input: base, no post-base digits, limited
397 {
398 int64_t rv;
399 Parser::Tokenizer t(SBuf("07"));
400 const int64_t benchmark = 0;
401 CPPUNIT_ASSERT(t.int64(rv, 0, false, 1));
402 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
403 CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
404 }
405
406 // auto-detected base; input: base, one valid post-base digit, unlimited
407 {
408 int64_t rv;
409 Parser::Tokenizer t(SBuf("04"));
410 const int64_t benchmark = 04;
411 CPPUNIT_ASSERT(t.int64(rv, 0));
412 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
413 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
414 }
415
416 // auto-detected base; input: base, one valid post-base digit, limited
417 {
418 int64_t rv;
419 Parser::Tokenizer t(SBuf("046"));
420 const int64_t benchmark = 04;
421 CPPUNIT_ASSERT(t.int64(rv, 0, false, 2));
422 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
423 CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
424 }
425
426 // auto-detected base; input: base, one invalid post-base digit, unlimited
427 {
428 int64_t rv;
429 Parser::Tokenizer t(SBuf("08"));
430 const int64_t benchmark = 00;
431 CPPUNIT_ASSERT(t.int64(rv, 0));
432 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
433 CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
434 }
435
436 // auto-detected base; input: base, one invalid post-base digit, limited
437 {
438 int64_t rv;
439 Parser::Tokenizer t(SBuf("08"));
440 const int64_t benchmark = 00;
441 CPPUNIT_ASSERT(t.int64(rv, 0, false, 2));
442 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
443 CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
444 }
445
446 // this ends four-dimensional enumeration of octal cases described earlier
447
448 // check octal base auto-detection
449 {
450 int64_t rv;
451 Parser::Tokenizer t(SBuf("0128"));
452 const int64_t benchmark = 012;
453 CPPUNIT_ASSERT(t.int64(rv, 0));
454 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
455 CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
456 }
457
458 // check that octal base auto-detection is not confused by repeated zeros
459 {
460 int64_t rv;
461 Parser::Tokenizer t(SBuf("00000000071"));
462 const int64_t benchmark = 00000000071;
463 CPPUNIT_ASSERT(t.int64(rv));
464 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
465 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
466 }
467
468 // check that forced octal base is not confused by hex prefix
469 {
470 int64_t rv;
471 Parser::Tokenizer t(SBuf("0x5"));
472 const int64_t benchmark = 0;
473 CPPUNIT_ASSERT(t.int64(rv, 8));
474 CPPUNIT_ASSERT_EQUAL(benchmark, rv);
475 CPPUNIT_ASSERT_EQUAL(SBuf("x5"), t.buf());
476 }
477
478 // autodetect decimal base in shortest valid input
479 {
480 int64_t rv;
481 Parser::Tokenizer t(SBuf("1"));
482 const int64_t benchmark = 1;
483 CPPUNIT_ASSERT(t.int64(rv));
484 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
485 CPPUNIT_ASSERT(t.buf().isEmpty());
486 }
487
488 // autodetect hex base in shortest valid input
489 {
490 int64_t rv;
491 Parser::Tokenizer t(SBuf("0X1"));
492 const int64_t benchmark = 0X1;
493 CPPUNIT_ASSERT(t.int64(rv));
494 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
495 CPPUNIT_ASSERT(t.buf().isEmpty());
496 }
497
498 // invalid (when autodetecting base) input matching hex base
499 {
500 int64_t rv;
501 Parser::Tokenizer t(SBuf("0x"));
502 CPPUNIT_ASSERT(!t.int64(rv));
503 CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf());
504 }
505
506 // invalid (when forcing hex base) input matching hex base
507 {
508 int64_t rv;
509 Parser::Tokenizer t(SBuf("0x"));
510 CPPUNIT_ASSERT(!t.int64(rv, 16));
511 CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf());
512 }
513
514 // invalid (when autodetecting base and limiting) input matching hex base
515 {
516 int64_t rv;
517 Parser::Tokenizer t(SBuf("0x2"));
518 CPPUNIT_ASSERT(!t.int64(rv, 0, true, 2));
519 CPPUNIT_ASSERT_EQUAL(SBuf("0x2"), t.buf());
520 }
521
522 // invalid (when forcing hex base and limiting) input matching hex base
523 {
524 int64_t rv;
525 Parser::Tokenizer t(SBuf("0x3"));
526 CPPUNIT_ASSERT(!t.int64(rv, 16, false, 2));
527 CPPUNIT_ASSERT_EQUAL(SBuf("0x3"), t.buf());
528 }
529
530 // API mismatch: don't eat leading space
531 {
532 int64_t rv;
533 Parser::Tokenizer t(SBuf(" 1234"));
534 CPPUNIT_ASSERT(!t.int64(rv));
535 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
536 }
537
538 // API mismatch: don't eat multiple leading spaces
539 {
540 int64_t rv;
541 Parser::Tokenizer t(SBuf(" 1234"));
542 CPPUNIT_ASSERT(!t.int64(rv));
543 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
544 }
545
546 // zero corner case: repeated zeros
547 {
548 int64_t rv;
549 Parser::Tokenizer t(SBuf("00"));
550 const int64_t benchmark = 00;
551 CPPUNIT_ASSERT(t.int64(rv));
552 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
553 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
554 }
555
556 // zero corner case: "positive" zero
557 {
558 int64_t rv;
559 Parser::Tokenizer t(SBuf("+0"));
560 const int64_t benchmark = +0;
561 CPPUNIT_ASSERT(t.int64(rv));
562 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
563 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
564 }
565
566 // zero corner case: "negative" zero
567 {
568 int64_t rv;
569 Parser::Tokenizer t(SBuf("-0"));
570 const int64_t benchmark = -0;
571 CPPUNIT_ASSERT(t.int64(rv));
572 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
573 CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
574 }
575
576 // trailing spaces
577 {
578 int64_t rv;
579 Parser::Tokenizer t(SBuf("1234 foo"));
580 const int64_t benchmark = 1234;
581 CPPUNIT_ASSERT(t.int64(rv));
582 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
583 CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf());
584 }
585
586 // trailing nonspaces
587 {
588 int64_t rv;
589 Parser::Tokenizer t(SBuf("1234foo"));
590 const int64_t benchmark = 1234;
591 CPPUNIT_ASSERT(t.int64(rv));
592 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
593 CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf());
594 }
595
596 // trailing nonspaces
597 {
598 int64_t rv;
599 Parser::Tokenizer t(SBuf("0x1234foo"));
600 const int64_t benchmark = 0x1234f;
601 CPPUNIT_ASSERT(t.int64(rv));
602 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
603 CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf());
604 }
605
606 // overflow
607 {
608 int64_t rv;
609 Parser::Tokenizer t(SBuf("1029397752385698678762234"));
610 CPPUNIT_ASSERT(!t.int64(rv));
611 CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf());
612 }
613
614 // buffered sub-string parsing
615 {
616 int64_t rv;
617 SBuf base("1029397752385698678762234");
618 const int64_t benchmark = 22;
619 Parser::Tokenizer t(base.substr(base.length()-4,2));
620 CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf());
621 CPPUNIT_ASSERT(t.int64(rv));
622 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
623 CPPUNIT_ASSERT(t.buf().isEmpty());
624 }
625
626 // base-16, prefix
627 {
628 int64_t rv;
629 SBuf base("deadbeefrow");
630 const int64_t benchmark=0xdeadbeef;
631 Parser::Tokenizer t(base);
632 CPPUNIT_ASSERT(t.int64(rv,16));
633 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
634 CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf());
635
636 }
637}
638
639int
640main(int argc, char *argv[])
641{
642 return TestProgram().run(argc, argv);
643}
644
optimized set of C chars, with quick membership test and merge support
CharacterSet & add(const unsigned char c)
add a given character to the character set
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition Tokenizer.cc:79
bool suffix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition Tokenizer.cc:117
bool skipOne(const CharacterSet &discardables)
Definition Tokenizer.cc:161
bool token(SBuf &returnedToken, const CharacterSet &delimiters)
Definition Tokenizer.cc:61
bool skipSuffix(const SBuf &tokenToSkip)
Definition Tokenizer.cc:172
SBuf::size_type skipAll(const CharacterSet &discardables)
Definition Tokenizer.cc:137
bool int64(int64_t &result, int base=0, bool allowSign=true, SBuf::size_type limit=SBuf::npos)
Definition Tokenizer.cc:238
SBuf buf() const
yet unparsed data
Definition Tokenizer.h:35
const SBuf & remaining() const
the remaining unprocessed section of buffer
Definition Tokenizer.h:44
bool skipOneTrailing(const CharacterSet &discardables)
Definition Tokenizer.cc:211
bool skip(const SBuf &tokenToSkip)
Definition Tokenizer.cc:189
Definition SBuf.h:94
static const size_type npos
Definition SBuf.h:100
size_type length() const
Returns the number of bytes stored in SBuf.
Definition SBuf.h:419
bool isEmpty() const
Definition SBuf.h:435
size_type findLastOf(const CharacterSet &set, size_type endPos=npos) const
Definition SBuf.cc:769
bool startsWith(const SBuf &S, const SBufCaseSensitive isCaseSensitive=caseSensitive) const
Definition SBuf.cc:442
SBuf substr(size_type pos, size_type n=npos) const
Definition SBuf.cc:576
MemBlob::size_type size_type
Definition SBuf.h:96
implements test program's main() function while enabling customization
int run(int argc, char *argv[])
CPPUNIT_TEST(testTokenizerPrefix)
CPPUNIT_TEST(testTokenizerSuffix)
void testTokenizerPrefix()
void testTokenizerSkip()
void testTokenizerToken()
CPPUNIT_TEST(testTokenizerToken)
CPPUNIT_TEST(testTokenizerSkip)
void testTokenizerInt64()
void testTokenizerSuffix()
CPPUNIT_TEST(testTokenizerInt64)
CPPUNIT_TEST_SUITE(TestTokenizer)
int main()
const CharacterSet tab("tab","\t")
SBuf text("GET http://resource.com/path HTTP/1.1\r\n" "Host: resource.com\r\n" "Cookie: laijkpk3422r j1noin \r\n" "\r\n")
CPPUNIT_TEST_SUITE_REGISTRATION(TestTokenizer)
const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
const CharacterSet crlf("crlf","\r\n")
const CharacterSet whitespace("whitespace"," \r\n")
const CharacterSet numbers("numbers","0123456789")