testTokenizer_8cc_source.html

/*

 * Copyright (C) 1996-2026 The Squid Software Foundation and contributors

 *

 * Squid software is distributed under GPLv2+ license and includes

 * contributions from numerous individuals and organizations.

 * Please see the COPYING and CONTRIBUTORS files for details.

 */


#include "squid.h"

#include "base/CharacterSet.h"

#include "compat/cppunit.h"

#include "parser/Tokenizer.h"

#include "unitTestMain.h"


/// CppUnit fixture holding the Parser::Tokenizer test cases defined below.
class TestTokenizer : public CPPUNIT_NS::TestFixture
{
    CPPUNIT_TEST_SUITE(TestTokenizer);
    CPPUNIT_TEST(testTokenizerPrefix);
    CPPUNIT_TEST(testTokenizerSuffix);
    CPPUNIT_TEST(testTokenizerSkip);
    CPPUNIT_TEST(testTokenizerToken);
    CPPUNIT_TEST(testTokenizerInt64);
    CPPUNIT_TEST_SUITE_END();

protected:
    /* test cases, listed in the order in which they are defined in this file */

    /// exercises Parser::Tokenizer::prefix()
    void testTokenizerPrefix();

    /// exercises Parser::Tokenizer::skipOne(), skip(), and skipAll()
    void testTokenizerSkip();

    /// exercises Parser::Tokenizer::token()
    void testTokenizerToken();

    /// exercises Parser::Tokenizer::suffix(), skipOneTrailing(), and skipSuffix()
    void testTokenizerSuffix();

    /// exercises Parser::Tokenizer::int64()
    void testTokenizerInt64();
};

CPPUNIT_TEST_SUITE_REGISTRATION(TestTokenizer);


/// Sample HTTP request used as tokenizer input by the prefix, skip, token, and
/// suffix test cases. Declared const (like the character sets below) because
/// the test cases only read it: each one copies it into its own Tokenizer, and
/// a mutable global would let one test case corrupt the input of another.
const SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
                "Host: resource.com\r\n"
                "Cookie: laijkpk3422r j1noin \r\n"
                "\r\n");

/// lower- and upper-case ASCII letters
const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");

/// space, CR, and LF
const CharacterSet whitespace("whitespace"," \r\n");

/// CR and LF only
const CharacterSet crlf("crlf","\r\n");

/// horizontal tab; the sample text contains no such character
const CharacterSet tab("tab","\t");

/// decimal digits
const CharacterSet numbers("numbers","0123456789");


void


TestTokenizer::testTokenizerPrefix()

{

    const SBuf canary("This text should not be changed.");


    Parser::Tokenizer t(text);

    SBuf s;


    CharacterSet all(whitespace);

    all += alpha;

    all += crlf;

    all += numbers;

    all.add(':').add('.').add('/');


    // an empty prefix should return false (the full output buffer case)

    s = canary;

    const SBuf before = t.remaining();

    CPPUNIT_ASSERT(!t.prefix(s, all, 0));

    // ... and a false return value means no parameter changes

    CPPUNIT_ASSERT_EQUAL(canary, s);

    // ... and a false return value means no input buffer changes

    CPPUNIT_ASSERT_EQUAL(before, t.remaining());


    // successful prefix tokenization

    CPPUNIT_ASSERT(t.prefix(s,alpha));

    CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);

    CPPUNIT_ASSERT(t.prefix(s,whitespace));

    CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);


    //no match (first char is not in the prefix set)

    CPPUNIT_ASSERT(!t.prefix(s,whitespace));

    CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);


    // one more match to set S to something meaningful

    CPPUNIT_ASSERT(t.prefix(s,alpha));

    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);


    //no match (no characters from the character set in the prefix)

    CPPUNIT_ASSERT(!t.prefix(s,tab));

    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched


    // match until the end of the sample

    CPPUNIT_ASSERT(t.prefix(s,all));

    CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());


    // empty prefix should return false (the empty input buffer case)

    s = canary;

    CPPUNIT_ASSERT(!t.prefix(s, all));

    // ... and a false return value means no parameter changes

    CPPUNIT_ASSERT_EQUAL(canary, s);

}


void


TestTokenizer::testTokenizerSkip()

{

    Parser::Tokenizer t(text);

    SBuf s;


    // first scenario: patterns match

    // prep for test

    CPPUNIT_ASSERT(t.prefix(s,alpha));

    CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);


    // test skipping one character from a character set

    CPPUNIT_ASSERT(t.skipOne(whitespace));

    // check that skip was right

    CPPUNIT_ASSERT(t.prefix(s,alpha));

    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);


    //check skip prefix

    CPPUNIT_ASSERT(t.skip(SBuf("://")));

    // verify

    CPPUNIT_ASSERT(t.prefix(s,alpha));

    CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);


    // no skip

    CPPUNIT_ASSERT(!t.skipOne(alpha));

    CPPUNIT_ASSERT(!t.skip(SBuf("://")));

    CPPUNIT_ASSERT(!t.skip('a'));


    // test skipping all characters from a character set while looking at .com

    CPPUNIT_ASSERT(t.skip('.'));

    CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha));

    CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path")));

}


void


TestTokenizer::testTokenizerToken()

{

    Parser::Tokenizer t(text);

    SBuf s;


    // first scenario: patterns match

    CPPUNIT_ASSERT(t.token(s,whitespace));

    CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);

    CPPUNIT_ASSERT(t.token(s,whitespace));

    CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);

    CPPUNIT_ASSERT(t.token(s,whitespace));

    CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);

    CPPUNIT_ASSERT(t.token(s,whitespace));

    CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);


}


void


TestTokenizer::testTokenizerSuffix()

{

    const SBuf canary("This text should not be changed.");


    Parser::Tokenizer t(text);

    SBuf s;


    CharacterSet all(whitespace);

    all += alpha;

    all += crlf;

    all += numbers;

    all.add(':').add('.').add('/');


    // an empty suffix should return false (the full output buffer case)

    s = canary;

    const SBuf before = t.remaining();

    CPPUNIT_ASSERT(!t.suffix(s, all, 0));

    // ... and a false return value means no parameter changes

    CPPUNIT_ASSERT_EQUAL(canary, s);

    // ... and a false return value means no input buffer changes

    CPPUNIT_ASSERT_EQUAL(before, t.remaining());


    // consume suffix until the last CRLF, including that last CRLF

    SBuf::size_type remaining = t.remaining().length();

    while (t.remaining().findLastOf(crlf) != SBuf::npos) {

        CPPUNIT_ASSERT(t.remaining().length() > 0);

        CPPUNIT_ASSERT(t.skipOneTrailing(all));

        // ensure steady progress

        CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1);

        --remaining;

    }


    // no match (last char is not in the suffix set)

    CPPUNIT_ASSERT(!t.suffix(s, crlf));

    CPPUNIT_ASSERT(!t.suffix(s, whitespace));


    // successful suffix tokenization

    CPPUNIT_ASSERT(t.suffix(s, numbers));

    CPPUNIT_ASSERT_EQUAL(SBuf("1"), s);

    CPPUNIT_ASSERT(t.skipSuffix(SBuf("1.")));

    CPPUNIT_ASSERT(t.skipSuffix(SBuf("/")));

    CPPUNIT_ASSERT(t.suffix(s, alpha));

    CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s);

    CPPUNIT_ASSERT(t.suffix(s, whitespace));

    CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);


    // match until the end of the sample

    CPPUNIT_ASSERT(t.suffix(s, all));

    CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining());


    // an empty buffer does not end with a token

    s = canary;

    CPPUNIT_ASSERT(!t.suffix(s, all));

    CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes


    // we cannot skip an empty suffix, even in an empty buffer

    CPPUNIT_ASSERT(!t.skipSuffix(SBuf()));

}


void


TestTokenizer::testTokenizerInt64()

{

    // successful parse in base 10

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("1234"));

        const int64_t benchmark = 1234;

        CPPUNIT_ASSERT(t.int64(rv, 10));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT(t.buf().isEmpty());

    }


    // successful parse, autodetect base

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("1234"));

        const int64_t benchmark = 1234;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT(t.buf().isEmpty());

    }


    // successful parse, autodetect base

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("01234"));

        const int64_t benchmark = 01234;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT(t.buf().isEmpty());

    }


    // successful parse, autodetect base

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0x12f4"));

        const int64_t benchmark = 0x12f4;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT(t.buf().isEmpty());

    }


    // When interpreting octal numbers, standard strtol() and Tokenizer::int64()

    // treat leading zero as a part of sequence of digits rather than a

    // character used _exclusively_ as base indicator. Thus, it is not possible

    // to create an invalid octal number with an explicit octal base -- the

    // first invalid character after the base will be successfully ignored. This

    // treatment also makes it difficult to define "shortest valid octal input".

    // Here, we are just enumerating interesting "short input" octal cases in

    // four dimensions:

    // 1. int64(base) argument: forced or auto-detected;

    // 2. base character ("0") in input: absent or present;

    // 3. post-base digits in input: absent, valid, or invalid;

    // 4. input length limits via int64(length) argument: unlimited or limited.


    // forced base; input: no base, no post-base digits, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf(""));

        CPPUNIT_ASSERT(!t.int64(rv, 8));

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // forced base; input: no base, no post-base digits, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("7"));

        CPPUNIT_ASSERT(!t.int64(rv, 8, false, 0));

        CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());

    }


    // forced base; input: no base, one valid post-base digit, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("4"));

        const int64_t benchmark = 04;

        CPPUNIT_ASSERT(t.int64(rv, 8));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // forced base; input: no base, one valid post-base digit, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("46"));

        const int64_t benchmark = 04;

        CPPUNIT_ASSERT(t.int64(rv, 8, false, 1));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());

    }


    // forced base; input: no base, one invalid post-base digit, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("8"));

        CPPUNIT_ASSERT(!t.int64(rv, 8));

        CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());

    }


    // forced base; input: no base, one invalid post-base digit, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("80"));

        CPPUNIT_ASSERT(!t.int64(rv, 8, false, 1));

        CPPUNIT_ASSERT_EQUAL(SBuf("80"), t.buf());

    }


    // repeat the above six octal cases, but now with base character in input


    // forced base; input: base, no post-base digits, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0"));

        const int64_t benchmark = 0;

        CPPUNIT_ASSERT(t.int64(rv, 8));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // forced base; input: base, no post-base digits, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("07"));

        const int64_t benchmark = 0;

        CPPUNIT_ASSERT(t.int64(rv, 8, false, 1));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());

    }


    // forced base; input: base, one valid post-base digit, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("04"));

        const int64_t benchmark = 04;

        CPPUNIT_ASSERT(t.int64(rv, 8));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // forced base; input: base, one valid post-base digit, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("046"));

        const int64_t benchmark = 04;

        CPPUNIT_ASSERT(t.int64(rv, 8, false, 2));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());

    }


    // forced base; input: base, one invalid post-base digit, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("08"));

        const int64_t benchmark = 00;

        CPPUNIT_ASSERT(t.int64(rv, 8));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());

    }


    // forced base; input: base, one invalid post-base digit, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("08"));

        const int64_t benchmark = 00;

        CPPUNIT_ASSERT(t.int64(rv, 8, false, 2));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());

    }


    // And now repeat six "with base character in input" octal cases but with

    // auto-detected base. When octal cases below say "auto-detected base", they

    // describe int64() base=0 parameter value. Current int64() implementation

    // does auto-detect base as octal in all of these cases, but that might

    // change, and some of these cases (e.g., "0") can also be viewed as a

    // non-octal input case as well. These cases do not attempt to test base

    // detection. They focus on other potential problems.


    // auto-detected base; input: base, no post-base digits, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0"));

        const int64_t benchmark = 00;

        CPPUNIT_ASSERT(t.int64(rv, 0));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // auto-detected base; input: base, no post-base digits, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("07"));

        const int64_t benchmark = 0;

        CPPUNIT_ASSERT(t.int64(rv, 0, false, 1));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());

    }


    // auto-detected base; input: base, one valid post-base digit, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("04"));

        const int64_t benchmark = 04;

        CPPUNIT_ASSERT(t.int64(rv, 0));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // auto-detected base; input: base, one valid post-base digit, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("046"));

        const int64_t benchmark = 04;

        CPPUNIT_ASSERT(t.int64(rv, 0, false, 2));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());

    }


    // auto-detected base; input: base, one invalid post-base digit, unlimited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("08"));

        const int64_t benchmark = 00;

        CPPUNIT_ASSERT(t.int64(rv, 0));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());

    }


    // auto-detected base; input: base, one invalid post-base digit, limited

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("08"));

        const int64_t benchmark = 00;

        CPPUNIT_ASSERT(t.int64(rv, 0, false, 2));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());

    }


    // this ends four-dimensional enumeration of octal cases described earlier


    // check octal base auto-detection

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0128"));

        const int64_t benchmark = 012;

        CPPUNIT_ASSERT(t.int64(rv, 0));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());

    }


    // check that octal base auto-detection is not confused by repeated zeros

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("00000000071"));

        const int64_t benchmark = 00000000071;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // check that forced octal base is not confused by hex prefix

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0x5"));

        const int64_t benchmark = 0;

        CPPUNIT_ASSERT(t.int64(rv, 8));

        CPPUNIT_ASSERT_EQUAL(benchmark, rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("x5"), t.buf());

    }


    // autodetect decimal base in shortest valid input

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("1"));

        const int64_t benchmark = 1;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT(t.buf().isEmpty());

    }


    // autodetect hex base in shortest valid input

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0X1"));

        const int64_t benchmark = 0X1;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT(t.buf().isEmpty());

    }


    // invalid (when autodetecting base) input matching hex base

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0x"));

        CPPUNIT_ASSERT(!t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf());

    }


    // invalid (when forcing hex base) input matching hex base

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0x"));

        CPPUNIT_ASSERT(!t.int64(rv, 16));

        CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf());

    }


    // invalid (when autodetecting base and limiting) input matching hex base

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0x2"));

        CPPUNIT_ASSERT(!t.int64(rv, 0, true, 2));

        CPPUNIT_ASSERT_EQUAL(SBuf("0x2"), t.buf());

    }


    // invalid (when forcing hex base and limiting) input matching hex base

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0x3"));

        CPPUNIT_ASSERT(!t.int64(rv, 16, false, 2));

        CPPUNIT_ASSERT_EQUAL(SBuf("0x3"), t.buf());

    }


    // API mismatch: don't eat leading space

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf(" 1234"));

        CPPUNIT_ASSERT(!t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());

    }


    // API mismatch: don't eat multiple leading spaces

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("  1234"));

        CPPUNIT_ASSERT(!t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(SBuf("  1234"), t.buf());

    }


    // zero corner case: repeated zeros

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("00"));

        const int64_t benchmark = 00;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // zero corner case: "positive" zero

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("+0"));

        const int64_t benchmark = +0;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // zero corner case: "negative" zero

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("-0"));

        const int64_t benchmark = -0;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());

    }


    // trailing spaces

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("1234  foo"));

        const int64_t benchmark = 1234;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("  foo"), t.buf());

    }


    // trailing nonspaces

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("1234foo"));

        const int64_t benchmark = 1234;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf());

    }


    // trailing nonspaces

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("0x1234foo"));

        const int64_t benchmark = 0x1234f;

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf());

    }


    // overflow

    {

        int64_t rv;

        Parser::Tokenizer t(SBuf("1029397752385698678762234"));

        CPPUNIT_ASSERT(!t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf());

    }


    // buffered sub-string parsing

    {

        int64_t rv;

        SBuf base("1029397752385698678762234");

        const int64_t benchmark = 22;

        Parser::Tokenizer t(base.substr(base.length()-4,2));

        CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf());

        CPPUNIT_ASSERT(t.int64(rv));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT(t.buf().isEmpty());

    }


    // base-16, prefix

    {

        int64_t rv;

        SBuf base("deadbeefrow");

        const int64_t benchmark=0xdeadbeef;

        Parser::Tokenizer t(base);

        CPPUNIT_ASSERT(t.int64(rv,16));

        CPPUNIT_ASSERT_EQUAL(benchmark,rv);

        CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf());


    }

}


int


main(int argc, char *argv[])

{

    return TestProgram().run(argc, argv);

}


CharacterSet.h

CharacterSet
optimized set of C chars, with quick membership test and merge support
Definition CharacterSet.h:18

CharacterSet::add
CharacterSet & add(const unsigned char c)
add a given character to the character set
Definition CharacterSet.cc:47

Parser::Tokenizer
Definition Tokenizer.h:30

Parser::Tokenizer::prefix
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition Tokenizer.cc:79

Parser::Tokenizer::suffix
bool suffix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition Tokenizer.cc:117

Parser::Tokenizer::skipOne
bool skipOne(const CharacterSet &discardables)
Definition Tokenizer.cc:161

Parser::Tokenizer::token
bool token(SBuf &returnedToken, const CharacterSet &delimiters)
Definition Tokenizer.cc:61

Parser::Tokenizer::skipSuffix
bool skipSuffix(const SBuf &tokenToSkip)
Definition Tokenizer.cc:172

Parser::Tokenizer::skipAll
SBuf::size_type skipAll(const CharacterSet &discardables)
Definition Tokenizer.cc:137

Parser::Tokenizer::int64
bool int64(int64_t &result, int base=0, bool allowSign=true, SBuf::size_type limit=SBuf::npos)
Definition Tokenizer.cc:238

Parser::Tokenizer::buf
SBuf buf() const
yet unparsed data
Definition Tokenizer.h:35

Parser::Tokenizer::remaining
const SBuf & remaining() const
the remaining unprocessed section of buffer
Definition Tokenizer.h:44

Parser::Tokenizer::skipOneTrailing
bool skipOneTrailing(const CharacterSet &discardables)
Definition Tokenizer.cc:211

Parser::Tokenizer::skip
bool skip(const SBuf &tokenToSkip)
Definition Tokenizer.cc:189

SBuf
Definition SBuf.h:94

SBuf::npos
static const size_type npos
Definition SBuf.h:100

SBuf::length
size_type length() const
Returns the number of bytes stored in SBuf.
Definition SBuf.h:419

SBuf::isEmpty
bool isEmpty() const
Definition SBuf.h:435

SBuf::findLastOf
size_type findLastOf(const CharacterSet &set, size_type endPos=npos) const
Definition SBuf.cc:769

SBuf::startsWith
bool startsWith(const SBuf &S, const SBufCaseSensitive isCaseSensitive=caseSensitive) const
Definition SBuf.cc:442

SBuf::substr
SBuf substr(size_type pos, size_type n=npos) const
Definition SBuf.cc:576

SBuf::size_type
MemBlob::size_type size_type
Definition SBuf.h:96

TestProgram
implements test program's main() function while enabling customization
Definition unitTestMain.h:26

TestProgram::run
int run(int argc, char *argv[])
Definition unitTestMain.h:44

TestTokenizer
Definition testTokenizer.cc:16

TestTokenizer::CPPUNIT_TEST
CPPUNIT_TEST(testTokenizerPrefix)

TestTokenizer::CPPUNIT_TEST
CPPUNIT_TEST(testTokenizerSuffix)

TestTokenizer::testTokenizerPrefix
void testTokenizerPrefix()
Definition testTokenizer.cc:45

TestTokenizer::testTokenizerSkip
void testTokenizerSkip()
Definition testTokenizer.cc:97

TestTokenizer::testTokenizerToken
void testTokenizerToken()
Definition testTokenizer.cc:131

TestTokenizer::CPPUNIT_TEST
CPPUNIT_TEST(testTokenizerToken)

TestTokenizer::CPPUNIT_TEST
CPPUNIT_TEST(testTokenizerSkip)

TestTokenizer::CPPUNIT_TEST_SUITE_END
CPPUNIT_TEST_SUITE_END()

TestTokenizer::testTokenizerInt64
void testTokenizerInt64()
Definition testTokenizer.cc:209

TestTokenizer::testTokenizerSuffix
void testTokenizerSuffix()
Definition testTokenizer.cc:149

TestTokenizer::CPPUNIT_TEST
CPPUNIT_TEST(testTokenizerInt64)

TestTokenizer::CPPUNIT_TEST_SUITE
CPPUNIT_TEST_SUITE(TestTokenizer)

cppunit.h

main
int main()
Definition kerberos_ldap_group.cc:492

Tokenizer.h

squid.h

tab
const CharacterSet tab("tab","\t")

text
SBuf text("GET http://resource.com/path HTTP/1.1\r\n" "Host: resource.com\r\n" "Cookie: laijkpk3422r j1noin \r\n" "\r\n")

CPPUNIT_TEST_SUITE_REGISTRATION
CPPUNIT_TEST_SUITE_REGISTRATION(TestTokenizer)

alpha
const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

crlf
const CharacterSet crlf("crlf","\r\n")

whitespace
const CharacterSet whitespace("whitespace"," \r\n")

numbers
const CharacterSet numbers("numbers","0123456789")

unitTestMain.h