RequestParser_8cc_source.html

/*

 * Copyright (C) 1996-2026 The Squid Software Foundation and contributors

 *

 * Squid software is distributed under GPLv2+ license and includes

 * contributions from numerous individuals and organizations.

 * Please see the COPYING and CONTRIBUTORS files for details.

 */


#include "squid.h"

#include "debug/Stream.h"

#include "http/one/RequestParser.h"

#include "http/ProtocolVersion.h"

#include "parser/Tokenizer.h"

#include "SquidConfig.h"


Http1::Parser::size_type
Http::One::RequestParser::firstLineSize() const
{
    // RFC 7230 section 2.6 request-line layout:
    //   method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF
    // Everything except the method and the request-target has a fixed size:
    // 2 (SP delimiters) + 8 (HTTP-version) + 2 (CRLF) = 12 bytes.
    const auto methodBytes = method_.image().length();
    const auto targetBytes = uri_.length();
    return methodBytes + targetBytes + 12;
}


void
Http::One::RequestParser::skipGarbageLines()
{
    // Only the relaxed parser skips empty lines ahead of the request-line;
    // with the strict parser buf_ is left untouched.
    if (!Config.onoff.relaxed_header_parser)
        return;

    // Nothing to warn about or skip. Returning here also guarantees that
    // the buf_[0] reads below never look at an empty buffer.
    if (buf_.isEmpty())
        return;

    // a negative relaxed_header_parser value also reports the violation
    if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
        debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
               "CRLF bytes received ahead of request-line. " <<
               "Ignored due to relaxed_header_parser.");

    // Be tolerant of prefix empty lines
    // ie any series of either \n or \r\n with no other characters and no repeated \r
    // (a CR is only consumed when an LF immediately follows it)
    while (!buf_.isEmpty() && (buf_[0] == '\n' ||
                               (buf_[0] == '\r' && buf_.length() > 1 && buf_[1] == '\n'))) {
        buf_.consume(1);
    }
}


bool
Http::One::RequestParser::parseMethodField(Tokenizer &tok)
{
    // The method is a run of TCHAR at the start of the line.
    // The run is capped at 32 characters so that long streams of non-HTTP
    // bytes are rejected early instead of being swallowed as a "method".
    // 32 is generous, but IANA-registered methods reach 17 bytes:
    //  http://www.iana.org/assignments/http-methods
    static const size_t maxMethodLength = 32; // TODO: make this configurable?

    SBuf rawMethod;
    const bool gotMethod = tok.prefix(rawMethod, CharacterSet::TCHAR, maxMethodLength);
    if (!gotMethod) {
        debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed method");
        parseStatusCode = Http::scBadRequest;
        return false;
    }
    method_ = HttpRequestMethod(rawMethod);

    // the method must be followed by delimiter(s); skipDelimiter() records
    // the failure status when their number is unacceptable
    const auto delimiterCount = tok.skipAll(DelimiterCharacters());
    return skipDelimiter(delimiterCount, "after method");
}


static const CharacterSet &
UriValidCharacters()
{
    /* RFC 3986 section 2:
     * "
     *   A URI is composed from a limited set of characters consisting of
     *   digits, letters, and a few graphic symbols.
     * "
     */
    // The set is assembled once, on first use, and shared by all callers.
    static const CharacterSet UriChars = [] {
        // RFC 3986 section 2.2 - reserved characters
        const CharacterSet genDelims("gen-delims", ":/?#[]@");
        const CharacterSet subDelims("sub-delims", "!$&'()*+,;=");

        // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
        const CharacterSet pctMarker("pct-encoded", "%");

        return CharacterSet("URI-Chars","") +
               genDelims +
               subDelims +
               // RFC 3986 section 2.3 - unreserved characters
               CharacterSet::RFC3986_UNRESERVED() +
               pctMarker +
               CharacterSet::HEXDIG;
    }();

    return UriChars;
}


const CharacterSet &
Http::One::RequestParser::RequestTargetCharacters()
{
    // strict parse only accepts what the RFC say we can
    if (!Config.onoff.relaxed_header_parser)
        return UriValidCharacters();

    // Relaxed parse: which extra characters are tolerated depends on
    // whether the build permits HTTP violations.
#if USE_HTTP_VIOLATIONS
    static const CharacterSet RelaxedExtended =
        UriValidCharacters() +
        // accept whitespace (extended), it will be dealt with later
        DelimiterCharacters() +
        // RFC 2396 unwise character set which must never be transmitted
        // in un-escaped form. But many web services do anyway.
        CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
        // UTF-8 because we want to be future-proof
        CharacterSet("UTF-8", 128, 255);

    return RelaxedExtended;
#else
    static const CharacterSet RelaxedCompliant =
        UriValidCharacters() +
        // accept whitespace (extended), it will be dealt with later.
        DelimiterCharacters();

    return RelaxedCompliant;
#endif
}


bool
Http::One::RequestParser::parseUriField(Tokenizer &tok)
{
    /* Arbitrary 64KB URI upper length limit.
     *
     * Less arbitrary than it looks: old SquidString objects cannot store
     * strings larger than 64KB, so the limit stays until they have all
     * been replaced with SBuf.
     *
     * For comparison, RFC 7230 section 3.1.1 recommends supporting at
     * least 8000 octets for the whole line, including method and version.
     */
    const size_t maxUriLength = 64*1024 - 1;

    // take every leading character that is acceptable in a request-target
    SBuf candidate;
    const bool gotUri = tok.prefix(candidate, RequestTargetCharacters());
    if (!gotUri) {
        parseStatusCode = Http::scBadRequest;
        debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed URI");
        return false;
    }

    const auto uriLength = candidate.length();
    if (uriLength <= maxUriLength) {
        uri_ = candidate;
        return true;
    }

    // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
    parseStatusCode = Http::scUriTooLong;
    debugs(33, ErrorLevel(), "ERROR: invalid request-line: " << uriLength <<
           "-byte URI exceeds " << maxUriLength << "-byte limit");
    return false;
}


bool
Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
{
    static const SBuf http1p0("HTTP/1.0");
    static const SBuf http1p1("HTTP/1.1");

    // The version is parsed backwards from the end of the line. Keep a copy
    // of the untouched tokenizer so a failed attempt can be undone.
    const auto tokBeforeVersion = tok;

    // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
    // the vast majority of cases.
    if (tok.skipSuffix(http1p1)) {
        msgProtocol_ = Http::ProtocolVersion(1, 1);
        return true;
    }

    if (tok.skipSuffix(http1p0)) {
        msgProtocol_ = Http::ProtocolVersion(1, 0);
        return true;
    }

    // RFC 7230 section 2.6:
    // HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
    static const CharacterSet period("Decimal point", ".");
    static const SBuf proto("HTTP/");
    SBuf majorDigit;
    SBuf minorDigit;
    // right-to-left: minor digits, ".", major digits, then "HTTP/"
    const bool sawGenericVersion =
        tok.suffix(minorDigit, CharacterSet::DIGIT) &&
        tok.skipOneTrailing(period) &&
        tok.suffix(majorDigit, CharacterSet::DIGIT) &&
        tok.skipSuffix(proto);

    if (sawGenericVersion) {
        // use '0.0' for unsupported multiple digit version numbers
        unsigned int major = 0;
        unsigned int minor = 0;
        const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
        if (!multiDigits) {
            major = *majorDigit.rawContent() - '0';
            minor = *minorDigit.rawContent() - '0';
        }
        msgProtocol_ = Http::ProtocolVersion(major, minor);
        return true;
    }

    // Only a GET request might use HTTP/0.9 syntax; anything else without
    // a recognizable version is not HTTP.
    if (!(method_ == Http::METHOD_GET)) {
        debugs(33, ErrorLevel(), "ERROR: invalid request-line: not HTTP");
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    // RFC 1945 - no HTTP version field at all
    tok = tokBeforeVersion; // in case the URI ends with a digit
    // report this assumption as an error if configured to triage parsing
    debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
    msgProtocol_ = Http::ProtocolVersion(0,9);
    return true;
}


bool
Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
{
    // count is the number of delimiter characters the caller has skipped;
    // where describes their position and is only used in the log message.

    // at least one delimiter is always required
    if (count == 0) {
        debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing delimiter " << where);
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    // A single delimiter is always fine. The tolerant parser also allows
    // multiple whitespace characters between request-line fields.
    if (count == 1 || Config.onoff.relaxed_header_parser)
        return true;

    debugs(33, ErrorLevel(), "ERROR: invalid request-line: too many delimiters " << where);
    parseStatusCode = Http::scBadRequest;
    return false;
}


bool
Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
{
    // The caller has already removed the terminating LF, so any CR to be
    // skipped sits at the very end of tok.

    if (Config.onoff.relaxed_header_parser) {
        // optional; multiple OK
        (void)tok.skipAllTrailing(CharacterSet::CR);
        return true;
    }

    // strict parsing demands exactly one CR in front of the LF
    if (tok.skipOneTrailing(CharacterSet::CR))
        return true;

    debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing CR before LF");
    parseStatusCode = Http::scBadRequest;
    return false;
}


int
Http::One::RequestParser::parseRequestFirstLine()
{
    // Returns 1 when a request-line was parsed and consumed from buf_,
    // 0 when more data is needed, and -1 on a syntax or size error.

    debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
    debugs(74, DBG_DATA, buf_);

    // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
    // Now, the request line has to end at the first LF.
    static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
    SBuf line;
    Tokenizer lineTok(buf_);
    const bool haveLine = lineTok.prefix(line, lineChars) && lineTok.skip('\n');

    if (!haveLine) {
        // no complete line yet; keep waiting unless the buffer is already full
        if (buf_.length() < Config.maxRequestHeaderSize) {
            debugs(74, 5, "Parser needs more data");
            return 0;
        }

        /* who should we blame for our failure to parse this line? */

        Tokenizer methodTok(buf_);
        if (!parseMethodField(methodTok))
            return -1; // blame a bad method (or its delimiter)

        // assume it is the URI
        debugs(74, ErrorLevel(), "ERROR: invalid request-line: URI exceeds " <<
               Config.maxRequestHeaderSize << "-byte limit");
        parseStatusCode = Http::scUriTooLong;
        return -1;
    }

    Tokenizer tok(line);

    // Parse the method from the front, then work backwards from the end of
    // the line (CRs, version, delimiter) to leave just the URI. Each step
    // runs only if the previous one succeeded, and each failing step sets
    // parseStatusCode itself. HTTP/0.9 lines have no version field and so
    // no delimiter in front of it.
    const bool surroundingsOk =
        parseMethodField(tok) &&
        skipTrailingCrs(tok) &&
        parseHttpVersionField(tok) &&
        (http0() || skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"));
    if (!surroundingsOk)
        return -1;

    /* parsed everything before and after the URI */

    if (!parseUriField(tok))
        return -1;

    if (tok.atEnd()) {
        parseStatusCode = Http::scOkay;
        buf_ = lineTok.remaining(); // incremental parse checkpoint
        return 1;
    }

    debugs(33, ErrorLevel(), "ERROR: invalid request-line: garbage after URI");
    parseStatusCode = Http::scBadRequest;
    return -1;
}


bool
Http::One::RequestParser::parse(const SBuf &aBuf)
{
    const bool result = doParse(aBuf);

    // nothing else to do unless the consumed bytes must be kept
    if (!preserveParsed_)
        return result;

    // Whatever doParse() did not leave in remaining() was consumed from
    // the front of aBuf by this call.
    assert(aBuf.length() >= remaining().length());
    const auto consumed = aBuf.length() - remaining().length();
    parsed_.append(aBuf.substr(0, consumed)); // newly parsed bytes

    return result;
}


// Runs the staged request parser (request-line, then mime header block) on
// a copy of aBuf kept in buf_.
// NOTE(review): aBuf is taken by reference and is still read by the debugs()
// calls below after buf_ has been modified -- confirm that callers never
// pass a reference to our own buf_ or parsed_.
bool
Http::One::RequestParser::doParse(const SBuf &aBuf)
{
    buf_ = aBuf;
    debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");

    // stage 1: locate the request-line
    if (parsingStage_ == HTTP_PARSE_NONE) {
        skipGarbageLines();

        // nothing left after the skipping: there is no message to parse yet
        if (buf_.isEmpty())
            return false;

        // if we hit something before EOS treat it as a message
        parsingStage_ = HTTP_PARSE_FIRST;
    }

    // stage 2: parse the request-line
    if (parsingStage_ == HTTP_PARSE_FIRST) {
        const int lineResult = parseRequestFirstLine();

        // first-line (or a look-alike) found successfully.
        if (lineResult > 0)
            parsingStage_ = HTTP_PARSE_MIME;

        debugs(74, 5, "request-line: retval " << lineResult << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
        debugs(74, 5, "request-line: method: " << method_);
        debugs(74, 5, "request-line: url: " << uri_);
        debugs(74, 5, "request-line: proto: " << msgProtocol_);
        debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));

        // syntax errors already: parsing is over for this message
        if (lineResult < 0) {
            parsingStage_ = HTTP_PARSE_DONE;
            return false;
        }
    }

    // stage 3: locate the mime header block
    if (parsingStage_ == HTTP_PARSE_MIME) {
        // HTTP/1.x request-line is valid and parsing completed.
        const bool gotMime = grabMimeBlock("Request", Config.maxRequestHeaderSize);
        if (!gotMime) {
            // replace the generic "header too large" status with the
            // request-specific one
            if (parseStatusCode == Http::scHeaderTooLarge)
                parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
            return false;
        }
    }

    return !needsMoreData();
}


UriValidCharacters
static const CharacterSet & UriValidCharacters()
the characters which truly are valid within URI
Definition RequestParser.cc:87

RequestParser.h

Config
class SquidConfig Config
Definition SquidConfig.cc:12

SquidConfig.h

assert
#define assert(EX)
Definition assert.h:17

CharacterSet
optimized set of C chars, with quick membership test and merge support
Definition CharacterSet.h:18

CharacterSet::complement
CharacterSet complement(const char *complementLabel=nullptr) const
Definition CharacterSet.cc:74

CharacterSet::TCHAR
static const CharacterSet TCHAR
Definition CharacterSet.h:105

CharacterSet::DIGIT
static const CharacterSet DIGIT
Definition CharacterSet.h:84

CharacterSet::HEXDIG
static const CharacterSet HEXDIG
Definition CharacterSet.h:88

CharacterSet::LF
static const CharacterSet LF
Definition CharacterSet.h:92

CharacterSet::CR
static const CharacterSet CR
Definition CharacterSet.h:80

CharacterSet::RFC3986_UNRESERVED
static const CharacterSet & RFC3986_UNRESERVED()
allowed URI characters that do not have a reserved purpose, RFC 3986
Definition CharacterSet.cc:164

HttpRequestMethod
Definition RequestMethod.h:27

HttpRequestMethod::image
const SBuf & image() const
Definition RequestMethod.cc:99

Http::One::Parser::Tokenizer
::Parser::Tokenizer Tokenizer
Definition Parser.h:44

Http::One::RequestParser::parseMethodField
bool parseMethodField(Tokenizer &)
Definition RequestParser.cc:62

Http::One::RequestParser::doParse
bool doParse(const SBuf &aBuf)
called from parse() to do the parsing
Definition RequestParser.cc:349

Http::One::RequestParser::firstLineSize
Http1::Parser::size_type firstLineSize() const override
size in bytes of the first line including CRLF terminator
Definition RequestParser.cc:17

Http::One::RequestParser::RequestTargetCharacters
static const CharacterSet & RequestTargetCharacters()
characters which Squid will accept in the HTTP request-target (URI)
Definition RequestParser.cc:111

Http::One::RequestParser::parse
bool parse(const SBuf &aBuf) override
Definition RequestParser.cc:336

Http::One::RequestParser::skipDelimiter
bool skipDelimiter(const size_t count, const char *where)
Definition RequestParser.cc:229

Http::One::RequestParser::parseRequestFirstLine
int parseRequestFirstLine()
Definition RequestParser.cc:275

Http::One::RequestParser::parseHttpVersionField
bool parseHttpVersionField(Tokenizer &)
Definition RequestParser.cc:174

Http::One::RequestParser::method_
HttpRequestMethod method_
what request method has been found on the first line
Definition RequestParser.h:72

Http::One::RequestParser::parseUriField
bool parseUriField(Tokenizer &)
Definition RequestParser.cc:141

Http::One::RequestParser::uri_
SBuf uri_
raw copy of the original client request-line URI field
Definition RequestParser.h:75

Http::One::RequestParser::skipGarbageLines
void skipGarbageLines()
Definition RequestParser.cc:38

Http::One::RequestParser::skipTrailingCrs
bool skipTrailingCrs(Tokenizer &tok)
Parse CRs at the end of request-line, just before the terminating LF.
Definition RequestParser.cc:249

SBuf
Definition SBuf.h:94

SBuf::rawContent
const char * rawContent() const
Definition SBuf.cc:509

SBuf::length
size_type length() const
Returns the number of bytes stored in SBuf.
Definition SBuf.h:419

SBuf::substr
SBuf substr(size_type pos, size_type n=npos) const
Definition SBuf.cc:576

SquidConfig::onoff
struct SquidConfig::@90 onoff

SquidConfig::maxRequestHeaderSize
size_t maxRequestHeaderSize
Definition SquidConfig.h:134

SquidConfig::relaxed_header_parser
int relaxed_header_parser
Definition SquidConfig.h:315

Stream.h

DBG_DATA
#define DBG_DATA
Definition Stream.h:40

DBG_IMPORTANT
#define DBG_IMPORTANT
Definition Stream.h:38

debugs
#define debugs(SECTION, LEVEL, CONTENT)
Definition Stream.h:192

ProtocolVersion.h

Http::One::HTTP_PARSE_FIRST
@ HTTP_PARSE_FIRST
HTTP/1 message first-line.
Definition Parser.h:24

Http::One::HTTP_PARSE_DONE
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition Parser.h:29

Http::One::HTTP_PARSE_MIME
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition Parser.h:28

Http::One::HTTP_PARSE_NONE
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition Parser.h:23

Http::One::ErrorLevel
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition Parser.cc:269

Http::scUriTooLong
@ scUriTooLong
Definition StatusCode.h:59

Http::scHeaderTooLarge
@ scHeaderTooLarge
Header too large to process.
Definition StatusCode.h:89

Http::scBadRequest
@ scBadRequest
Definition StatusCode.h:45

Http::scOkay
@ scOkay
Definition StatusCode.h:27

Http::scRequestHeaderFieldsTooLarge
@ scRequestHeaderFieldsTooLarge
Definition StatusCode.h:71

Http::METHOD_GET
@ METHOD_GET
Definition MethodType.h:25

Http::ProtocolVersion
AnyP::ProtocolVersion ProtocolVersion()
Definition ProtocolVersion.h:32

Tokenizer.h

squid.h

tok
Definition parse.c:160