http_2one_2Parser_8cc_source.html

/*

 * Copyright (C) 1996-2026 The Squid Software Foundation and contributors

 *

 * Squid software is distributed under GPLv2+ license and includes

 * contributions from numerous individuals and organizations.

 * Please see the COPYING and CONTRIBUTORS files for details.

 */


#include "squid.h"

#include "base/CharacterSet.h"

#include "debug/Stream.h"

#include "http/one/Parser.h"

#include "mime_header.h"

#include "parser/Tokenizer.h"

#include "SquidConfig.h"


// The 7 leading octets ("HTTP/1.") shared by every HTTP/1.x protocol label
// (RFC 7230 section 2.6).
const SBuf Http::One::Parser::Http1magic("HTTP/1.");


/// \returns the textual CRLF line terminator as a shared, lazily-built SBuf
const SBuf &Http::One::CrLf()
{
    // built once, on first use, and handed out by reference afterwards
    static const SBuf lineTerminator("\r\n");
    return lineTerminator;
}


/// Returns the parser to its initial state: forgets the extracted mime
/// header block, the detected protocol label, the bytes still waiting to be
/// parsed, and the parsing stage.
void
Http::One::Parser::clear()
{
    // These resets are independent of each other.
    mimeHeaderBlock_.clear();
    msgProtocol_ = AnyP::ProtocolVersion();
    buf_ = nullptr;
    parsingStage_ = HTTP_PARSE_NONE;
}


/// characters HTTP permits tolerant parsers to accept as delimiters
static const CharacterSet &
RelaxedDelimiterCharacters()
{
    // RFC 7230 section 3.5: a tolerant parser MAY treat SP, HTAB,
    // VT (%x0B), FF (%x0C), or a bare CR as whitespace between
    // request-line fields.
    static const CharacterSet verticalTabAndFormFeed("VT,FF","\x0B\x0C");

    // merge the individual sets, then label the result for debugging
    static const CharacterSet tolerated =
        (CharacterSet::SP + CharacterSet::HTAB + verticalTabAndFormFeed + CharacterSet::CR).rename("relaxed-WSP");

    return tolerated;
}


/// \returns the set of characters treated as whitespace: the relaxed
/// delimiter set when relaxed_header_parser is enabled (any non-zero value),
/// CharacterSet::WSP otherwise
const CharacterSet &
Http::One::Parser::WhitespaceCharacters()
{
    if (Config.onoff.relaxed_header_parser)
        return RelaxedDelimiterCharacters();

    return CharacterSet::WSP;
}


/// \returns the set of characters treated as field delimiters: the relaxed
/// delimiter set when relaxed_header_parser is enabled (any non-zero value),
/// a lone SP otherwise
const CharacterSet &
Http::One::Parser::DelimiterCharacters()
{
    if (Config.onoff.relaxed_header_parser)
        return RelaxedDelimiterCharacters();

    return CharacterSet::SP;
}


/// Advances tok past one line terminator.
/// With relaxed_header_parser enabled, a bare LF is accepted first; in every
/// other case a full CRLF is demanded via Tokenizer::skipRequired(), which is
/// then responsible for reporting a missing or malformed terminator.
void
Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
{
    // short-circuit: the tokenizer is only probed for LF in relaxed mode
    const bool tookBareLf = Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF);

    if (!tookBareLf)
        tok.skipRequired("line-terminating CRLF", Http1::CrLf());
}


/// all characters except the LF line terminator
static const CharacterSet &
LineCharacters()
{
    // complement of {LF}, built once and labelled "non-LF"
    static const CharacterSet everythingButLf = CharacterSet::LF.complement("non-LF");
    return everythingButLf;
}


/// Drops every leading line of mimeHeaderBlock_ that starts with a relaxed
/// delimiter character (whitespace-prefixed lines ahead of the first field).
/// Afterwards mimeHeaderBlock_ holds 0+ fields followed by the terminator.
void
Http::One::Parser::cleanMimePrefix()
{
    Tokenizer tok(mimeHeaderBlock_);

    for (;;) {
        // stop at the first line that does not begin with a delimiter
        if (!tok.skipOne(RelaxedDelimiterCharacters()))
            break;

        // the rest of the line is optional
        (void)tok.skipAll(LineCharacters());

        // The LF terminator is required; headersEnd() is trusted to have
        // ensured that at least one LF is present.
        (void)tok.skipOne(CharacterSet::LF);
    }

    // A block made only of whitespace line(s) plus the final CRLF gets
    // skipped entirely, terminating LF included; put a CRLF back then.
    mimeHeaderBlock_ = tok.atEnd() ? Http1::CrLf() : tok.remaining();
}


void


Http::One::Parser::unfoldMime()

{

    Tokenizer tok(mimeHeaderBlock_);

    const auto szLimit = mimeHeaderBlock_.length();

    mimeHeaderBlock_.clear();

    // prevent the mime sender being able to make append() realloc/grow multiple times.

    mimeHeaderBlock_.reserveSpace(szLimit);


    static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");


    while (!tok.atEnd()) {

        const SBuf all(tok.remaining());

        const auto blobLen = tok.skipAll(nonCRLF); // may not be there

        const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there

        const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there


        if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!

            mimeHeaderBlock_.append(all.substr(0, blobLen));

            mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP

        } else

            mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));

    }

}


bool


Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)

{

    // MIME headers block exist in (only) HTTP/1.x and ICY

    const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||

                            msgProtocol_.protocol == AnyP::PROTO_ICY ||

                            hackExpectsMime_;


    if (expectMime) {

        /* NOTE: HTTP/0.9 messages do not have a mime header block.

         *       So the rest of the code will need to deal with '0'-byte headers

         *       (ie, none, so don't try parsing em)

         */

        bool containsObsFold;

        if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {


            // Squid could handle these headers, but admin does not want to

            if (firstLineSize() + mimeHeaderBytes >= limit) {

                debugs(33, 5, "Too large " << which);

                parseStatusCode = Http::scHeaderTooLarge;

                buf_.consume(mimeHeaderBytes);

                parsingStage_ = HTTP_PARSE_DONE;

                return false;

            }


            mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);

            cleanMimePrefix();

            if (containsObsFold)

                unfoldMime();


            debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");


        } else { // headersEnd() == 0

            if (buf_.length()+firstLineSize() >= limit) {

                debugs(33, 5, "Too large " << which);

                parseStatusCode = Http::scHeaderTooLarge;

                parsingStage_ = HTTP_PARSE_DONE;

            } else

                debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");

            return false;

        }


    } else

        debugs(33, 3, "Missing HTTP/1.x identifier");


    // NP: we do not do any further stages here yet so go straight to DONE

    parsingStage_ = HTTP_PARSE_DONE;


    return true;

}


// Arbitrary maximum length, in bytes including the terminating NUL, of a
// header field value that Http::One::Parser::getHostHeaderField() can return.
// It sizes that method's result buffer; longer values are chopped to fit.
#define GET_HDR_SZ  1024


// BUG: returns only the first header line with given name,

//      ignores multi-line headers and obs-fold headers

char *


Http::One::Parser::getHostHeaderField()

{

    if (!headerBlockSize())

        return nullptr;


    LOCAL_ARRAY(char, header, GET_HDR_SZ);

    const char *name = "Host";

    const int namelen = strlen(name);


    debugs(25, 5, "looking for " << name);


    // while we can find more LF in the SBuf

    Tokenizer tok(mimeHeaderBlock_);

    SBuf p;


    while (tok.prefix(p, LineCharacters())) {

        if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF

            break; // error. reached invalid octet or end of buffer instead of an LF ??


        // header lines must start with the name (case insensitive)

        if (p.substr(0, namelen).caseCmp(name, namelen))

            continue;


        // then a COLON

        if (p[namelen] != ':')

            continue;


        // drop any trailing *CR sequence

        p.trim(Http1::CrLf(), false, true);


        debugs(25, 5, "checking " << p);

        p.consume(namelen + 1);


        // TODO: optimize SBuf::trim to take CharacterSet directly

        Tokenizer t(p);

        t.skipAll(CharacterSet::WSP);

        p = t.remaining();


        // prevent buffer overrun on char header[];

        p.chop(0, sizeof(header)-1);


        // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port]

        static const auto hostChars = CharacterSet("host",":[].-_") + CharacterSet::ALPHA + CharacterSet::DIGIT;

        if (p.findFirstNotOf(hostChars) != SBuf::npos)

            break; // error. line contains character not accepted in Host header


        // return the header field-value

        SBufToCstring(header, p);

        debugs(25, 5, "returning " << header);

        return header;

    }


    return nullptr;

}


/// \returns the debugs() level for logging HTTP violation messages:
/// DBG_IMPORTANT when relaxed_header_parser is negative (presumably the
/// "warn" setting -- confirm against the squid.conf parser), level 5 otherwise
int
Http::One::ErrorLevel()
{
    if (Config.onoff.relaxed_header_parser < 0)
        return DBG_IMPORTANT;

    return 5;
}


namespace Http::One {

/// common part of ParseBws() and ParseStrictBws():
/// skips all leading bwsChars octets in tok
/// \throws InsufficientInput when nothing is left after the skipped octets
static void
ParseBws_(Parser::Tokenizer &tok, const CharacterSet &bwsChars)
{
    const auto skipped = tok.skipAll(bwsChars);

    // Running out of input is reported even if some BWS was skipped.
    if (tok.atEnd())
        throw InsufficientInput();

    // an empty BWS sequence was "parsed" successfully
    if (!skipped)
        return;

    // Generating BWS is a MUST-level violation so warn about it as needed.
    // RFC 7230 says we MUST parse BWS, so this is still a success even if
    // Config.onoff.relaxed_header_parser is off.
    debugs(33, ErrorLevel(), "found " << skipped << " BWS octets");

    // success: no more BWS characters expected
}

} // namespace Http::One


/// Skips optional "bad" whitespace (BWS) in tok, using the character set
/// chosen by Parser::WhitespaceCharacters() for the current configuration.
/// Failures are reported by ParseBws_().
void
Http::One::ParseBws(Parser::Tokenizer &tok)
{
    const CharacterSet &configuredBws = Parser::WhitespaceCharacters();
    ParseBws_(tok, configuredBws);
}


/// Skips optional "bad" whitespace (BWS) in tok, always using
/// CharacterSet::WSP regardless of relaxed_header_parser.
/// Failures are reported by ParseBws_().
void
Http::One::ParseStrictBws(Parser::Tokenizer &tok)
{
    const CharacterSet &strictBws = CharacterSet::WSP;
    ParseBws_(tok, strictBws);
}


CharacterSet.h

SBufToCstring
void SBufToCstring(char *d, const SBuf &s)
Definition SBuf.h:756

Config
class SquidConfig Config
Definition SquidConfig.cc:12

SquidConfig.h

AnyP::ProtocolVersion
Definition ProtocolVersion.h:24

CharacterSet
optimized set of C chars, with quick membership test and merge support
Definition CharacterSet.h:18

CharacterSet::complement
CharacterSet complement(const char *complementLabel=nullptr) const
Definition CharacterSet.cc:74

CharacterSet::SP
static const CharacterSet SP
Definition CharacterSet.h:94

CharacterSet::rename
CharacterSet & rename(const char *label)
change name; handy in const declarations that use operators
Definition CharacterSet.h:61

CharacterSet::WSP
static const CharacterSet WSP
Definition CharacterSet.h:98

CharacterSet::DIGIT
static const CharacterSet DIGIT
Definition CharacterSet.h:84

CharacterSet::ALPHA
static const CharacterSet ALPHA
Definition CharacterSet.h:76

CharacterSet::LF
static const CharacterSet LF
Definition CharacterSet.h:92

CharacterSet::CR
static const CharacterSet CR
Definition CharacterSet.h:80

CharacterSet::HTAB
static const CharacterSet HTAB
Definition CharacterSet.h:90

Http::One::Parser::getHostHeaderField
char * getHostHeaderField()
Definition Parser.cc:213

Http::One::Parser::msgProtocol_
AnyP::ProtocolVersion msgProtocol_
what protocol label has been found in the first line (if any)
Definition Parser.h:152

Http::One::Parser::WhitespaceCharacters
static const CharacterSet & WhitespaceCharacters()
Definition Parser.cc:52

Http::One::Parser::buf_
SBuf buf_
bytes remaining to be parsed
Definition Parser.h:146

Http::One::Parser::cleanMimePrefix
void cleanMimePrefix()
Definition Parser.cc:97

Http::One::Parser::skipLineTerminator
void skipLineTerminator(Tokenizer &) const
Definition Parser.cc:66

Http::One::Parser::grabMimeBlock
bool grabMimeBlock(const char *which, const size_t limit)
Definition Parser.cc:157

Http::One::Parser::parsingStage_
ParseState parsingStage_
what stage the parser is currently up to
Definition Parser.h:149

Http::One::Parser::DelimiterCharacters
static const CharacterSet & DelimiterCharacters()
Definition Parser.cc:59

Http::One::Parser::clear
virtual void clear()=0
Definition Parser.cc:27

Http::One::Parser::unfoldMime
void unfoldMime()
Definition Parser.cc:132

Http::One::Parser::mimeHeaderBlock_
SBuf mimeHeaderBlock_
buffer holding the mime headers (if any)
Definition Parser.h:155

Http::One::Parser::Tokenizer
::Parser::Tokenizer Tokenizer
Definition Parser.h:44

Http::One::Parser::Http1magic
static const SBuf Http1magic
RFC 7230 section 2.6 - 7 magic octets.
Definition Parser.h:143

Parser::Tokenizer
Definition Tokenizer.h:30

SBuf
Definition SBuf.h:94

SBuf::caseCmp
int caseCmp(const SBuf &S, const size_type n) const
shorthand version for case-insensitive compare()
Definition SBuf.h:287

SBuf::npos
static const size_type npos
Definition SBuf.h:100

SBuf::consume
SBuf consume(size_type n=npos)
Definition SBuf.cc:481

SBuf::chop
SBuf & chop(size_type pos, size_type n=npos)
Definition SBuf.cc:530

SBuf::findFirstNotOf
size_type findFirstNotOf(const CharacterSet &set, size_type startPos=0) const
Definition SBuf.cc:746

SBuf::trim
SBuf & trim(const SBuf &toRemove, bool atBeginning=true, bool atEnd=true)
Definition SBuf.cc:551

SBuf::clear
void clear()
Definition SBuf.cc:175

SBuf::substr
SBuf substr(size_type pos, size_type n=npos) const
Definition SBuf.cc:576

SBuf::size_type
MemBlob::size_type size_type
Definition SBuf.h:96

SquidConfig::onoff
struct SquidConfig::@90 onoff

SquidConfig::relaxed_header_parser
int relaxed_header_parser
Definition SquidConfig.h:315

Stream.h

DBG_IMPORTANT
#define DBG_IMPORTANT
Definition Stream.h:38

debugs
#define debugs(SECTION, LEVEL, CONTENT)
Definition Stream.h:192

RelaxedDelimiterCharacters
static const CharacterSet & RelaxedDelimiterCharacters()
characters HTTP permits tolerant parsers to accept as delimiters
Definition Parser.cc:37

LineCharacters
static const CharacterSet & LineCharacters()
all characters except the LF line terminator
Definition Parser.cc:76

GET_HDR_SZ
#define GET_HDR_SZ
Definition Parser.cc:208

Parser.h

headersEnd
size_t headersEnd(const char *mime, size_t l, bool &containsObsFold)
Definition mime_header.cc:17

mime_header.h

AnyP::PROTO_ICY
@ PROTO_ICY
Definition ProtocolType.h:37

AnyP::PROTO_HTTP
@ PROTO_HTTP
Definition ProtocolType.h:25

Http::One
common part of ParseBws() and ParseStrictBws()
Definition forward.h:17

Http::One::ParseBws
void ParseBws(Parser::Tokenizer &)
Definition Parser.cc:297

Http::One::HTTP_PARSE_DONE
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition Parser.h:29

Http::One::HTTP_PARSE_NONE
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition Parser.h:23

Http::One::ParseBws_
static void ParseBws_(Parser::Tokenizer &tok, const CharacterSet &bwsChars)
Definition Parser.cc:277

Http::One::CrLf
const SBuf & CrLf()
CRLF textual representation.
Definition Parser.cc:20

Http::One::ParseStrictBws
void ParseStrictBws(Parser::Tokenizer &)
Definition Parser.cc:303

Http::One::ErrorLevel
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition Parser.cc:269

Http::scHeaderTooLarge
@ scHeaderTooLarge
Header too large to process.
Definition StatusCode.h:89

Tokenizer.h

squid.h

LOCAL_ARRAY
#define LOCAL_ARRAY(type, name, size)
Definition squid.h:62

tok
Definition parse.c:160