Squid Web Cache master
Loading...
Searching...
No Matches
RequestParser.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#include "squid.h"
10#include "debug/Stream.h"
13#include "parser/Tokenizer.h"
14#include "SquidConfig.h"
15
16Http1::Parser::size_type
18{
19 // RFC 7230 section 2.6
20 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
21 return method_.image().length() + uri_.length() + 12;
22}
23
// Removes empty lines found at the very start of buf_, i.e. ahead of the
// request-line, optionally warning about them first.
//
// NOTE(review): listing lines 38 and 40 are absent from this extract. Line 38
// is presumably the function declarator (doParse() calls skipGarbageLines()
// before looking for the request-line). Line 40 presumably opens the block
// that the first of the two trailing closing braces ends; the warning text
// suggests it tests relaxed_header_parser -- confirm against the real file.
37void
39{
// A negative relaxed_header_parser value triggers a DBG_IMPORTANT warning when
// the buffer starts with CR or LF.
// NOTE(review): buf_[0] is read here without a preceding isEmpty() check,
// unlike the loop below -- confirm that callers never get here with an empty
// buffer.
41 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
42 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
43 "CRLF bytes received ahead of request-line. " <<
44 "Ignored due to relaxed_header_parser.");
45 // Be tolerant of prefix empty lines
46 // ie any series of either \n or \r\n with no other characters and no repeated \r
// One byte is consumed per iteration: a leading LF is dropped directly, and a
// leading CR is dropped only when an LF immediately follows it (that LF is
// then dropped by the next iteration).
47 while (!buf_.isEmpty() && (buf_[0] == '\n' ||
48 (buf_[0] == '\r' && buf_.length() > 1 && buf_[1] == '\n'))) {
49 buf_.consume(1);
50 }
51 }
52}
53
61bool
63{
64 // method field is a sequence of TCHAR.
65 // Limit to 32 characters to prevent overly long sequences of non-HTTP
66 // being sucked in before mismatch is detected. 32 is itself annoyingly
67 // big but there are methods registered by IANA that reach 17 bytes:
68 // http://www.iana.org/assignments/http-methods
69 static const size_t maxMethodLength = 32; // TODO: make this configurable?
70
71 SBuf methodFound;
72 if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
73 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed method");
74 parseStatusCode = Http::scBadRequest;
75 return false;
76 }
77 method_ = HttpRequestMethod(methodFound);
78
79 if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
80 return false;
81
82 return true;
83}
84
// Returns a reference to a function-local static CharacterSet named UriChars,
// assembled from the RFC 3986 character groups listed below (the characters
// which truly are valid within a URI).
//
// NOTE(review): listing lines 87, 101 and 104 are absent from this extract.
// Line 87 is presumably the function declarator. Lines 101 and 104 presumably
// hold the "unreserved" and HEXDIG terms that the neighbouring comments
// announce, plus the terminating semicolon -- confirm against the real file.
86static const CharacterSet &
88{
89 /* RFC 3986 section 2:
90 * "
91 * A URI is composed from a limited set of characters consisting of
92 * digits, letters, and a few graphic symbols.
93 * "
94 */
95 static const CharacterSet UriChars =
96 CharacterSet("URI-Chars","") +
97 // RFC 3986 section 2.2 - reserved characters
98 CharacterSet("gen-delims", ":/?#[]@") +
99 CharacterSet("sub-delims", "!$&'()*+,;=") +
100 // RFC 3986 section 2.3 - unreserved characters
102 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
103 CharacterSet("pct-encoded", "%") +
105
106 return UriChars;
107}
108
// Returns the set of characters which Squid will accept in the HTTP
// request-target (URI).
// The first branch returns a wider, function-local static set whose contents
// depend on USE_HTTP_VIOLATIONS; otherwise the strict UriValidCharacters()
// set is returned.
//
// NOTE(review): listing lines 111, 113, 116 and 128 are absent from this
// extract. Line 111 is presumably the function declarator, line 113 presumably
// opens the conditional block closed just before the strict return, and lines
// 116 and 128 presumably supply the first term of each static initializer --
// confirm against the real file.
110const CharacterSet &
112{
114#if USE_HTTP_VIOLATIONS
115 static const CharacterSet RelaxedExtended =
117 // accept whitespace (extended), it will be dealt with later
118 DelimiterCharacters() +
119 // RFC 2396 unwise character set which must never be transmitted
120 // in un-escaped form. But many web services do anyway.
121 CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
122 // UTF-8 because we want to be future-proof
// i.e. every byte value from 128 through 255
123 CharacterSet("UTF-8", 128, 255);
124
125 return RelaxedExtended;
126#else
127 static const CharacterSet RelaxedCompliant =
129 // accept whitespace (extended), it will be dealt with later.
130 DelimiterCharacters();
131
132 return RelaxedCompliant;
133#endif
134 }
135
136 // strict parse only accepts what the RFC say we can
137 return UriValidCharacters();
138}
139
140bool
142{
143 /* Arbitrary 64KB URI upper length limit.
144 *
145 * Not quite as arbitrary as it seems though. Old SquidString objects
146 * cannot store strings larger than 64KB, so we must limit until they
147 * have all been replaced with SBuf.
148 *
149 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
150 * at least 8000 octets for the whole line, including method and version.
151 */
152 const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
153
154 SBuf uriFound;
155 if (!tok.prefix(uriFound, RequestTargetCharacters())) {
156 parseStatusCode = Http::scBadRequest;
157 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed URI");
158 return false;
159 }
160
161 if (uriFound.length() > maxUriLength) {
162 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
163 parseStatusCode = Http::scUriTooLong;
164 debugs(33, ErrorLevel(), "ERROR: invalid request-line: " << uriFound.length() <<
165 "-byte URI exceeds " << maxUriLength << "-byte limit");
166 return false;
167 }
168
169 uri_ = uriFound;
170 return true;
171}
172
173bool
175{
176 static const SBuf http1p0("HTTP/1.0");
177 static const SBuf http1p1("HTTP/1.1");
178 const auto savedTok = tok;
179
180 // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
181 // the vast majority of cases.
182 if (tok.skipSuffix(http1p1)) {
183 msgProtocol_ = Http::ProtocolVersion(1, 1);
184 return true;
185 } else if (tok.skipSuffix(http1p0)) {
186 msgProtocol_ = Http::ProtocolVersion(1, 0);
187 return true;
188 } else {
189 // RFC 7230 section 2.6:
190 // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
191 static const CharacterSet period("Decimal point", ".");
192 static const SBuf proto("HTTP/");
193 SBuf majorDigit;
194 SBuf minorDigit;
195 if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
196 tok.skipOneTrailing(period) &&
197 tok.suffix(majorDigit, CharacterSet::DIGIT) &&
198 tok.skipSuffix(proto)) {
199 const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
200 // use '0.0' for unsupported multiple digit version numbers
201 const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
202 const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
203 msgProtocol_ = Http::ProtocolVersion(major, minor);
204 return true;
205 }
206 }
207
208 // A GET request might use HTTP/0.9 syntax
209 if (method_ == Http::METHOD_GET) {
210 // RFC 1945 - no HTTP version field at all
211 tok = savedTok; // in case the URI ends with a digit
212 // report this assumption as an error if configured to triage parsing
213 debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
214 msgProtocol_ = Http::ProtocolVersion(0,9);
215 return true;
216 }
217
218 debugs(33, ErrorLevel(), "ERROR: invalid request-line: not HTTP");
219 parseStatusCode = Http::scBadRequest;
220 return false;
221}
222
228bool
229Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
230{
231 if (count <= 0) {
232 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing delimiter " << where);
233 parseStatusCode = Http::scBadRequest;
234 return false;
235 }
236
237 // tolerant parser allows multiple whitespace characters between request-line fields
238 if (count > 1 && !Config.onoff.relaxed_header_parser) {
239 debugs(33, ErrorLevel(), "ERROR: invalid request-line: too many delimiters " << where);
240 parseStatusCode = Http::scBadRequest;
241 return false;
242 }
243
244 return true;
245}
246
// Parses CRs at the end of the request-line, just before the terminating LF.
// One branch skips any number of trailing CRs (including none) and ignores how
// many were found. The other branch needs skipOneTrailing(CR) to succeed;
// otherwise it sets parseStatusCode to 400 (Bad Request) and returns false.
// Returns true in all remaining cases.
//
// NOTE(review): listing lines 249 and 251 are absent from this extract. Line
// 249 is presumably the function declarator and line 251 presumably holds the
// condition that selects the tolerant branch -- confirm against the real file.
248bool
250{
252 (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
253 } else {
254 if (!tok.skipOneTrailing(CharacterSet::CR)) {
255 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing CR before LF");
256 parseStatusCode = Http::scBadRequest;
257 return false;
258 }
259 }
260 return true;
261}
262
274int
276{
277 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
278 debugs(74, DBG_DATA, buf_);
279
280 SBuf line;
281
282 // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
283 // Now, the request line has to end at the first LF.
284 static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
285 Tokenizer lineTok(buf_);
286 if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
287 if (buf_.length() >= Config.maxRequestHeaderSize) {
288 /* who should we blame for our failure to parse this line? */
289
290 Tokenizer methodTok(buf_);
291 if (!parseMethodField(methodTok))
292 return -1; // blame a bad method (or its delimiter)
293
294 // assume it is the URI
295 debugs(74, ErrorLevel(), "ERROR: invalid request-line: URI exceeds " <<
296 Config.maxRequestHeaderSize << "-byte limit");
297 parseStatusCode = Http::scUriTooLong;
298 return -1;
299 }
300 debugs(74, 5, "Parser needs more data");
301 return 0;
302 }
303
304 Tokenizer tok(line);
305
306 if (!parseMethodField(tok))
307 return -1;
308
309 /* now parse backwards, to leave just the URI */
310 if (!skipTrailingCrs(tok))
311 return -1;
312
313 if (!parseHttpVersionField(tok))
314 return -1;
315
316 if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
317 return -1;
318
319 /* parsed everything before and after the URI */
320
321 if (!parseUriField(tok))
322 return -1;
323
324 if (!tok.atEnd()) {
325 debugs(33, ErrorLevel(), "ERROR: invalid request-line: garbage after URI");
326 parseStatusCode = Http::scBadRequest;
327 return -1;
328 }
329
330 parseStatusCode = Http::scOkay;
331 buf_ = lineTok.remaining(); // incremental parse checkpoint
332 return 1;
333}
334
335bool
337{
338 const bool result = doParse(aBuf);
339 if (preserveParsed_) {
340 assert(aBuf.length() >= remaining().length());
341 parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
342 }
343
344 return result;
345}
346
347// raw is not a reference because a reference might point back to our own buf_ or parsed_
348bool
350{
351 buf_ = aBuf;
352 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
353
354 // stage 1: locate the request-line
355 if (parsingStage_ == HTTP_PARSE_NONE) {
356 skipGarbageLines();
357
358 // if we hit something before EOS treat it as a message
359 if (!buf_.isEmpty())
360 parsingStage_ = HTTP_PARSE_FIRST;
361 else
362 return false;
363 }
364
365 // stage 2: parse the request-line
366 if (parsingStage_ == HTTP_PARSE_FIRST) {
367 const int retcode = parseRequestFirstLine();
368
369 // first-line (or a look-alike) found successfully.
370 if (retcode > 0) {
371 parsingStage_ = HTTP_PARSE_MIME;
372 }
373
374 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
375 debugs(74, 5, "request-line: method: " << method_);
376 debugs(74, 5, "request-line: url: " << uri_);
377 debugs(74, 5, "request-line: proto: " << msgProtocol_);
378 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
379
380 // syntax errors already
381 if (retcode < 0) {
382 parsingStage_ = HTTP_PARSE_DONE;
383 return false;
384 }
385 }
386
387 // stage 3: locate the mime header block
388 if (parsingStage_ == HTTP_PARSE_MIME) {
389 // HTTP/1.x request-line is valid and parsing completed.
390 if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
391 if (parseStatusCode == Http::scHeaderTooLarge)
392 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
393 return false;
394 }
395 }
396
397 return !needsMoreData();
398}
399
static const CharacterSet & UriValidCharacters()
the characters which truly are valid within URI
class SquidConfig Config
#define assert(EX)
Definition assert.h:17
optimized set of C chars, with quick membership test and merge support
CharacterSet complement(const char *complementLabel=nullptr) const
static const CharacterSet TCHAR
static const CharacterSet DIGIT
static const CharacterSet HEXDIG
static const CharacterSet LF
static const CharacterSet CR
static const CharacterSet & RFC3986_UNRESERVED()
allowed URI characters that do not have a reserved purpose, RFC 3986
const SBuf & image() const
::Parser::Tokenizer Tokenizer
Definition Parser.h:44
bool parseMethodField(Tokenizer &)
bool doParse(const SBuf &aBuf)
called from parse() to do the parsing
Http1::Parser::size_type firstLineSize() const override
size in bytes of the first line including CRLF terminator
static const CharacterSet & RequestTargetCharacters()
characters which Squid will accept in the HTTP request-target (URI)
bool parse(const SBuf &aBuf) override
bool skipDelimiter(const size_t count, const char *where)
bool parseHttpVersionField(Tokenizer &)
HttpRequestMethod method_
what request method has been found on the first line
bool parseUriField(Tokenizer &)
SBuf uri_
raw copy of the original client request-line URI field
bool skipTrailingCrs(Tokenizer &tok)
Parse CRs at the end of request-line, just before the terminating LF.
Definition SBuf.h:94
const char * rawContent() const
Definition SBuf.cc:509
size_type length() const
Returns the number of bytes stored in SBuf.
Definition SBuf.h:419
SBuf substr(size_type pos, size_type n=npos) const
Definition SBuf.cc:576
struct SquidConfig::@90 onoff
size_t maxRequestHeaderSize
int relaxed_header_parser
#define DBG_DATA
Definition Stream.h:40
#define DBG_IMPORTANT
Definition Stream.h:38
#define debugs(SECTION, LEVEL, CONTENT)
Definition Stream.h:192
@ HTTP_PARSE_FIRST
HTTP/1 message first-line.
Definition Parser.h:24
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition Parser.h:29
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition Parser.h:28
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition Parser.h:23
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition Parser.cc:269
@ scUriTooLong
Definition StatusCode.h:59
@ scHeaderTooLarge
Header too large to process.
Definition StatusCode.h:89
@ scBadRequest
Definition StatusCode.h:45
@ scOkay
Definition StatusCode.h:27
@ scRequestHeaderFieldsTooLarge
Definition StatusCode.h:71
@ METHOD_GET
Definition MethodType.h:25
AnyP::ProtocolVersion ProtocolVersion()
Definition parse.c:160