Squid Web Cache master
Loading...
Searching...
No Matches
Parser.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#include "squid.h"
10#include "base/CharacterSet.h"
11#include "debug/Stream.h"
12#include "http/one/Parser.h"
13#include "mime_header.h"
14#include "parser/Tokenizer.h"
15#include "SquidConfig.h"
16
18const SBuf Http::One::Parser::Http1magic("HTTP/1.");
19
21{
22 static const SBuf crlf("\r\n");
23 return crlf;
24}
25
26void
34
36static const CharacterSet &
38{
39 // RFC 7230 section 3.5
40 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
41 // or bare CR as whitespace between request-line fields
42 static const CharacterSet RelaxedDels =
45 CharacterSet("VT,FF","\x0B\x0C") +
46 CharacterSet::CR).rename("relaxed-WSP");
47
48 return RelaxedDels;
49}
50
51const CharacterSet &
57
58const CharacterSet &
64
65void
67{
69 return;
70
71 tok.skipRequired("line-terminating CRLF", Http1::CrLf());
72}
73
75static const CharacterSet &
77{
78 static const CharacterSet line = CharacterSet::LF.complement("non-LF");
79 return line;
80}
81
96void
98{
99 Tokenizer tok(mimeHeaderBlock_);
100 while (tok.skipOne(RelaxedDelimiterCharacters())) {
101 (void)tok.skipAll(LineCharacters()); // optional line content
102 // LF terminator is required.
103 // trust headersEnd() to ensure that we have at least one LF
104 (void)tok.skipOne(CharacterSet::LF);
105 }
106
107 // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
108 // then we skipped everything, including that terminating LF.
109 // Restore the terminating CRLF if needed.
110 if (tok.atEnd())
111 mimeHeaderBlock_ = Http1::CrLf();
112 else
113 mimeHeaderBlock_ = tok.remaining();
114 // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
115}
116
131void
133{
134 Tokenizer tok(mimeHeaderBlock_);
135 const auto szLimit = mimeHeaderBlock_.length();
136 mimeHeaderBlock_.clear();
137 // prevent the mime sender being able to make append() realloc/grow multiple times.
138 mimeHeaderBlock_.reserveSpace(szLimit);
139
140 static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
141
142 while (!tok.atEnd()) {
143 const SBuf all(tok.remaining());
144 const auto blobLen = tok.skipAll(nonCRLF); // may not be there
145 const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
146 const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
147
148 if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
149 mimeHeaderBlock_.append(all.substr(0, blobLen));
150 mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
151 } else
152 mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
153 }
154}
155
156bool
157Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
158{
159 // MIME headers block exist in (only) HTTP/1.x and ICY
160 const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
161 msgProtocol_.protocol == AnyP::PROTO_ICY ||
162 hackExpectsMime_;
163
164 if (expectMime) {
165 /* NOTE: HTTP/0.9 messages do not have a mime header block.
166 * So the rest of the code will need to deal with '0'-byte headers
167 * (ie, none, so don't try parsing em)
168 */
169 bool containsObsFold;
170 if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
171
172 // Squid could handle these headers, but admin does not want to
173 if (firstLineSize() + mimeHeaderBytes >= limit) {
174 debugs(33, 5, "Too large " << which);
175 parseStatusCode = Http::scHeaderTooLarge;
176 buf_.consume(mimeHeaderBytes);
177 parsingStage_ = HTTP_PARSE_DONE;
178 return false;
179 }
180
181 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
182 cleanMimePrefix();
183 if (containsObsFold)
184 unfoldMime();
185
186 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
187
188 } else { // headersEnd() == 0
189 if (buf_.length()+firstLineSize() >= limit) {
190 debugs(33, 5, "Too large " << which);
191 parseStatusCode = Http::scHeaderTooLarge;
192 parsingStage_ = HTTP_PARSE_DONE;
193 } else
194 debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
195 return false;
196 }
197
198 } else
199 debugs(33, 3, "Missing HTTP/1.x identifier");
200
201 // NP: we do not do any further stages here yet so go straight to DONE
202 parsingStage_ = HTTP_PARSE_DONE;
203
204 return true;
205}
206
207// arbitrary maximum-length for headers which can be found by Http1Parser::getHostHeaderField()
208#define GET_HDR_SZ 1024
209
210// BUG: returns only the first header line with given name,
211// ignores multi-line headers and obs-fold headers
212char *
214{
215 if (!headerBlockSize())
216 return nullptr;
217
218 LOCAL_ARRAY(char, header, GET_HDR_SZ);
219 const char *name = "Host";
220 const int namelen = strlen(name);
221
222 debugs(25, 5, "looking for " << name);
223
224 // while we can find more LF in the SBuf
225 Tokenizer tok(mimeHeaderBlock_);
226 SBuf p;
227
228 while (tok.prefix(p, LineCharacters())) {
229 if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
230 break; // error. reached invalid octet or end of buffer instead of an LF ??
231
232 // header lines must start with the name (case insensitive)
233 if (p.substr(0, namelen).caseCmp(name, namelen))
234 continue;
235
236 // then a COLON
237 if (p[namelen] != ':')
238 continue;
239
240 // drop any trailing *CR sequence
241 p.trim(Http1::CrLf(), false, true);
242
243 debugs(25, 5, "checking " << p);
244 p.consume(namelen + 1);
245
246 // TODO: optimize SBuf::trim to take CharacterSet directly
247 Tokenizer t(p);
248 t.skipAll(CharacterSet::WSP);
249 p = t.remaining();
250
251 // prevent buffer overrun on char header[];
252 p.chop(0, sizeof(header)-1);
253
254 // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port]
255 static const auto hostChars = CharacterSet("host",":[].-_") + CharacterSet::ALPHA + CharacterSet::DIGIT;
256 if (p.findFirstNotOf(hostChars) != SBuf::npos)
257 break; // error. line contains character not accepted in Host header
258
259 // return the header field-value
260 SBufToCstring(header, p);
261 debugs(25, 5, "returning " << header);
262 return header;
263 }
264
265 return nullptr;
266}
267
268int
273
275namespace Http::One {
276static void
278{
279 const auto count = tok.skipAll(bwsChars);
280
281 if (tok.atEnd())
282 throw InsufficientInput(); // even if count is positive
283
284 if (count) {
285 // Generating BWS is a MUST-level violation so warn about it as needed.
286 debugs(33, ErrorLevel(), "found " << count << " BWS octets");
287 // RFC 7230 says we MUST parse BWS, so we fall through even if
288 // Config.onoff.relaxed_header_parser is off.
289 }
290 // else we successfully "parsed" an empty BWS sequence
291
292 // success: no more BWS characters expected
293}
294} // namespace Http::One
295
296void
301
302void
307
void SBufToCstring(char *d, const SBuf &s)
Definition SBuf.h:756
class SquidConfig Config
optimized set of C chars, with quick membership test and merge support
CharacterSet complement(const char *complementLabel=nullptr) const
static const CharacterSet SP
CharacterSet & rename(const char *label)
change name; handy in const declarations that use operators
static const CharacterSet WSP
static const CharacterSet DIGIT
static const CharacterSet ALPHA
static const CharacterSet LF
static const CharacterSet CR
static const CharacterSet HTAB
char * getHostHeaderField()
Definition Parser.cc:213
AnyP::ProtocolVersion msgProtocol_
what protocol label has been found in the first line (if any)
Definition Parser.h:152
static const CharacterSet & WhitespaceCharacters()
Definition Parser.cc:52
SBuf buf_
bytes remaining to be parsed
Definition Parser.h:146
void cleanMimePrefix()
Definition Parser.cc:97
void skipLineTerminator(Tokenizer &) const
Definition Parser.cc:66
bool grabMimeBlock(const char *which, const size_t limit)
Definition Parser.cc:157
ParseState parsingStage_
what stage the parser is currently up to
Definition Parser.h:149
static const CharacterSet & DelimiterCharacters()
Definition Parser.cc:59
virtual void clear()=0
Definition Parser.cc:27
void unfoldMime()
Definition Parser.cc:132
SBuf mimeHeaderBlock_
buffer holding the mime headers (if any)
Definition Parser.h:155
::Parser::Tokenizer Tokenizer
Definition Parser.h:44
static const SBuf Http1magic
RFC 7230 section 2.6 - 7 magic octets.
Definition Parser.h:143
Definition SBuf.h:94
int caseCmp(const SBuf &S, const size_type n) const
shorthand version for case-insensitive compare()
Definition SBuf.h:287
static const size_type npos
Definition SBuf.h:100
SBuf consume(size_type n=npos)
Definition SBuf.cc:481
SBuf & chop(size_type pos, size_type n=npos)
Definition SBuf.cc:530
size_type findFirstNotOf(const CharacterSet &set, size_type startPos=0) const
Definition SBuf.cc:746
SBuf & trim(const SBuf &toRemove, bool atBeginning=true, bool atEnd=true)
Definition SBuf.cc:551
void clear()
Definition SBuf.cc:175
SBuf substr(size_type pos, size_type n=npos) const
Definition SBuf.cc:576
MemBlob::size_type size_type
Definition SBuf.h:96
struct SquidConfig::@90 onoff
int relaxed_header_parser
#define DBG_IMPORTANT
Definition Stream.h:38
#define debugs(SECTION, LEVEL, CONTENT)
Definition Stream.h:192
static const CharacterSet & RelaxedDelimiterCharacters()
characters HTTP permits tolerant parsers to accept as delimiters
Definition Parser.cc:37
static const CharacterSet & LineCharacters()
all characters except the LF line terminator
Definition Parser.cc:76
#define GET_HDR_SZ
Definition Parser.cc:208
size_t headersEnd(const char *mime, size_t l, bool &containsObsFold)
@ PROTO_ICY
@ PROTO_HTTP
common part of ParseBws() and ParseStrctBws()
Definition forward.h:17
void ParseBws(Parser::Tokenizer &)
Definition Parser.cc:297
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition Parser.h:29
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition Parser.h:23
static void ParseBws_(Parser::Tokenizer &tok, const CharacterSet &bwsChars)
Definition Parser.cc:277
const SBuf & CrLf()
CRLF textual representation.
Definition Parser.cc:20
void ParseStrictBws(Parser::Tokenizer &)
Definition Parser.cc:303
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition Parser.cc:269
@ scHeaderTooLarge
Header too large to process.
Definition StatusCode.h:89
#define LOCAL_ARRAY(type, name, size)
Definition squid.h:62
Definition parse.c:160