23 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
24 "abcdefghijklmnopqrstuvwxyz"
29 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
30 "abcdefghijklmnopqrstuvwxyz"
47 static const auto userInfoValid =
CharacterSet(
"userinfo",
":-._~%!$&'()*+,;=") +
66 static const auto pathValid =
CharacterSet(
"path",
"/:@-._~%!$&'()*+,;=") +
89 output.
append(goodSection);
94 output.
appendf(
"%%%02X",
static_cast<unsigned int>(
static_cast<unsigned char>(ch)));
97 if (tk.
prefix(goodSection, ignore))
98 output.
append(goodSection);
109 while (!
tok.atEnd()) {
112 if (
tok.prefix(token, unencodedChars))
117 const auto rawBytesAfterPercent =
tok.remaining();
118 int64_t hex1 = 0, hex2 = 0;
119 if (
tok.int64(hex1, 16,
false, 1) &&
tok.int64(hex2, 16,
false, 1)) {
120 output.
append(
static_cast<char>((hex1 << 4) | hex2));
123 debugs(23, 3,
"invalid pct-encoding sequence starting at %" << rawBytesAfterPercent);
134 if (
const auto decoded = Decode(input))
142 static SBuf star(
"*");
149 static SBuf slash(
"/");
156 hostAddr_.fromHost(src);
157 if (hostAddr_.isAnyAddr()) {
158 xstrncpy(host_, src,
sizeof(host_));
159 hostIsNumeric_ =
false;
161 hostAddr_.toHostStr(host_,
sizeof(host_));
162 debugs(23, 3,
"given IP: " << hostAddr_);
172 if (hostIsNumeric()) {
174 const auto hostStrLen = hostIP().toHostStr(ip,
sizeof(ip));
175 return SBuf(ip, hostStrLen);
180std::optional<AnyP::Host>
189 const SBuf regName(host());
192 debugs(23, 3,
"rejecting percent-encoded reg-name: " << regName);
214 debugs(23, 5,
"urlInitialize: Initializing...");
302 const uint64_t dlen = strlen(host);
305 debugs(23, 2,
"URL domain too large (" << dlen <<
" bytes)");
340 foundHost[0] = urlpath[0] = login[0] =
'\0';
348 Asterisk().cmp(rawUrl) == 0) {
351 port(getScheme().defaultPort());
363 const auto rawHost = parseHost(
tok);
364 Assure(rawHost.length() <
sizeof(foundHost));
369 foundPort = parsePort(
tok);
371 if (!
tok.remaining().isEmpty())
386 static const SBuf doubleSlash(
"//");
387 if (!
tok.skip(doubleSlash))
390 auto B =
tok.remaining();
391 const char *url = B.c_str();
401 for (dst = foundHost; i < l && *src !=
'/' && *src !=
'?' && *src !=
'#' && *src !=
'\0' && !
xisspace(*src); ++i, ++src, ++dst) {
423 for (; i < l && *src !=
'\r' && *src !=
'\n' && *src !=
'\0'; ++i, ++src, ++dst) {
439 t = strrchr(foundHost,
'@');
441 strncpy((
char *) login, (
char *) foundHost,
sizeof(login)-1);
442 login[
sizeof(login)-1] =
'\0';
443 t = strrchr(login,
'@');
445 strncpy((
char *) foundHost, t + 1,
sizeof(foundHost)-1);
446 foundHost[
sizeof(foundHost)-1] =
'\0';
452 if (*foundHost ==
'[') {
458 l = strlen(foundHost);
460 for (; i < l && *src !=
']' && *src !=
'\0'; ++i, ++src, ++dst) {
469 while (*dst !=
'\0' && *dst !=
':')
473 t = strrchr(foundHost,
':');
475 if (t != strchr(foundHost,
':') ) {
485 debugs(23,
DBG_IMPORTANT,
"SECURITY ALERT: Missing hostname in URL '" << url <<
"'. see access.log for details.");
489 if (t && *t ==
':') {
496 for (t = foundHost; *t; ++t)
513 debugs(23, 3,
"Split URL '" << rawUrl <<
"' into proto='" << scheme.
image() <<
"', host='" << foundHost <<
"', port='" << foundPort <<
"', path='" << urlpath <<
"'");
525 while ((l = strlen(foundHost)) > 0 && foundHost[--l] ==
'.')
529 if (strstr(foundHost,
"..") || *foundHost ==
'.') {
534 if (foundPort < 1 || foundPort > 65535) {
535 debugs(23, 3,
"Invalid port '" << foundPort <<
"'");
540 debugs(23, 2,
"URI has whitespace: {" << rawUrl <<
"}");
556 *(urlpath + strcspn(urlpath,
w_space)) =
'\0';
576 userInfo(
SBuf(login));
606 if (!
tok.prefix(nid, nidChars, 32))
615 if (!alphanum[*nid.
begin()])
618 if (!alphanum[*nid.
rbegin()])
624 path(
tok.remaining());
625 debugs(23, 3,
"Split URI into proto=urn, nid=" << nid <<
", " <<
Raw(
"path",path().rawContent(),path().length()));
650 if (!
tok.prefix(ipv6ish, IPv6chars))
651 throw TextException(
"malformed or unsupported bracketed IP address in uri-host",
Here());
654 throw TextException(
"IPv6 address is missing a closing bracket in uri-host",
Here());
658 throw TextException(
"bracketed IPv6 address is missing a colon in uri-host",
Here());
696 if (!
tok.int64(rawPort, 10,
false))
701 constexpr auto portStorageMax = std::numeric_limits<Port::value_type>::max();
702 static_assert(!
Less(portStorageMax, portMax),
"Port type can represent the maximum valid port number");
703 if (
Less(portMax, rawPort))
708 return NaturalCast<int>(rawPort);
715 authorityHttp_.clear();
716 authorityWithPort_.clear();
717 absolutePath_.clear();
723 if (authorityHttp_.isEmpty()) {
726 authorityWithPort_.
append(host());
727 authorityHttp_ = authorityWithPort_;
729 if (
port().has_value()) {
732 if (
port() != getScheme().defaultPort())
733 authorityHttp_ = authorityWithPort_;
739 return requirePort ? authorityWithPort_ : authorityHttp_;
745 if (absolute_.isEmpty()) {
749 absolute_.append(getScheme().image());
750 absolute_.append(
":",1);
752 absolute_.append(
"//", 2);
756 if (allowUserInfo && !userInfo().isEmpty()) {
759 .
rename(
"userinfo-reserved");
760 absolute_.append(Encode(userInfo(), uiChars));
761 absolute_.append(
"@", 1);
763 absolute_.append(authority());
765 absolute_.append(host());
766 absolute_.append(
":", 1);
768 absolute_.append(absolutePath());
777 if (absolutePath_.isEmpty()) {
779 absolutePath_ = Encode(path(),
PathChars());
782 return absolutePath_;
795 buf[
sizeof(buf)-1] =
'\0';
801 if (
auto t = strchr(buf,
'?')) {
870 for (
const auto *p = url; *p !=
'\0' && *p !=
'/' && *p !=
'?' && *p !=
'#'; ++p) {
887 const auto lastSlashPos = path_.rfind(
'/');
889 const auto relUrlLength = strlen(relUrl);
892 path_.reserveCapacity(1 + relUrlLength);
893 path_.assign(
"/", 1);
896 path_.reserveCapacity(lastSlashPos + 1 + relUrlLength);
897 path_.chop(0, lastSlashPos+1);
899 path_.append(relUrl, relUrlLength);
908 const bool hostIncludesSubdomains = (*h ==
'.');
926 if (hl == 0 && dl == 0) {
943 if (1 == dl &&
'.' == d[0])
959 while(--hl >= 0 && h[hl] !=
'.');
964 return hostIncludesSubdomains ? 1 : 0;
1048#if USE_OPENSSL || HAVE_LIBGNUTLS
1069 hostIsNumeric_(false)
1078 char *cleanedUri =
nullptr;
1091 const auto pos = strcspn(uri,
w_space);
1092 char *choppedUri =
nullptr;
1093 if (pos < strlen(uri))
1094 choppedUri =
xstrndup(uri, pos + 1);
1097 cleanedUri[pos] =
'\0';
1107 char *tmp_uri =
static_cast<char*
>(
xmalloc(strlen(uri) + 1));
#define Assure(condition)
#define Here()
source code location of the caller
void SBufToCstring(char *d, const SBuf &s)
#define SQUIDSBUFPRINT(s)
constexpr bool Less(const A a, const B b)
whether integer a is less than integer b, with correct overflow handling
int stringHasWhitespace(const char *)
int stringHasCntl(const char *)
std::ostream & CurrentException(std::ostream &os)
prints active (i.e., thrown but not yet handled) exception
bool urlCheckRequest(const HttpRequest *r)
static const char valid_hostname_chars[]
static const char valid_hostname_chars_u[]
bool urlIsRelative(const char *url)
static const CharacterSet & PathChars()
Characters which are valid within a URI path section.
int matchDomainName(const char *h, const char *d, MatchDomainNameFlags flags)
char * urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &method, const AnyP::UriScheme &scheme)
static AnyP::UriScheme uriParseScheme(Parser::Tokenizer &tok)
static const CharacterSet & UserInfoChars()
Characters which are valid within a URI userinfo section.
const char * urlCanonicalFakeHttps(const HttpRequest *request)
bool urlAppendDomain(char *host)
apply append_domain config to the given hostname
static std::optional< Host > ParseIp(const Ip::Address &)
converts an already parsed IP address to a Host object
static std::optional< Host > ParseSimpleDomainName(const SBuf &)
static AnyP::ProtocolType FindProtocolType(const SBuf &)
static const SBuf & SlashPath()
the static '/' default URL-path
SBuf parseHost(Parser::Tokenizer &) const
void parseUrn(Parser::Tokenizer &)
AnyP::UriScheme const & getScheme() const
void touch()
clear the cached URI display forms
SBuf & authority(bool requirePort=false) const
static SBuf DecodeOrDupe(const SBuf &input)
char host_[SQUIDHOSTNAMELEN]
string representation of the URI authority name or IP
const char * host(void) const
SBuf & absolutePath() const
RFC 3986 section 4.2 relative reference called 'absolute-path'.
static std::optional< SBuf > Decode(const SBuf &)
std::optional< Host > parsedHost() const
static char * cleanup(const char *uri)
void addRelativePath(const char *relUrl)
int parsePort(Parser::Tokenizer &) const
static const SBuf & Asterisk()
the static '*' pseudo-URI
void port(const Port p)
reset authority port subcomponent
const SBuf & path() const
void host(const char *src)
bool parse(const HttpRequestMethod &, const SBuf &url)
static SBuf Encode(const SBuf &, const CharacterSet &expected)
optimized set of C chars, with quick membership test and merge support
CharacterSet complement(const char *complementLabel=nullptr) const
static const CharacterSet TCHAR
CharacterSet & rename(const char *label)
change name; handy in const declarations that use operators
static const CharacterSet DIGIT
static const CharacterSet ALPHA
static const CharacterSet HEXDIG
CharacterSet & remove(const unsigned char c)
remove a given character from the character set
char * canonicalCleanUrl() const
AnyP::Uri url
the request URI
bool fromHost(const char *hostWithoutPort)
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
const SBuf & remaining() const
the remaining unprocessed section of buffer
bool atEnd() const
whether the end of the buffer has been reached
bool skip(const SBuf &tokenToSkip)
const char * rawContent() const
static const size_type npos
char at(size_type pos) const
void reserveCapacity(size_type minCapacity)
size_type length() const
Returns the number of bytes stored in SBuf.
SBuf & appendf(const char *fmt,...) PRINTF_FORMAT_ARG2
size_type find(char c, size_type startPos=0) const
const_iterator begin() const
SBuf & append(const SBuf &S)
const_reverse_iterator rbegin() const
void reserveSpace(size_type minSpace)
struct SquidConfig::@90 onoff
an std::runtime_error with thrower location info
#define debugs(SECTION, LEVEL, CONTENT)
#define URI_WHITESPACE_CHOP
#define URI_WHITESPACE_STRIP
#define URI_WHITESPACE_DENY
#define URI_WHITESPACE_ALLOW
#define URI_WHITESPACE_ENCODE
#define MAX_IPSTRLEN
Length of buffer that needs to be allocated to old a null-terminated IP-string.
const char * ProtocolType_str[]
uint16_t KnownPort
validated/supported port number; these values are never zero
#define RFC1738_ESCAPE_NOSPACE
char * rfc1738_do_escape(const char *url, int flags)
#define RFC1738_ESCAPE_UNESCAPED
#define rfc1738_escape_unescaped(x)
void rfc1738_unescape(char *url)
#define LOCAL_ARRAY(type, name, size)
char * xstrncpy(char *dst, const char *src, size_t n)
char * xstrndup(const char *s, size_t n)