Squid Web Cache master
Loading...
Searching...
No Matches
CharacterSet.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#include "squid.h"
10#include "base/CharacterSet.h"
11
12#include <algorithm>
13#include <iostream>
14#include <functional>
15
18{
19 Storage::const_iterator s = src.chars_.begin();
20 const Storage::const_iterator e = src.chars_.end();
21 Storage::iterator d = chars_.begin();
22 while (s != e) {
23 if (*s)
24 *d = 1;
25 ++s;
26 ++d;
27 }
28 return *this;
29}
30
33{
34 Storage::const_iterator s = src.chars_.begin();
35 const Storage::const_iterator e = src.chars_.end();
36 Storage::iterator d = chars_.begin();
37 while (s != e) {
38 if (*s)
39 *d = 0;
40 ++s;
41 ++d;
42 }
43 return *this;
44}
45
47CharacterSet::add(const unsigned char c)
48{
49 chars_[static_cast<uint8_t>(c)] = 1;
50 return *this;
51}
52
54CharacterSet::remove(const unsigned char c)
55{
56 chars_[static_cast<uint8_t>(c)] = 0;
57 return *this;
58}
59
61CharacterSet::addRange(unsigned char low, unsigned char high)
62{
63 //manual loop splitting is needed to cover case where high is 255
64 // otherwise low will wrap, resulting in infinite loop
65 while (low < high) {
66 chars_[static_cast<uint8_t>(low)] = 1;
67 ++low;
68 }
69 chars_[static_cast<uint8_t>(high)] = 1;
70 return *this;
71}
72
74CharacterSet::complement(const char *label) const
75{
76 CharacterSet result((label ? label : "complement_of_some_other_set"), "");
77 // negate each of our elements and add them to the result storage
78 std::transform(chars_.begin(), chars_.end(), result.chars_.begin(),
79 std::logical_not<Storage::value_type>());
80 return result;
81}
82
83CharacterSet::CharacterSet(const char *label, const char * const c) :
84 name(label ? label: "anonymous"),
85 chars_(Storage(256,0))
86{
87 const size_t clen = strlen(c);
88 for (size_t i = 0; i < clen; ++i)
89 add(c[i]);
90}
91
92CharacterSet::CharacterSet(const char *label, unsigned char low, unsigned char high) :
93 name(label ? label: "anonymous"),
94 chars_(Storage(256,0))
95{
96 addRange(low,high);
97}
98
99CharacterSet::CharacterSet(const char *label, std::initializer_list<std::pair<uint8_t, uint8_t>> ranges) :
100 name(label ? label: "anonymous"),
101 chars_(Storage(256,0))
102{
103 for (auto range: ranges)
104 addRange(range.first, range.second);
105}
106
107void
108CharacterSet::printChars(std::ostream &os) const
109{
110 for (size_t idx = 0; idx < 256; ++idx) {
111 if (chars_[idx])
112 os << static_cast<char>(idx);
113 }
114}
115
118{
119 lhs += rhs;
120 return lhs;
121}
122
125{
126 lhs -= rhs;
127 return lhs;
128}
129
130std::ostream&
131operator <<(std::ostream &s, const CharacterSet &c)
132{
133 s << "CharacterSet(" << c.name << ')';
134 return s;
135}
136
137const CharacterSet
138// RFC 5234
139CharacterSet::ALPHA("ALPHA", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
140 CharacterSet::BIT("BIT","01"),
141 CharacterSet::CR("CR","\r"),
142CharacterSet::CTL("CTL", {{0x01,0x1f},{0x7f,0x7f}}),
143CharacterSet::DIGIT("DIGIT","0123456789"),
144CharacterSet::DQUOTE("DQUOTE","\""),
145CharacterSet::HEXDIG("HEXDIG","0123456789aAbBcCdDeEfF"),
146CharacterSet::HTAB("HTAB","\t"),
147CharacterSet::LF("LF","\n"),
148CharacterSet::SP("SP"," "),
149CharacterSet::VCHAR("VCHAR", 0x21, 0x7e),
150// RFC 7230
151CharacterSet::WSP("WSP"," \t"),
152CharacterSet::CTEXT("ctext", {{0x09,0x09},{0x20,0x20},{0x2a,0x5b},{0x5d,0x7e},{0x80,0xff}}),
153CharacterSet::TCHAR("TCHAR","!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
154CharacterSet::SPECIAL("SPECIAL","()<>@,;:\\\"/[]?={}"),
155CharacterSet::QDTEXT("QDTEXT", {{0x09,0x09},{0x20,0x21},{0x23,0x5b},{0x5d,0x7e},{0x80,0xff}}),
156CharacterSet::OBSTEXT("OBSTEXT",0x80,0xff),
157// RFC 7232
158CharacterSet::ETAGC("ETAGC", {{0x21,0x21},{0x23,0x7e},{0x80,0xff}}),
159// RFC 7235
160CharacterSet::TOKEN68C("TOKEN68C","-._~+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
161;
162
163const CharacterSet &
165{
166 // RFC 3986: unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
167 static const auto chars = new CharacterSet("RFC3986_UNRESERVED", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~");
168 return *chars;
169}
170
CharacterSet operator+(CharacterSet lhs, const CharacterSet &rhs)
CharacterSet operator-(CharacterSet lhs, const CharacterSet &rhs)
std::ostream & operator<<(std::ostream &s, const CharacterSet &c)
optimized set of C chars, with quick membership test and merge support
static const CharacterSet TOKEN68C
CharacterSet complement(const char *complementLabel=nullptr) const
std::vector< uint8_t > Storage
static const CharacterSet TCHAR
static const CharacterSet ETAGC
static const CharacterSet SP
CharacterSet & operator-=(const CharacterSet &rhs)
set subtraction: remove all characters that are also in rhs
CharacterSet & addRange(unsigned char low, unsigned char high)
add a list of character ranges, expressed as pairs [low,high], including both ends
CharacterSet & add(const unsigned char c)
add a given character to the character set
static const CharacterSet CTEXT
static const CharacterSet WSP
static const CharacterSet DQUOTE
static const CharacterSet DIGIT
CharacterSet(const char *label="anonymous", const char *const chars="")
a character set with a given label and contents
CharacterSet & operator+=(const CharacterSet &rhs)
set addition: add to this set all characters that are also in rhs
static const CharacterSet ALPHA
static const CharacterSet QDTEXT
static const CharacterSet HEXDIG
static const CharacterSet VCHAR
static const CharacterSet LF
const char * name
optional set label for debugging (default: "anonymous")
static const CharacterSet SPECIAL
CharacterSet & remove(const unsigned char c)
remove a given character from the character set
static const CharacterSet CR
static const CharacterSet & RFC3986_UNRESERVED()
allowed URI characters that do not have a reserved purpose, RFC 3986
static const CharacterSet OBSTEXT
static const CharacterSet CTL
static const CharacterSet HTAB
static const CharacterSet BIT
void printChars(std::ostream &os) const
prints all chars in arbitrary order, without any quoting/escaping