This commit is contained in:
Colton Clemmer 2023-09-03 10:37:03 -05:00
parent d6873ad05d
commit 3c72fc867c
2 changed files with 66 additions and 58 deletions

View file

@ -116,23 +116,26 @@ bool startsWithPort(QStringView string)
// For unicode ranges see: https://unicode.org/charts/
using UnicodeRange = std::pair<ushort, ushort>;
// Inclusive [first, second] ranges of code values that sanitizeUrl keeps in
// addition to the alphanumeric ones. isInUnicodeRange tests a single
// QChar::unicode() value against each pair.
//
// NOTE(review): UnicodeRange stores ushort bounds, but several literals below
// are char32_t values above 0xFFFF (e.g. U+1F64F), so they are narrowed when
// the pairs are built. Assuming a 16-bit ushort, {0x1F60, 0x1F64F} ends up as
// 0x1F60..0xF64F, which spans far more than the named block (including the
// UTF-16 surrogate range).
// NOTE(review): the lower bounds 0x1F60 / 0x1F30 / 0x1F90 look one hex digit
// short: the Unicode charts list Emoticons at U+1F600, Miscellaneous Symbols
// and Pictographs at U+1F300 and Supplemental Symbols and Pictographs at
// U+1F900. Correcting only these literals would stop surrogate code units
// from matching, so they should be fixed together with a code-point-aware
// range check -- TODO confirm.
std::vector<UnicodeRange> emojiRanges = {
{U'\U00002700', U'\U000027BF' }, // Dingbats
{U'\U00001F60', U'\U0001F64F' }, // Emoticons
{U'\U00002600', U'\U000026FF' }, // Miscellaneous Symbols
{U'\U00001F30', U'\U0001F5FF' }, // Miscellaneous Symbols and Pictographs
{U'\U00001F90', U'\U0001F9FF' }, // Supplemental Symbols and Pictographs
{U'\U00002700', U'\U000027BF'}, // Dingbats
{U'\U00001F60', U'\U0001F64F'}, // Emoticons
{U'\U00002600', U'\U000026FF'}, // Miscellaneous Symbols
{U'\U00001F30', U'\U0001F5FF'}, // Miscellaneous Symbols and Pictographs
{U'\U00001F90', U'\U0001F9FF'}, // Supplemental Symbols and Pictographs
};
// Inclusive [first, second] ranges for the ASCII letters 'A'-'Z' and 'a'-'z'
// and the digits '0'-'9'. sanitizeUrl keeps every character that falls into
// one of them.
std::vector<UnicodeRange> alphaNumeric = {
{ U'\u0041', U'\u005A' }, // Upper alphabet
{ U'\u0061', U'\u007A' }, // Lower alphabet
{ U'\u0030', U'\u0039' }, // Numbers
{U'\u0041', U'\u005A'}, // Upper alphabet
{U'\u0061', U'\u007A'}, // Lower alphabet
{U'\u0030', U'\u0039'}, // Numbers
};
bool isInUnicodeRange(const QChar& ch, std::vector<UnicodeRange> ranges) {
bool isInUnicodeRange(const QChar &ch, std::vector<UnicodeRange> ranges)
{
ushort unicodeValue = ch.unicode();
for (const auto& range : ranges) {
if (unicodeValue >= range.first && unicodeValue <= range.second) {
for (const auto &range : ranges)
{
if (unicodeValue >= range.first && unicodeValue <= range.second)
{
return true;
}
}
@ -147,7 +150,8 @@ QString sanitizeUrl(const QString &unparsedString)
QString sanitizedUrl;
for (const QChar &c : unparsedString)
{
if (isInUnicodeRange(c, alphaNumeric) || isInUnicodeRange(c, emojiRanges))
if (isInUnicodeRange(c, alphaNumeric) ||
isInUnicodeRange(c, emojiRanges))
{
sanitizedUrl.append(c);
continue;

View file

@ -34,14 +34,16 @@ struct SanitizeCheck {
TEST(LinkParser, parseDomainLinks)
{
// Inputs for the sanitizing checks in this test. Each entry is presumably
// {testValue, expectedHost, expectedRest}, matching the fields the loop below
// reads -- verify against the SanitizeCheck declaration.
//
// The first case spells its empty rest out as a separate "" initializer.
// Without the comma the two adjacent string literals are concatenated into
// the host and the third field is silently left to its default value.
const QList<SanitizeCheck> sanitizeCases = {
{ "TW❘TCH.tv", "TW❘TCH.tv", "" }, // contains dingbat
{"(twitch.tv/foo)", "twitch.tv", "/foo" },
{"t🤪w🤪i🤪t🤪c🤪h🤪.tv/foo", "t🤪w🤪i🤪t🤪c🤪h🤪.tv", "/foo" },
{ "https://🏹.to/bar", "🏹.to", "/bar" },
{ "😀.com/baz", "😀.com", "/baz" }, // Emoticon
{ "❀.com/baz", "❀.com", "/baz" }, // Dingbat
{ "⛑.com/baz", "⛑.com", "/baz" }, // Misc Symbol
{ "🍀.com/baz", "🍀.com", "/baz" }, // Pictograph
{"TW❘TCH.tv", "TW❘TCH.tv", ""}, // contains dingbat
{"(twitch.tv/foo)", "twitch.tv", "/foo"},
{"t🤪w🤪i🤪t🤪c🤪h🤪.tv/foo",
"t🤪w🤪i🤪t🤪c🤪h🤪.tv", "/foo"},
{"https://🏹.to/bar", "🏹.to", "/bar"},
{"😀.com/baz", "😀.com", "/baz"}, // Emoticon
{"❀.com/baz", "❀.com", "/baz"}, // Dingbat
{"⛑.com/baz", "⛑.com", "/baz"}, // Misc Symbol
{"🍀.com/baz", "🍀.com", "/baz"}, // Pictograph
};
for (auto &c : sanitizeCases)
@ -50,7 +52,8 @@ TEST(LinkParser, parseDomainLinks)
ASSERT_TRUE(p.result().has_value()) << c.testValue.toStdString();
const auto &r = *p.result();
std::ostringstream ss;
ss << "Expected: " << c.expectedHost.toStdString() << "\nResult: " << r.host.toString().toStdString();
ss << "Expected: " << c.expectedHost.toStdString()
<< "\nResult: " << r.host.toString().toStdString();
ASSERT_EQ(c.expectedHost, r.host) << ss.str();
ASSERT_EQ(c.expectedRest, r.rest) << c.expectedRest.toStdString();
}
@ -160,43 +163,44 @@ TEST(LinkParser, doesntParseInvalidIpv4Links)
TEST(LinkParser, doesntParseInvalidLinks)
{
const QStringList inputs = {"h://foo.com",
"spotify:1234",
"ftp://chatterino.com",
"ftps://chatterino.com",
"spotify://chatterino.com",
"httpsx://chatterino.com",
"https:chatterino.com",
"https:/chatterino.com",
"http:/chatterino.com",
"htp://chatterino.com",
"/chatterino.com",
"word",
".",
"/",
"#",
":",
"?",
"a",
"://chatterino.com",
"//chatterino.com",
"http://pn.",
"http://pn./",
"https://pn./",
"pn./",
"pn.",
"http/chatterino.com",
"http/wiki.chatterino.com",
"http:cat.com",
"https:cat.com",
"http:/cat.com",
"http:/cat.com",
"https:/cat.com",
"%%%%.com",
"*.com",
"t🤪w🤪i🤪t🤪c🤪h🤪.🤪t🤪v/foo", // Invalid tld
"https։TW❘TCH.tv/ab" // misleading characters: "" and "։"
};
const QStringList inputs = {
"h://foo.com",
"spotify:1234",
"ftp://chatterino.com",
"ftps://chatterino.com",
"spotify://chatterino.com",
"httpsx://chatterino.com",
"https:chatterino.com",
"https:/chatterino.com",
"http:/chatterino.com",
"htp://chatterino.com",
"/chatterino.com",
"word",
".",
"/",
"#",
":",
"?",
"a",
"://chatterino.com",
"//chatterino.com",
"http://pn.",
"http://pn./",
"https://pn./",
"pn./",
"pn.",
"http/chatterino.com",
"http/wiki.chatterino.com",
"http:cat.com",
"https:cat.com",
"http:/cat.com",
"http:/cat.com",
"https:/cat.com",
"%%%%.com",
"*.com",
"t🤪w🤪i🤪t🤪c🤪h🤪.🤪t🤪v/foo", // Invalid tld
"https։TW❘TCH.tv/ab" // misleading characters: "" and "։"
};
for (const auto &input : inputs)
{