This commit is contained in:
Colton Clemmer 2023-09-03 10:37:03 -05:00
parent d6873ad05d
commit 3c72fc867c
2 changed files with 66 additions and 58 deletions

View file

@ -116,23 +116,26 @@ bool startsWithPort(QStringView string)
// For unicode ranges see: https://unicode.org/charts/ // For unicode ranges see: https://unicode.org/charts/
using UnicodeRange = std::pair<ushort, ushort>; using UnicodeRange = std::pair<ushort, ushort>;
std::vector<UnicodeRange> emojiRanges = { std::vector<UnicodeRange> emojiRanges = {
{U'\U00002700', U'\U000027BF' }, // Dingbats {U'\U00002700', U'\U000027BF'}, // Dingbats
{U'\U00001F60', U'\U0001F64F' }, // Emoticons {U'\U00001F60', U'\U0001F64F'}, // Emoticons
{U'\U00002600', U'\U000026FF' }, // Miscellaneous Symbols {U'\U00002600', U'\U000026FF'}, // Miscellaneous Symbols
{U'\U00001F30', U'\U0001F5FF' }, // Miscellaneous Symbols and Pictographs {U'\U00001F30', U'\U0001F5FF'}, // Miscellaneous Symbols and Pictographs
{U'\U00001F90', U'\U0001F9FF' }, // Supplemental Symbols and Pictographs {U'\U00001F90', U'\U0001F9FF'}, // Supplemental Symbols and Pictographs
}; };
std::vector<UnicodeRange> alphaNumeric = { std::vector<UnicodeRange> alphaNumeric = {
{ U'\u0041', U'\u005A' }, // Upper alphabet {U'\u0041', U'\u005A'}, // Upper alphabet
{ U'\u0061', U'\u007A' }, // Lower alphabet {U'\u0061', U'\u007A'}, // Lower alphabet
{ U'\u0030', U'\u0039' }, // Numbers {U'\u0030', U'\u0039'}, // Numbers
}; };
bool isInUnicodeRange(const QChar& ch, std::vector<UnicodeRange> ranges) { bool isInUnicodeRange(const QChar &ch, std::vector<UnicodeRange> ranges)
{
ushort unicodeValue = ch.unicode(); ushort unicodeValue = ch.unicode();
for (const auto& range : ranges) { for (const auto &range : ranges)
if (unicodeValue >= range.first && unicodeValue <= range.second) { {
if (unicodeValue >= range.first && unicodeValue <= range.second)
{
return true; return true;
} }
} }
@ -147,7 +150,8 @@ QString sanitizeUrl(const QString &unparsedString)
QString sanitizedUrl; QString sanitizedUrl;
for (const QChar &c : unparsedString) for (const QChar &c : unparsedString)
{ {
if (isInUnicodeRange(c, alphaNumeric) || isInUnicodeRange(c, emojiRanges)) if (isInUnicodeRange(c, alphaNumeric) ||
isInUnicodeRange(c, emojiRanges))
{ {
sanitizedUrl.append(c); sanitizedUrl.append(c);
continue; continue;

View file

@ -34,14 +34,16 @@ struct SanitizeCheck {
TEST(LinkParser, parseDomainLinks) TEST(LinkParser, parseDomainLinks)
{ {
const QList<SanitizeCheck> sanitizeCases = { const QList<SanitizeCheck> sanitizeCases = {
{ "TW❘TCH.tv", "TW❘TCH.tv" "" }, // contains dingbat {"TW❘TCH.tv", "TW❘TCH.tv"
{"(twitch.tv/foo)", "twitch.tv", "/foo" }, ""}, // contains dingbat
{"t🤪w🤪i🤪t🤪c🤪h🤪.tv/foo", "t🤪w🤪i🤪t🤪c🤪h🤪.tv", "/foo" }, {"(twitch.tv/foo)", "twitch.tv", "/foo"},
{ "https://🏹.to/bar", "🏹.to", "/bar" }, {"t🤪w🤪i🤪t🤪c🤪h🤪.tv/foo",
{ "😀.com/baz", "😀.com", "/baz" }, // Emoticon "t🤪w🤪i🤪t🤪c🤪h🤪.tv", "/foo"},
{ "❀.com/baz", "❀.com", "/baz" }, // Dingbat {"https://🏹.to/bar", "🏹.to", "/bar"},
{ "⛑.com/baz", "⛑.com", "/baz" }, // Misc Symbol {"😀.com/baz", "😀.com", "/baz"}, // Emoticon
{ "🍀.com/baz", "🍀.com", "/baz" }, // Pictograph {"❀.com/baz", "❀.com", "/baz"}, // Dingbat
{"⛑.com/baz", "⛑.com", "/baz"}, // Misc Symbol
{"🍀.com/baz", "🍀.com", "/baz"}, // Pictograph
}; };
for (auto &c : sanitizeCases) for (auto &c : sanitizeCases)
@ -50,7 +52,8 @@ TEST(LinkParser, parseDomainLinks)
ASSERT_TRUE(p.result().has_value()) << c.testValue.toStdString(); ASSERT_TRUE(p.result().has_value()) << c.testValue.toStdString();
const auto &r = *p.result(); const auto &r = *p.result();
std::ostringstream ss; std::ostringstream ss;
ss << "Expected: " << c.expectedHost.toStdString() << "\nResult: " << r.host.toString().toStdString(); ss << "Expected: " << c.expectedHost.toStdString()
<< "\nResult: " << r.host.toString().toStdString();
ASSERT_EQ(c.expectedHost, r.host) << ss.str(); ASSERT_EQ(c.expectedHost, r.host) << ss.str();
ASSERT_EQ(c.expectedRest, r.rest) << c.expectedRest.toStdString(); ASSERT_EQ(c.expectedRest, r.rest) << c.expectedRest.toStdString();
} }
@ -160,43 +163,44 @@ TEST(LinkParser, doesntParseInvalidIpv4Links)
TEST(LinkParser, doesntParseInvalidLinks) TEST(LinkParser, doesntParseInvalidLinks)
{ {
const QStringList inputs = {"h://foo.com", const QStringList inputs = {
"spotify:1234", "h://foo.com",
"ftp://chatterino.com", "spotify:1234",
"ftps://chatterino.com", "ftp://chatterino.com",
"spotify://chatterino.com", "ftps://chatterino.com",
"httpsx://chatterino.com", "spotify://chatterino.com",
"https:chatterino.com", "httpsx://chatterino.com",
"https:/chatterino.com", "https:chatterino.com",
"http:/chatterino.com", "https:/chatterino.com",
"htp://chatterino.com", "http:/chatterino.com",
"/chatterino.com", "htp://chatterino.com",
"word", "/chatterino.com",
".", "word",
"/", ".",
"#", "/",
":", "#",
"?", ":",
"a", "?",
"://chatterino.com", "a",
"//chatterino.com", "://chatterino.com",
"http://pn.", "//chatterino.com",
"http://pn./", "http://pn.",
"https://pn./", "http://pn./",
"pn./", "https://pn./",
"pn.", "pn./",
"http/chatterino.com", "pn.",
"http/wiki.chatterino.com", "http/chatterino.com",
"http:cat.com", "http/wiki.chatterino.com",
"https:cat.com", "http:cat.com",
"http:/cat.com", "https:cat.com",
"http:/cat.com", "http:/cat.com",
"https:/cat.com", "http:/cat.com",
"%%%%.com", "https:/cat.com",
"*.com", "%%%%.com",
"t🤪w🤪i🤪t🤪c🤪h🤪.🤪t🤪v/foo", // Invalid tld "*.com",
"https։TW❘TCH.tv/ab" // misleading characters: "" and "։" "t🤪w🤪i🤪t🤪c🤪h🤪.🤪t🤪v/foo", // Invalid tld
}; "https։TW❘TCH.tv/ab" // misleading characters: "" and "։"
};
for (const auto &input : inputs) for (const auto &input : inputs)
{ {