From 4a7a5b09ceebea0da3dcf279d6ec52194e636f82 Mon Sep 17 00:00:00 2001 From: nerix Date: Sat, 20 Jul 2024 12:06:23 +0200 Subject: [PATCH] fix: disallow more characters in links (#5509) --- CHANGELOG.md | 1 + src/common/LinkParser.cpp | 15 +++++++++++++++ tests/src/LinkParser.cpp | 32 +++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3d0e1f0f..f8b104bcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ - Bugfix: Fixed user info card popups adding duplicate line to log files. (#5499) - Bugfix: Fixed `/clearmessages` not working with more than one window. (#5489) - Bugfix: Fixed splits staying paused after unfocusing Chatterino in certain configurations. (#5504) +- Bugfix: Links with invalid characters in the domain are no longer detected. (#5509) - Dev: Update Windows build from Qt 6.5.0 to Qt 6.7.1. (#5420) - Dev: Update vcpkg build Qt from 6.5.0 to 6.7.0, boost from 1.83.0 to 1.85.0, openssl from 3.1.3 to 3.3.0. (#5422) - Dev: Unsingletonize `ISoundController`. (#5462) diff --git a/src/common/LinkParser.cpp b/src/common/LinkParser.cpp index ecc8fb751..eed4f97c1 100644 --- a/src/common/LinkParser.cpp +++ b/src/common/LinkParser.cpp @@ -148,6 +148,16 @@ void strip(QStringView &source) } } +/// @brief Checks if @a c is valid in a domain +/// +/// Valid characters are 0-9, A-Z, a-z, '-', '_', and '.' (like in GFM) +/// and all non-ASCII characters (unlike in GFM). 
+Q_ALWAYS_INLINE bool isValidDomainChar(char16_t c) +{ + /* Accept any value >= 0x80 unconditionally (numerically this also covers the surrogate range 0xD800-0xDFFF); below 0x80 only digits, ASCII letters, '_', '-' and '.' pass. */ return c >= 0x80 || (u'0' <= c && c <= u'9') || (u'A' <= c && c <= u'Z') || + (u'a' <= c && c <= u'z') || c == u'_' || c == u'-' || c == u'.'; +} + } // namespace namespace chatterino::linkparser { @@ -233,6 +243,11 @@ std::optional parse(const QString &source) noexcept rest = remaining.mid(i); break; } + + if (!isValidDomainChar(currentChar)) + { + return result; + } } if (lastWasDot || lastDotPos <= 0) diff --git a/tests/src/LinkParser.cpp b/tests/src/LinkParser.cpp index 746486c97..72748d689 100644 --- a/tests/src/LinkParser.cpp +++ b/tests/src/LinkParser.cpp @@ -1,11 +1,13 @@ #include "common/LinkParser.hpp" +#include "common/Literals.hpp" #include "Test.hpp" #include #include using namespace chatterino; +using namespace literals; struct Case { // -Wmissing-field-initializers complains otherwise @@ -91,6 +93,14 @@ TEST(LinkParser, parseDomainLinks) {"", "a.com", "?("}, {"", "a.com", "#("}, {"", "a.com", "/__my_user__"}, + {"", "a.b.c.-._.1.com", ""}, + {"", "0123456789.com", ""}, + {"", "ABCDEFGHIJKLMNOPQRSTUVWXYZ.com", ""}, + {"", "abcdefghijklmnopqrstuvwxyz.com", ""}, + // non-ASCII characters are allowed + {"", u"köln.de"_s, ""}, + {"", u"ü.com"_s, ""}, + {"", u"─.com"_s, ""}, // test case-insensitiveness {"HtTpS://", "127.0.0.1.CoM"}, {"HTTP://", "XD.CHATTERINO.COM", "/#?FOO"}, @@ -168,6 +178,7 @@ TEST(LinkParser, doesntParseInvalidIpv4Links) "196.162.8.1(())", "196.162.8.1(", "196.162.8.1(!", + "127.1.1;.com", }; for (const auto &input : inputs) @@ -219,11 +230,22 @@ TEST(LinkParser, doesntParseInvalidLinks) "~~a.com()", "https://chatterino.com>", - // invalid characters are still accepted (see #4769) - // "chatterino.com>", - // "", + "chatterino.com>", + "", + "info@example.com", + "user:pass@example.com", + ":.com", + "a:.com", + "1:.com", + "[a].com", + "`a`.com", + "{a}.com", + "a.com:pass@example.com", + "@@@.com", + "%%%.com", + "*.com", }; for (const auto &input : inputs)