mirror of
https://github.com/Chatterino/chatterino2.git
synced 2024-11-21 22:24:07 +01:00
fix: count parentheses in links (#5515)
This commit is contained in:
parent
5deec1f02f
commit
847e4f0932
4 changed files with 60 additions and 11 deletions
|
@ -22,7 +22,7 @@
|
|||
- Minor: Introduce HTTP API for plugins. (#5383, #5492, #5494)
|
||||
- Minor: Support more Firefox variants for incognito link opening. (#5503)
|
||||
- Minor: Replying to a message will now display the message being replied to. (#4350, #5519)
|
||||
- Minor: Links can now have prefixes and suffixes such as parentheses. (#5486)
|
||||
- Minor: Links can now have prefixes and suffixes such as parentheses. (#5486, #5515)
|
||||
- Bugfix: Fixed tab move animation occasionally failing to start after closing a tab. (#5426)
|
||||
- Bugfix: If a network request errors with 200 OK, Qt's error code is now reported instead of the HTTP status. (#5378)
|
||||
- Bugfix: Fixed restricted users usernames not being clickable. (#5405)
|
||||
|
|
|
@ -118,10 +118,12 @@ bool startsWithPort(QStringView string)
|
|||
/// As per https://github.github.com/gfm/#autolinks-extension-:
|
||||
///
|
||||
/// '<', '*', '_', '~', and '(' are ignored at the beginning
|
||||
/// '>', '?', '!', '.', ',', ':', '*', '~', and ')' are ignored at the end
|
||||
/// '>', '?', '!', '.', ',', ':', '*', '~', and ')' are ignored at the end.
|
||||
///
|
||||
/// A difference to GFM is that the source isn't scanned for parentheses and '_'
|
||||
/// isn't a valid suffix.
|
||||
/// A difference to GFM is that '_' isn't a valid suffix.
|
||||
///
|
||||
/// This might remove more than desired (e.g. "(a.com/(foo))" -> "a.com/(foo").
|
||||
/// Parentheses are counted after recognizing a valid IP/host.
|
||||
void strip(QStringView &source)
|
||||
{
|
||||
while (!source.isEmpty())
|
||||
|
@ -259,6 +261,43 @@ std::optional<Parsed> parse(const QString &source) noexcept
|
|||
if ((nDots == 3 && isValidIpv4(host)) ||
|
||||
isValidTld(host.mid(lastDotPos + 1)))
|
||||
{
|
||||
// scan for parentheses (only if there were characters excluded)
|
||||
if (link.end() != source.end() && !rest.empty())
|
||||
{
|
||||
size_t nestingLevel = 0;
|
||||
// position after the last closing brace (i.e. the minimum characters to include)
|
||||
const auto *lastClose = link.end();
|
||||
|
||||
// scan source from rest until the end:
|
||||
// lastClose
|
||||
// v
|
||||
// (example.com/foo/bar/#baz_(qox)),
|
||||
// ▏╌╌rest (before)╌ ▏
|
||||
// ▏╌╌╌╌╌╌╌link (before)╌╌╌╌╌╌╌ ▏
|
||||
// ▏╌╌rest (after)╌╌╌ ▏
|
||||
// ▏╌╌╌╌╌╌╌link (after)╌╌╌╌╌╌╌╌╌ ▏
|
||||
// ▏╌╌╌╌╌╌╌╌╌╌╌╌╌source╌╌╌╌╌╌╌╌╌╌╌╌ ▏
|
||||
// ▏╌╌╌╌╌╌╌search╌╌╌╌╌╌ ▏
|
||||
for (const auto *it = rest.begin(); it < source.end(); it++)
|
||||
{
|
||||
if (it->unicode() == u'(')
|
||||
{
|
||||
nestingLevel++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nestingLevel != 0 && it->unicode() == u')')
|
||||
{
|
||||
nestingLevel--;
|
||||
if (nestingLevel == 0)
|
||||
{
|
||||
lastClose = it + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
link = QStringView{link.begin(), std::max(link.end(), lastClose)};
|
||||
rest = QStringView{rest.begin(), std::max(rest.end(), lastClose)};
|
||||
}
|
||||
result = Parsed{
|
||||
.protocol = protocol,
|
||||
.host = host,
|
||||
|
|
|
@ -16,8 +16,9 @@ namespace chatterino::linkparser {
|
|||
///
|
||||
/// Prefixes and suffixes are almost identical to the ones in GitHub Flavored
|
||||
/// Markdown (GFM - https://github.github.com/gfm/#autolinks-extension-).
|
||||
/// The main differences are that '_' isn't a valid suffix and parentheses
|
||||
/// aren't counted (e.g. "(a.com/(foo)! would result in "a.com/(foo").
|
||||
/// The main difference is that '_' isn't a valid suffix.
|
||||
/// Parentheses are counted inside the @a rest: parsing "(a.com/(foo))" would
|
||||
/// result in the link "a.com/(foo)".
|
||||
/// Matching is done case insensitive (e.g. "HTTp://a.com" would be valid).
|
||||
///
|
||||
/// A @a protocol can either be empty, "http://", or "https://".
|
||||
|
|
|
@ -23,8 +23,8 @@ struct Case {
|
|||
"", "_", "__", "<", "<<", "<_<", "(((", "<*_~(", "**", "~~",
|
||||
};
|
||||
QStringList suffixes{
|
||||
"", ">", "?", "!", ".", ",", ":",
|
||||
"*", "~", ">>", "?!.", "~~,*!?", "**",
|
||||
"", ">", "?", "!", ".", ",", ":", "*", "~",
|
||||
">>", "?!.", "~~,*!?", "**", ").", "),", ",)", ")),.", ")?",
|
||||
};
|
||||
|
||||
for (const auto &prefix : prefixes)
|
||||
|
@ -89,14 +89,21 @@ TEST(LinkParser, parseDomainLinks)
|
|||
{"", "https.cat"},
|
||||
{"", "httpsd.cat"},
|
||||
{"", "http.cat", "/200"},
|
||||
{"", "http.cat", "/200("},
|
||||
{"", "a.com", "?("},
|
||||
{"", "a.com", "#("},
|
||||
{"", "http.cat", "/200()"},
|
||||
{"", "a.com", "?()"},
|
||||
{"", "a.com", "#()"},
|
||||
{"", "a.com", "/__my_user__"},
|
||||
{"", "a.b.c.-._.1.com", ""},
|
||||
{"", "0123456789.com", ""},
|
||||
{"", "ABCDEFGHIJKLMNOPQRSTUVWXYZ.com", ""},
|
||||
{"", "abcdefghijklmnopqrstuvwxyz.com", ""},
|
||||
{"", "example.com", "/foo(bar)"},
|
||||
{"", "example.com", "/foo((bar))"},
|
||||
{"", "example.com", "/(f)(o)(o)(b)(a)r"},
|
||||
{"", "example.com", "/foobar()()"},
|
||||
{"", "example.com", "/foobar()(())baz"},
|
||||
{"", "example.com", "/(foo)"},
|
||||
{"", "example.com", "/()"},
|
||||
// non-ASCII characters are allowed
|
||||
{"", u"köln.de"_s, ""},
|
||||
{"", u"ü.com"_s, ""},
|
||||
|
@ -246,6 +253,8 @@ TEST(LinkParser, doesntParseInvalidLinks)
|
|||
"@@@.com",
|
||||
"%%%.com",
|
||||
"*.com",
|
||||
"example.com(foo)",
|
||||
"example.com()",
|
||||
};
|
||||
|
||||
for (const auto &input : inputs)
|
||||
|
|
Loading…
Reference in a new issue