mirror of
https://github.com/Chatterino/chatterino2.git
synced 2024-11-13 19:49:51 +01:00
Remove Redundant Parsing of Links (#4507)
Co-authored-by: pajlada <rasmus.karlsson@pajlada.com>
This commit is contained in:
parent
f2938995c1
commit
95e7426283
|
@ -35,6 +35,7 @@
|
|||
- Dev: Add scripting capabilities with Lua (#4341, #4504)
|
||||
- Dev: Conan 2.0 is now used instead of Conan 1.0. (#4417)
|
||||
- Dev: Added tests and benchmarks for `LinkParser`. (#4436)
|
||||
- Dev: Removed redundant parsing of links. (#4507)
|
||||
- Dev: Experimental builds with Qt 6 are now provided. (#4522, #4551, #4553, #4554, #4555, #4556)
|
||||
- Dev: Removed `CHATTERINO_TEST` definitions. (#4526)
|
||||
- Dev: Builds for macOS now have `macos` in their name (previously: `osx`). (#4550)
|
||||
|
|
|
@ -24,7 +24,7 @@ static void BM_LinkParsing(benchmark::State &state)
|
|||
|
||||
// Make sure the TLDs are loaded
|
||||
{
|
||||
benchmark::DoNotOptimize(LinkParser("xd.com").getCaptured());
|
||||
benchmark::DoNotOptimize(LinkParser("xd.com").result());
|
||||
}
|
||||
|
||||
for (auto _ : state)
|
||||
|
@ -32,10 +32,7 @@ static void BM_LinkParsing(benchmark::State &state)
|
|||
for (auto word : words)
|
||||
{
|
||||
LinkParser parser(word);
|
||||
if (parser.hasMatch())
|
||||
{
|
||||
benchmark::DoNotOptimize(parser.getCaptured());
|
||||
}
|
||||
benchmark::DoNotOptimize(parser.result());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ namespace {
|
|||
|
||||
LinkParser::LinkParser(const QString &unparsedString)
|
||||
{
|
||||
this->match_ = unparsedString;
|
||||
ParsedLink result;
|
||||
|
||||
// This is not implemented with a regex to increase performance.
|
||||
// We keep removing parts of the url until there's either nothing left or we fail.
|
||||
|
@ -79,11 +79,13 @@ LinkParser::LinkParser(const QString &unparsedString)
|
|||
if (l.startsWith("https://", Qt::CaseInsensitive))
|
||||
{
|
||||
hasHttp = true;
|
||||
result.protocol = l.mid(0, 8);
|
||||
l = l.mid(8);
|
||||
}
|
||||
else if (l.startsWith("http://", Qt::CaseInsensitive))
|
||||
{
|
||||
hasHttp = true;
|
||||
result.protocol = l.mid(0, 7);
|
||||
l = l.mid(7);
|
||||
}
|
||||
|
||||
|
@ -92,8 +94,10 @@ LinkParser::LinkParser(const QString &unparsedString)
|
|||
|
||||
// Host `a.b.c.com`
|
||||
QStringRef host = l;
|
||||
ParsedLink::StringView rest;
|
||||
bool lastWasDot = true;
|
||||
bool inIpv6 = false;
|
||||
bool hasMatch = false;
|
||||
|
||||
for (int i = 0; i < l.size(); i++)
|
||||
{
|
||||
|
@ -111,24 +115,28 @@ LinkParser::LinkParser(const QString &unparsedString)
|
|||
if (l[i] == ':' && !inIpv6)
|
||||
{
|
||||
host = l.mid(0, i);
|
||||
rest = l.mid(i);
|
||||
l = l.mid(i + 1);
|
||||
goto parsePort;
|
||||
}
|
||||
else if (l[i] == '/')
|
||||
{
|
||||
host = l.mid(0, i);
|
||||
rest = l.mid(i);
|
||||
l = l.mid(i + 1);
|
||||
goto parsePath;
|
||||
}
|
||||
else if (l[i] == '?')
|
||||
{
|
||||
host = l.mid(0, i);
|
||||
rest = l.mid(i);
|
||||
l = l.mid(i + 1);
|
||||
goto parseQuery;
|
||||
}
|
||||
else if (l[i] == '#')
|
||||
{
|
||||
host = l.mid(0, i);
|
||||
rest = l.mid(i);
|
||||
l = l.mid(i + 1);
|
||||
goto parseAnchor;
|
||||
}
|
||||
|
@ -176,32 +184,28 @@ parseAnchor:
|
|||
|
||||
done:
|
||||
// check host
|
||||
this->hasMatch_ = isValidHostname(host) || isValidIpv4(host)
|
||||
hasMatch = isValidHostname(host) || isValidIpv4(host)
|
||||
#ifdef C_MATCH_IPV6_LINK
|
||||
|
||||
|| (hasHttp && isValidIpv6(host))
|
||||
#endif
|
||||
;
|
||||
|
||||
if (this->hasMatch_)
|
||||
if (hasMatch)
|
||||
{
|
||||
this->match_ = unparsedString;
|
||||
result.host = host;
|
||||
result.rest = rest;
|
||||
result.source = unparsedString;
|
||||
this->result_ = std::move(result);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
error:
|
||||
hasMatch_ = false;
|
||||
return;
|
||||
}
|
||||
|
||||
bool LinkParser::hasMatch() const
|
||||
const std::optional<ParsedLink> &LinkParser::result() const
|
||||
{
|
||||
return this->hasMatch_;
|
||||
}
|
||||
|
||||
QString LinkParser::getCaptured() const
|
||||
{
|
||||
return this->match_;
|
||||
return this->result_;
|
||||
}
|
||||
|
||||
} // namespace chatterino
|
||||
|
|
|
@ -2,19 +2,31 @@
|
|||
|
||||
#include <QString>
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace chatterino {
|
||||
|
||||
struct ParsedLink {
|
||||
#if QT_VERSION >= QT_VERSION_CHECK(5, 15, 0)
|
||||
using StringView = QStringView;
|
||||
#else
|
||||
using StringView = QStringRef;
|
||||
#endif
|
||||
StringView protocol;
|
||||
StringView host;
|
||||
StringView rest;
|
||||
QString source;
|
||||
};
|
||||
|
||||
class LinkParser
|
||||
{
|
||||
public:
|
||||
explicit LinkParser(const QString &unparsedString);
|
||||
|
||||
bool hasMatch() const;
|
||||
QString getCaptured() const;
|
||||
const std::optional<ParsedLink> &result() const;
|
||||
|
||||
private:
|
||||
bool hasMatch_{false};
|
||||
QString match_;
|
||||
std::optional<ParsedLink> result_{};
|
||||
};
|
||||
|
||||
} // namespace chatterino
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "Application.hpp"
|
||||
#include "common/Env.hpp"
|
||||
#include "common/LinkParser.hpp"
|
||||
#include "common/NetworkResult.hpp"
|
||||
#include "common/QLogging.hpp"
|
||||
#include "common/SignalVector.hpp"
|
||||
|
@ -148,15 +149,15 @@ bool appendWhisperMessageWordsLocally(const QStringList &words)
|
|||
void operator()(const QString &string,
|
||||
MessageBuilder &b) const
|
||||
{
|
||||
auto linkString = b.matchLink(string);
|
||||
if (linkString.isEmpty())
|
||||
LinkParser parser(string);
|
||||
if (parser.result())
|
||||
{
|
||||
b.emplace<TextElement>(string,
|
||||
MessageElementFlag::Text);
|
||||
b.addLink(*parser.result());
|
||||
}
|
||||
else
|
||||
{
|
||||
b.addLink(string, linkString);
|
||||
b.emplace<TextElement>(string,
|
||||
MessageElementFlag::Text);
|
||||
}
|
||||
}
|
||||
} visitor;
|
||||
|
|
|
@ -240,10 +240,10 @@ MessageBuilder::MessageBuilder(SystemMessageTag, const QString &text,
|
|||
text.split(QRegularExpression("\\s"), Qt::SkipEmptyParts);
|
||||
for (const auto &word : textFragments)
|
||||
{
|
||||
const auto linkString = this->matchLink(word);
|
||||
if (!linkString.isEmpty())
|
||||
LinkParser parser(word);
|
||||
if (parser.result())
|
||||
{
|
||||
this->addLink(word, linkString);
|
||||
this->addLink(*parser.result());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -707,52 +707,25 @@ std::unique_ptr<MessageElement> MessageBuilder::releaseBack()
|
|||
return ptr;
|
||||
}
|
||||
|
||||
QString MessageBuilder::matchLink(const QString &string)
|
||||
void MessageBuilder::addLink(const ParsedLink &parsedLink)
|
||||
{
|
||||
LinkParser linkParser(string);
|
||||
|
||||
static QRegularExpression httpRegex(
|
||||
"\\bhttps?://", QRegularExpression::CaseInsensitiveOption);
|
||||
static QRegularExpression ftpRegex(
|
||||
"\\bftps?://", QRegularExpression::CaseInsensitiveOption);
|
||||
static QRegularExpression spotifyRegex(
|
||||
"\\bspotify:", QRegularExpression::CaseInsensitiveOption);
|
||||
|
||||
if (!linkParser.hasMatch())
|
||||
{
|
||||
return QString();
|
||||
}
|
||||
|
||||
QString captured = linkParser.getCaptured();
|
||||
|
||||
if (!captured.contains(httpRegex) && !captured.contains(ftpRegex) &&
|
||||
!captured.contains(spotifyRegex))
|
||||
{
|
||||
captured.insert(0, "http://");
|
||||
}
|
||||
|
||||
return captured;
|
||||
}
|
||||
|
||||
void MessageBuilder::addLink(const QString &origLink,
|
||||
const QString &matchedLink)
|
||||
{
|
||||
static QRegularExpression domainRegex(
|
||||
R"(^(?:(?:ftp|http)s?:\/\/)?([^\/]+)(?:\/.*)?$)",
|
||||
QRegularExpression::CaseInsensitiveOption);
|
||||
|
||||
QString lowercaseLinkString;
|
||||
auto match = domainRegex.match(origLink);
|
||||
if (match.isValid())
|
||||
QString origLink = parsedLink.source;
|
||||
QString matchedLink;
|
||||
|
||||
if (parsedLink.protocol.isNull())
|
||||
{
|
||||
lowercaseLinkString = origLink.mid(0, match.capturedStart(1)) +
|
||||
match.captured(1).toLower() +
|
||||
origLink.mid(match.capturedEnd(1));
|
||||
matchedLink = QStringLiteral("http://") + parsedLink.source;
|
||||
}
|
||||
else
|
||||
{
|
||||
lowercaseLinkString = origLink;
|
||||
lowercaseLinkString += parsedLink.protocol;
|
||||
matchedLink = parsedLink.source;
|
||||
}
|
||||
|
||||
lowercaseLinkString += parsedLink.host.toString().toLower();
|
||||
lowercaseLinkString += parsedLink.rest;
|
||||
|
||||
auto linkElement = Link(Link::Url, matchedLink);
|
||||
|
||||
auto textColor = MessageColor(MessageColor::Link);
|
||||
|
@ -816,12 +789,10 @@ void MessageBuilder::addIrcMessageText(const QString &text)
|
|||
auto string = QString(word);
|
||||
|
||||
// Actually just text
|
||||
auto linkString = this->matchLink(string);
|
||||
auto link = Link();
|
||||
|
||||
if (!linkString.isEmpty())
|
||||
LinkParser parser(string);
|
||||
if (parser.result())
|
||||
{
|
||||
this->addLink(string, linkString);
|
||||
this->addLink(*parser.result());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -909,28 +880,24 @@ void MessageBuilder::addTextOrEmoji(const QString &string_)
|
|||
auto string = QString(string_);
|
||||
|
||||
// Actually just text
|
||||
auto linkString = this->matchLink(string);
|
||||
auto link = Link();
|
||||
LinkParser linkParser(string);
|
||||
if (linkParser.result())
|
||||
{
|
||||
this->addLink(*linkParser.result());
|
||||
return;
|
||||
}
|
||||
|
||||
auto &&textColor = this->textColor_;
|
||||
if (linkString.isEmpty())
|
||||
{
|
||||
if (string.startsWith('@'))
|
||||
{
|
||||
this->emplace<TextElement>(string, MessageElementFlag::BoldUsername,
|
||||
textColor, FontStyle::ChatMediumBold);
|
||||
this->emplace<TextElement>(
|
||||
string, MessageElementFlag::NonBoldUsername, textColor);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->emplace<TextElement>(string, MessageElementFlag::Text,
|
||||
this->emplace<TextElement>(string, MessageElementFlag::NonBoldUsername,
|
||||
textColor);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
this->addLink(string, linkString);
|
||||
this->emplace<TextElement>(string, MessageElementFlag::Text, textColor);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@ class TextElement;
|
|||
struct Emote;
|
||||
using EmotePtr = std::shared_ptr<const Emote>;
|
||||
|
||||
struct ParsedLink;
|
||||
|
||||
struct SystemMessageTag {
|
||||
};
|
||||
struct TimeoutMessageTag {
|
||||
|
@ -94,8 +96,7 @@ public:
|
|||
std::weak_ptr<Message> weakOf();
|
||||
|
||||
void append(std::unique_ptr<MessageElement> element);
|
||||
QString matchLink(const QString &string);
|
||||
void addLink(const QString &origLink, const QString &matchedLink);
|
||||
void addLink(const ParsedLink &parsedLink);
|
||||
|
||||
/**
|
||||
* Adds the text, applies irc colors, adds links,
|
||||
|
|
|
@ -15,9 +15,11 @@ bool LinkPredicate::appliesToImpl(const Message &message)
|
|||
{
|
||||
for (const auto &word : message.messageText.split(' ', Qt::SkipEmptyParts))
|
||||
{
|
||||
if (LinkParser(word).hasMatch())
|
||||
if (LinkParser(word).result())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "providers/twitch/TwitchMessageBuilder.hpp"
|
||||
|
||||
#include "Application.hpp"
|
||||
#include "common/LinkParser.hpp"
|
||||
#include "common/QLogging.hpp"
|
||||
#include "controllers/accounts/AccountController.hpp"
|
||||
#include "controllers/ignores/IgnoreController.hpp"
|
||||
|
@ -465,12 +466,12 @@ void TwitchMessageBuilder::addTextOrEmoji(const QString &string_)
|
|||
}
|
||||
|
||||
// Actually just text
|
||||
auto linkString = this->matchLink(string);
|
||||
LinkParser parsed(string);
|
||||
auto textColor = this->textColor_;
|
||||
|
||||
if (!linkString.isEmpty())
|
||||
if (parsed.result())
|
||||
{
|
||||
this->addLink(string, linkString);
|
||||
this->addLink(*parsed.result());
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -6,66 +6,96 @@
|
|||
|
||||
using namespace chatterino;
|
||||
|
||||
TEST(LinkParser, parseDomainLinks)
|
||||
struct Case {
|
||||
QString protocol{};
|
||||
QString host{};
|
||||
QString rest{};
|
||||
|
||||
void check() const
|
||||
{
|
||||
const QStringList inputs = {
|
||||
"https://chatterino.com",
|
||||
"http://chatterino.com",
|
||||
"chatterino.com",
|
||||
"wiki.chatterino.com",
|
||||
"https://wiki.chatterino.com",
|
||||
"http://chatterino.co.uk",
|
||||
"http://a.io",
|
||||
"chatterino.com:80",
|
||||
"wiki.chatterino.com:80",
|
||||
"a.b.c.chatterino.com",
|
||||
"https://a.b.c.chatterino.com/foo",
|
||||
"http://chatterino.com?foo",
|
||||
"http://xd.chatterino.com/#?foo",
|
||||
"chatterino.com#foo",
|
||||
"1.com",
|
||||
"127.0.0.1.com",
|
||||
"https://127.0.0.1.com",
|
||||
auto input = this->protocol + this->host + this->rest;
|
||||
LinkParser p(input);
|
||||
ASSERT_TRUE(p.result().has_value()) << input.toStdString();
|
||||
|
||||
const auto &r = *p.result();
|
||||
ASSERT_EQ(r.source, input);
|
||||
ASSERT_EQ(r.protocol, this->protocol) << this->protocol.toStdString();
|
||||
ASSERT_EQ(r.host, this->host) << this->host.toStdString();
|
||||
ASSERT_EQ(r.rest, this->rest) << this->rest.toStdString();
|
||||
}
|
||||
};
|
||||
|
||||
for (const auto &input : inputs)
|
||||
TEST(LinkParser, parseDomainLinks)
|
||||
{
|
||||
LinkParser p(input);
|
||||
ASSERT_TRUE(p.hasMatch()) << input.toStdString();
|
||||
ASSERT_EQ(p.getCaptured(), input);
|
||||
const QList<Case> cases = {
|
||||
{"https://", "chatterino.com"},
|
||||
{"http://", "chatterino.com"},
|
||||
{"", "chatterino.com"},
|
||||
{"", "wiki.chatterino.com"},
|
||||
{"https://", "wiki.chatterino.com"},
|
||||
{"http://", "chatterino.co.uk"},
|
||||
{"http://", "a.io"},
|
||||
{"", "chatterino.com", ":80"},
|
||||
{"", "wiki.chatterino.com", ":80"},
|
||||
{"", "wiki.chatterino.com", ":80/foo/bar"},
|
||||
{"", "wiki.chatterino.com", "/:80?foo/bar"},
|
||||
{"", "wiki.chatterino.com", "/127.0.0.1"},
|
||||
{"", "a.b.c.chatterino.com"},
|
||||
{"https://", "a.b.c.chatterino.com", "/foo"},
|
||||
{"http://", "chatterino.com", "?foo"},
|
||||
{"http://", "xd.chatterino.com", "/#?foo"},
|
||||
{"", "chatterino.com", "#foo"},
|
||||
{"", "1.com"},
|
||||
{"", "127.0.0.1.com"},
|
||||
{"https://", "127.0.0.1.com"},
|
||||
// test case-insensitiveness
|
||||
{"HtTpS://", "127.0.0.1.CoM"},
|
||||
{"HTTP://", "XD.CHATTERINO.COM", "/#?FOO"},
|
||||
{"HTTPS://", "wikI.chatterino.com"},
|
||||
{"", "chatterino.Org", "#foo"},
|
||||
{"", "CHATTERINO.com", ""},
|
||||
};
|
||||
|
||||
for (const auto &c : cases)
|
||||
{
|
||||
c.check();
|
||||
}
|
||||
}
|
||||
|
||||
TEST(LinkParser, parseIpv4Links)
|
||||
{
|
||||
const QStringList inputs = {
|
||||
"https://127.0.0.1",
|
||||
"http://127.0.0.1",
|
||||
"127.0.0.1",
|
||||
"127.0.0.1:8080",
|
||||
"255.255.255.255",
|
||||
"0.0.0.0",
|
||||
"1.1.1.1",
|
||||
"001.001.01.1",
|
||||
"123.246.87.0",
|
||||
"196.168.0.1:",
|
||||
"196.168.4.2/foo",
|
||||
"196.168.4.2?foo",
|
||||
"http://196.168.4.0#foo",
|
||||
"196.168.4.0/?#foo",
|
||||
"196.168.4.0#?/foo",
|
||||
"256.255.255.255",
|
||||
"http://256.255.255.255",
|
||||
"255.256.255.255",
|
||||
"255.255.256.255",
|
||||
"255.255.255.256",
|
||||
const QList<Case> cases = {
|
||||
{"https://", "127.0.0.1"},
|
||||
{"http://", "127.0.0.1"},
|
||||
{"", "127.0.0.1"},
|
||||
{"", "127.0.0.1", ":8080"},
|
||||
{"", "255.255.255.255"},
|
||||
{"", "0.0.0.0"},
|
||||
{"", "1.1.1.1"},
|
||||
{"", "001.001.01.1"},
|
||||
{"", "123.246.87.0"},
|
||||
{"", "196.168.0.1", ":"},
|
||||
{"", "196.168.4.2", "/foo"},
|
||||
{"", "196.168.4.2", "?foo"},
|
||||
{"http://", "196.168.4.0", "#foo"},
|
||||
{"", "196.168.4.0", "/?#foo"},
|
||||
{"", "196.168.4.0", "#?/foo"},
|
||||
{"", "256.255.255.255"},
|
||||
{"http://", "256.255.255.255"},
|
||||
{"", "255.256.255.255"},
|
||||
{"", "255.255.256.255"},
|
||||
{"", "255.255.255.256"},
|
||||
// test case-insensitiveness
|
||||
{"HTTP://", "196.168.4.0", "#Foo"},
|
||||
{"HTTPS://", "196.168.4.0", "#Foo"},
|
||||
{"htTp://", "127.0.0.1"},
|
||||
{"httpS://", "127.0.0.1"},
|
||||
|
||||
};
|
||||
|
||||
for (const auto &input : inputs)
|
||||
for (const auto &c : cases)
|
||||
{
|
||||
LinkParser p(input);
|
||||
ASSERT_TRUE(p.hasMatch()) << input.toStdString();
|
||||
ASSERT_EQ(p.getCaptured(), input);
|
||||
c.check();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,12 +110,14 @@ TEST(LinkParser, doesntParseInvalidIpv4Links)
|
|||
"1.2",
|
||||
"1",
|
||||
"1.2.3",
|
||||
"htt://256.255.255.255",
|
||||
"aliens://256.255.255.255",
|
||||
};
|
||||
|
||||
for (const auto &input : inputs)
|
||||
{
|
||||
LinkParser p(input);
|
||||
ASSERT_FALSE(p.hasMatch()) << input.toStdString();
|
||||
ASSERT_FALSE(p.result().has_value()) << input.toStdString();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -99,6 +131,9 @@ TEST(LinkParser, doesntParseInvalidLinks)
|
|||
"spotify://chatterino.com",
|
||||
"httpsx://chatterino.com",
|
||||
"https:chatterino.com",
|
||||
"https:/chatterino.com",
|
||||
"http:/chatterino.com",
|
||||
"htp://chatterino.com",
|
||||
"/chatterino.com",
|
||||
"word",
|
||||
".",
|
||||
|
@ -114,6 +149,6 @@ TEST(LinkParser, doesntParseInvalidLinks)
|
|||
for (const auto &input : inputs)
|
||||
{
|
||||
LinkParser p(input);
|
||||
ASSERT_FALSE(p.hasMatch()) << input.toStdString();
|
||||
ASSERT_FALSE(p.result().has_value()) << input.toStdString();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue