From 20c17c33774ccab2ce8a8ffc5221d07032c8a5fe Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Thu, 28 Jun 2018 00:24:21 +0200 Subject: [PATCH] Make link regular expression only be created once --- chatterino.pro | 6 ++- src/common/LinkParser.cpp | 77 +++++++++++++++++++++++++++++++++ src/common/LinkParser.hpp | 27 ++++++++++++ src/messages/MessageBuilder.cpp | 45 +++---------------- 4 files changed, 114 insertions(+), 41 deletions(-) create mode 100644 src/common/LinkParser.cpp create mode 100644 src/common/LinkParser.hpp diff --git a/chatterino.pro b/chatterino.pro index 360c11d9b..247183cb6 100644 --- a/chatterino.pro +++ b/chatterino.pro @@ -216,7 +216,8 @@ SOURCES += \ src/widgets/splits/SplitOverlay.cpp \ src/widgets/StreamView.cpp \ src/widgets/TooltipWidget.cpp \ - src/widgets/Window.cpp + src/widgets/Window.cpp \ + src/common/LinkParser.cpp HEADERS += \ src/Application.hpp \ @@ -384,7 +385,8 @@ HEADERS += \ src/widgets/TooltipWidget.hpp \ src/widgets/Window.hpp \ src/providers/twitch/TwitchCommon.hpp \ - src/util/IsBigEndian.hpp + src/util/IsBigEndian.hpp \ + src/common/LinkParser.hpp RESOURCES += \ resources/resources.qrc \ diff --git a/src/common/LinkParser.cpp b/src/common/LinkParser.cpp new file mode 100644 index 000000000..1d1e39702 --- /dev/null +++ b/src/common/LinkParser.cpp @@ -0,0 +1,77 @@ +#include "common/LinkParser.hpp" + +#include "debug/Log.hpp" + +#include <QFile> +#include <QRegularExpression> +#include <QString> +#include <QTextStream> + +#include <mutex> + +namespace chatterino { + +namespace { + +std::once_flag regexInitializedFlag; +QRegularExpression *linkRegex = nullptr; + +void initializeRegularExpressions() +{ + std::call_once(regexInitializedFlag, [] { + QFile tldFile(":/tlds.txt"); + tldFile.open(QFile::ReadOnly); + QTextStream t1(&tldFile); + t1.setCodec("UTF-8"); + QString tldData = t1.readAll(); + tldData.replace("\n", "|"); + const QString urlRegExp = + "^" + // protocol identifier + "(?:(?:https?|ftps?)://)?" + // user:pass authentication + "(?:\\S+(?::\\S*)?@)?" 
+ "(?:" + // IP address dotted notation octets + // excludes loopback network 0.0.0.0 + // excludes reserved space >= 224.0.0.0 + // excludes network & broadcast addresses + // (first & last IP address of each class) + "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" + "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" + "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" + "|" + // host name + "(?:(?:[_a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)" + // domain name + "(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)*" + // TLD identifier + //"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}]{2,}))" + "(?:[\\.](?:" + + tldData + + "))" + "\\.?" + ")" + // port number + "(?::\\d{2,5})?" + // resource path + "(?:[/?#]\\S*)?" + "$"; + linkRegex = new QRegularExpression(urlRegExp, QRegularExpression::CaseInsensitiveOption); + + Log("fully initialized"); + }); + + Log("call_once returned"); +} + +} // namespace + +LinkParser::LinkParser(const QString &unparsedString) +{ + initializeRegularExpressions(); + + this->match_ = linkRegex->match(unparsedString); +} + +} // namespace chatterino diff --git a/src/common/LinkParser.hpp b/src/common/LinkParser.hpp new file mode 100644 index 000000000..3a1ad6fab --- /dev/null +++ b/src/common/LinkParser.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include <QRegularExpressionMatch> +#include <QString> + +namespace chatterino { + +class LinkParser +{ +public: + explicit LinkParser(const QString &unparsedString); + + bool hasMatch() const + { + return this->match_.hasMatch(); + } + + QString getCaptured() const + { + return this->match_.captured(); + } + +private: + QRegularExpressionMatch match_; +}; + +} // namespace chatterino diff --git a/src/messages/MessageBuilder.cpp b/src/messages/MessageBuilder.cpp index 343b074af..0a8e58f10 100644 --- a/src/messages/MessageBuilder.cpp +++ b/src/messages/MessageBuilder.cpp @@ -1,4 +1,6 @@ #include "MessageBuilder.hpp" + +#include "common/LinkParser.hpp" #include "singletons/EmoteManager.hpp" #include "singletons/ResourceManager.hpp" 
#include "singletons/ThemeManager.hpp" @@ -43,51 +45,16 @@ void MessageBuilder::appendTimestamp(const QTime &time) QString MessageBuilder::matchLink(const QString &string) { - QFile tldFile(":/tlds.txt"); - tldFile.open(QFile::ReadOnly); - QTextStream t1(&tldFile); - t1.setCodec("UTF-8"); - QString tldData = t1.readAll(); - tldData.replace("\n", "|"); - const QString urlRegExp = "^" - // protocol identifier - "(?:(?:https?|ftps?)://)?" - // user:pass authentication - "(?:\\S+(?::\\S*)?@)?" - "(?:" - // IP address dotted notation octets - // excludes loopback network 0.0.0.0 - // excludes reserved space >= 224.0.0.0 - // excludes network & broacast addresses - // (first & last IP address of each class) - "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" - "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" - "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" - "|" - // host name - "(?:(?:[_a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)" - // domain name - "(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)*" - // TLD identifier - //"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}]{2,}))" - "(?:[\\.](?:" + tldData + "))" - "\\.?" - ")" - // port number - "(?::\\d{2,5})?" - // resource path - "(?:[/?#]\\S*)?" - "$"; - static QRegularExpression linkRegex(urlRegExp, QRegularExpression::CaseInsensitiveOption); + LinkParser linkParser(string); + static QRegularExpression httpRegex("\\bhttps?://"); static QRegularExpression ftpRegex("\\bftps?://"); - auto match = linkRegex.match(string); - if (!match.hasMatch()) { + if (!linkParser.hasMatch()) { return QString(); } - QString captured = match.captured(); + QString captured = linkParser.getCaptured(); if (!captured.contains(httpRegex)) { if (!captured.contains(ftpRegex)) {