Make link regular expression only be created once

This commit is contained in:
Rasmus Karlsson 2018-06-28 00:24:21 +02:00 committed by fourtf
parent 7fe8ff97e0
commit 20c17c3377
4 changed files with 114 additions and 41 deletions

View file

@ -216,7 +216,8 @@ SOURCES += \
src/widgets/splits/SplitOverlay.cpp \ src/widgets/splits/SplitOverlay.cpp \
src/widgets/StreamView.cpp \ src/widgets/StreamView.cpp \
src/widgets/TooltipWidget.cpp \ src/widgets/TooltipWidget.cpp \
src/widgets/Window.cpp src/widgets/Window.cpp \
src/common/LinkParser.cpp
HEADERS += \ HEADERS += \
src/Application.hpp \ src/Application.hpp \
@ -384,7 +385,8 @@ HEADERS += \
src/widgets/TooltipWidget.hpp \ src/widgets/TooltipWidget.hpp \
src/widgets/Window.hpp \ src/widgets/Window.hpp \
src/providers/twitch/TwitchCommon.hpp \ src/providers/twitch/TwitchCommon.hpp \
src/util/IsBigEndian.hpp src/util/IsBigEndian.hpp \
src/common/LinkParser.hpp
RESOURCES += \ RESOURCES += \
resources/resources.qrc \ resources/resources.qrc \

77
src/common/LinkParser.cpp Normal file
View file

@ -0,0 +1,77 @@
#include "common/LinkParser.hpp"
#include "debug/Log.hpp"
#include <QFile>
#include <QRegularExpression>
#include <QString>
#include <QTextStream>
#include <mutex>
namespace chatterino {
namespace {
std::once_flag regexInitializedFlag;
QRegularExpression *linkRegex = nullptr;
void initializeRegularExpressions()
{
std::call_once(regexInitializedFlag, [] {
QFile tldFile(":/tlds.txt");
tldFile.open(QFile::ReadOnly);
QTextStream t1(&tldFile);
t1.setCodec("UTF-8");
QString tldData = t1.readAll();
tldData.replace("\n", "|");
const QString urlRegExp =
"^"
// protocol identifier
"(?:(?:https?|ftps?)://)?"
// user:pass authentication
"(?:\\S+(?::\\S*)?@)?"
"(?:"
// IP address dotted notation octets
// excludes loopback network 0.0.0.0
// excludes reserved space >= 224.0.0.0
// excludes network & broacast addresses
// (first & last IP address of each class)
"(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])"
"(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}"
"(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))"
"|"
// host name
"(?:(?:[_a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)"
// domain name
"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)*"
// TLD identifier
//"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}]{2,}))"
"(?:[\\.](?:" +
tldData +
"))"
"\\.?"
")"
// port number
"(?::\\d{2,5})?"
// resource path
"(?:[/?#]\\S*)?"
"$";
linkRegex = new QRegularExpression(urlRegExp, QRegularExpression::CaseInsensitiveOption);
Log("fully initialized");
});
Log("call_once returned");
}
} // namespace
// Matches unparsedString against the shared link pattern and keeps the
// result in match_ for later queries.
LinkParser::LinkParser(const QString &unparsedString)
{
    // The shared pattern is created on first use; make sure it exists before
    // it is dereferenced below.
    initializeRegularExpressions();

    const QRegularExpression &pattern = *linkRegex;
    this->match_ = pattern.match(unparsedString);
}
} // namespace chatterino

27
src/common/LinkParser.hpp Normal file
View file

@ -0,0 +1,27 @@
#pragma once
#include <QRegularExpressionMatch>
#include <QString>
namespace chatterino {
class LinkParser
{
public:
explicit LinkParser(const QString &unparsedString);
bool hasMatch() const
{
return this->match_.hasMatch();
}
QString getCaptured() const
{
return this->match_.captured();
}
private:
QRegularExpressionMatch match_;
};
} // namespace chatterino

View file

@ -1,4 +1,6 @@
#include "MessageBuilder.hpp" #include "MessageBuilder.hpp"
#include "common/LinkParser.hpp"
#include "singletons/EmoteManager.hpp" #include "singletons/EmoteManager.hpp"
#include "singletons/ResourceManager.hpp" #include "singletons/ResourceManager.hpp"
#include "singletons/ThemeManager.hpp" #include "singletons/ThemeManager.hpp"
@ -43,51 +45,16 @@ void MessageBuilder::appendTimestamp(const QTime &time)
QString MessageBuilder::matchLink(const QString &string) QString MessageBuilder::matchLink(const QString &string)
{ {
QFile tldFile(":/tlds.txt"); LinkParser linkParser(string);
tldFile.open(QFile::ReadOnly);
QTextStream t1(&tldFile);
t1.setCodec("UTF-8");
QString tldData = t1.readAll();
tldData.replace("\n", "|");
const QString urlRegExp = "^"
// protocol identifier
"(?:(?:https?|ftps?)://)?"
// user:pass authentication
"(?:\\S+(?::\\S*)?@)?"
"(?:"
// IP address dotted notation octets
// excludes loopback network 0.0.0.0
// excludes reserved space >= 224.0.0.0
// excludes network & broacast addresses
// (first & last IP address of each class)
"(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])"
"(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}"
"(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))"
"|"
// host name
"(?:(?:[_a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)"
// domain name
"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)*"
// TLD identifier
//"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}]{2,}))"
"(?:[\\.](?:" + tldData + "))"
"\\.?"
")"
// port number
"(?::\\d{2,5})?"
// resource path
"(?:[/?#]\\S*)?"
"$";
static QRegularExpression linkRegex(urlRegExp, QRegularExpression::CaseInsensitiveOption);
static QRegularExpression httpRegex("\\bhttps?://"); static QRegularExpression httpRegex("\\bhttps?://");
static QRegularExpression ftpRegex("\\bftps?://"); static QRegularExpression ftpRegex("\\bftps?://");
auto match = linkRegex.match(string);
if (!match.hasMatch()) { if (!linkParser.hasMatch()) {
return QString(); return QString();
} }
QString captured = match.captured(); QString captured = linkParser.getCaptured();
if (!captured.contains(httpRegex)) { if (!captured.contains(httpRegex)) {
if (!captured.contains(ftpRegex)) { if (!captured.contains(ftpRegex)) {