chore: improve link parser and its tests a bit (#5522)

This commit is contained in:
nerix 2024-07-23 23:38:17 +02:00 committed by GitHub
parent a0b70b8c5e
commit a2cbe6377d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 29 additions and 16 deletions

View file

@@ -1,17 +1,24 @@
#define QT_NO_CAST_FROM_ASCII // avoids unexpected implicit casts #define QT_NO_CAST_FROM_ASCII // avoids unexpected implicit casts
#include "common/LinkParser.hpp" #include "common/LinkParser.hpp"
#include "util/QCompareCaseInsensitive.hpp"
#include <QFile> #include <QFile>
#include <QSet>
#include <QString> #include <QString>
#include <QStringView> #include <QStringView>
#include <QTextStream> #include <QTextStream>
#include <set>
namespace { namespace {
QSet<QString> &tlds() using namespace chatterino;
using TldSet = std::set<QString, QCompareCaseInsensitive>;
TldSet &tlds()
{ {
static QSet<QString> tlds = [] { static TldSet tlds = [] {
QFile file(QStringLiteral(":/tlds.txt")); QFile file(QStringLiteral(":/tlds.txt"));
file.open(QFile::ReadOnly); file.open(QFile::ReadOnly);
QTextStream stream(&file); QTextStream stream(&file);
@@ -21,19 +28,12 @@ QSet<QString> &tlds()
#else #else
stream.setCodec("UTF-8"); stream.setCodec("UTF-8");
#endif #endif
int safetyMax = 20000;
QSet<QString> set; TldSet set;
while (!stream.atEnd()) while (!stream.atEnd())
{ {
auto line = stream.readLine(); set.emplace(stream.readLine());
set.insert(line);
if (safetyMax-- == 0)
{
break;
}
} }
return set; return set;
@@ -43,7 +43,7 @@ QSet<QString> &tlds()
bool isValidTld(QStringView tld) bool isValidTld(QStringView tld)
{ {
return tlds().contains(tld.toString().toLower()); return tlds().contains(tld);
} }
bool isValidIpv4(QStringView host) bool isValidIpv4(QStringView host)
@@ -166,6 +166,8 @@ namespace chatterino::linkparser {
std::optional<Parsed> parse(const QString &source) noexcept std::optional<Parsed> parse(const QString &source) noexcept
{ {
using SizeType = QString::size_type;
std::optional<Parsed> result; std::optional<Parsed> result;
// This is not implemented with a regex to increase performance. // This is not implemented with a regex to increase performance.
@@ -201,11 +203,11 @@ std::optional<Parsed> parse(const QString &source) noexcept
QStringView host = remaining; QStringView host = remaining;
QStringView rest; QStringView rest;
bool lastWasDot = true; bool lastWasDot = true;
int lastDotPos = -1; SizeType lastDotPos = -1;
int nDots = 0; SizeType nDots = 0;
// Extract the host // Extract the host
for (int i = 0; i < remaining.size(); i++) for (SizeType i = 0; i < remaining.size(); i++)
{ {
char16_t currentChar = remaining[i].unicode(); char16_t currentChar = remaining[i].unicode();
if (currentChar == u'.') if (currentChar == u'.')

View file

@@ -72,6 +72,8 @@ TEST(LinkParser, parseDomainLinks)
{"", "chatterino.com", ":80"}, {"", "chatterino.com", ":80"},
{"", "wiki.chatterino.com", ":80"}, {"", "wiki.chatterino.com", ":80"},
{"", "wiki.chatterino.com", ":80/foo/bar"}, {"", "wiki.chatterino.com", ":80/foo/bar"},
{"", "wiki.chatterino.com", ":80?foo"},
{"", "wiki.chatterino.com", ":80#foo"},
{"", "wiki.chatterino.com", "/:80?foo/bar"}, {"", "wiki.chatterino.com", "/:80?foo/bar"},
{"", "wiki.chatterino.com", "/127.0.0.1"}, {"", "wiki.chatterino.com", "/127.0.0.1"},
{"", "a.b.c.chatterino.com"}, {"", "a.b.c.chatterino.com"},
@@ -156,6 +158,7 @@ TEST(LinkParser, parseIpv4Links)
TEST(LinkParser, doesntParseInvalidIpv4Links) TEST(LinkParser, doesntParseInvalidIpv4Links)
{ {
const QStringList inputs = { const QStringList inputs = {
"196.162.a.1",
// U+0660 - in category "number digits" // U+0660 - in category "number digits"
QStringLiteral("٠.٠.٠.٠"), QStringLiteral("٠.٠.٠.٠"),
"https://127.0.0.", "https://127.0.0.",
@@ -186,6 +189,10 @@ TEST(LinkParser, doesntParseInvalidIpv4Links)
"196.162.8.1(", "196.162.8.1(",
"196.162.8.1(!", "196.162.8.1(!",
"127.1.1;.com", "127.1.1;.com",
"127.0.-.1",
"127...",
"1.1.1.",
"1.1.1.:80",
}; };
for (const auto &input : inputs) for (const auto &input : inputs)
@@ -223,6 +230,10 @@ TEST(LinkParser, doesntParseInvalidLinks)
"https://pn./", "https://pn./",
"pn./", "pn./",
"pn.", "pn.",
"pn.:80",
"pn./foo",
"pn.#foo",
"pn.?foo",
"http/chatterino.com", "http/chatterino.com",
"http/wiki.chatterino.com", "http/wiki.chatterino.com",
"http:cat.com", "http:cat.com",