chore: improve link parser and its tests a bit (#5522)

This commit is contained in:
nerix 2024-07-23 23:38:17 +02:00 committed by GitHub
parent a0b70b8c5e
commit a2cbe6377d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 29 additions and 16 deletions

View file

@ -1,17 +1,24 @@
#define QT_NO_CAST_FROM_ASCII // avoids unexpected implicit casts
#include "common/LinkParser.hpp"
#include "util/QCompareCaseInsensitive.hpp"
#include <QFile>
#include <QSet>
#include <QString>
#include <QStringView>
#include <QTextStream>
#include <set>
namespace {
QSet<QString> &tlds()
using namespace chatterino;
using TldSet = std::set<QString, QCompareCaseInsensitive>;
TldSet &tlds()
{
static QSet<QString> tlds = [] {
static TldSet tlds = [] {
QFile file(QStringLiteral(":/tlds.txt"));
file.open(QFile::ReadOnly);
QTextStream stream(&file);
@ -21,19 +28,12 @@ QSet<QString> &tlds()
#else
stream.setCodec("UTF-8");
#endif
int safetyMax = 20000;
QSet<QString> set;
TldSet set;
while (!stream.atEnd())
{
auto line = stream.readLine();
set.insert(line);
if (safetyMax-- == 0)
{
break;
}
set.emplace(stream.readLine());
}
return set;
@ -43,7 +43,7 @@ QSet<QString> &tlds()
// Reports whether `tld` is present in the set returned by tlds().
// NOTE(review): two return statements appear back-to-back below. This text
// looks like a rendered diff with its +/- markers stripped, so presumably the
// first line (lookup of a lowercased QString copy) is the removed version and
// the second (lookup of the QStringView directly) is its replacement - confirm
// against the actual file. As literally written, only the first is reachable.
bool isValidTld(QStringView tld)
{
return tlds().contains(tld.toString().toLower());
return tlds().contains(tld);
}
bool isValidIpv4(QStringView host)
@ -166,6 +166,8 @@ namespace chatterino::linkparser {
std::optional<Parsed> parse(const QString &source) noexcept
{
using SizeType = QString::size_type;
std::optional<Parsed> result;
// This is implemented without a regex for performance reasons.
@ -201,11 +203,11 @@ std::optional<Parsed> parse(const QString &source) noexcept
QStringView host = remaining;
QStringView rest;
bool lastWasDot = true;
int lastDotPos = -1;
int nDots = 0;
SizeType lastDotPos = -1;
SizeType nDots = 0;
// Extract the host
for (int i = 0; i < remaining.size(); i++)
for (SizeType i = 0; i < remaining.size(); i++)
{
char16_t currentChar = remaining[i].unicode();
if (currentChar == u'.')

View file

@ -72,6 +72,8 @@ TEST(LinkParser, parseDomainLinks)
{"", "chatterino.com", ":80"},
{"", "wiki.chatterino.com", ":80"},
{"", "wiki.chatterino.com", ":80/foo/bar"},
{"", "wiki.chatterino.com", ":80?foo"},
{"", "wiki.chatterino.com", ":80#foo"},
{"", "wiki.chatterino.com", "/:80?foo/bar"},
{"", "wiki.chatterino.com", "/127.0.0.1"},
{"", "a.b.c.chatterino.com"},
@ -156,6 +158,7 @@ TEST(LinkParser, parseIpv4Links)
TEST(LinkParser, doesntParseInvalidIpv4Links)
{
const QStringList inputs = {
"196.162.a.1",
// U+0660 (ARABIC-INDIC DIGIT ZERO) - Unicode general category Nd ("Number, decimal digit")
QStringLiteral("٠.٠.٠.٠"),
"https://127.0.0.",
@ -186,6 +189,10 @@ TEST(LinkParser, doesntParseInvalidIpv4Links)
"196.162.8.1(",
"196.162.8.1(!",
"127.1.1;.com",
"127.0.-.1",
"127...",
"1.1.1.",
"1.1.1.:80",
};
for (const auto &input : inputs)
@ -223,6 +230,10 @@ TEST(LinkParser, doesntParseInvalidLinks)
"https://pn./",
"pn./",
"pn.",
"pn.:80",
"pn./foo",
"pn.#foo",
"pn.?foo",
"http/chatterino.com",
"http/wiki.chatterino.com",
"http:cat.com",