mirror of
https://github.com/Chatterino/chatterino2.git
synced 2024-11-13 19:49:51 +01:00
chore: improve link parser and its tests a bit (#5522)
This commit is contained in:
parent
a0b70b8c5e
commit
a2cbe6377d
|
@ -1,17 +1,24 @@
|
||||||
#define QT_NO_CAST_FROM_ASCII // avoids unexpected implicit casts
|
#define QT_NO_CAST_FROM_ASCII // avoids unexpected implicit casts
|
||||||
#include "common/LinkParser.hpp"
|
#include "common/LinkParser.hpp"
|
||||||
|
|
||||||
|
#include "util/QCompareCaseInsensitive.hpp"
|
||||||
|
|
||||||
#include <QFile>
|
#include <QFile>
|
||||||
#include <QSet>
|
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QStringView>
|
#include <QStringView>
|
||||||
#include <QTextStream>
|
#include <QTextStream>
|
||||||
|
|
||||||
|
#include <set>
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
QSet<QString> &tlds()
|
using namespace chatterino;
|
||||||
|
|
||||||
|
using TldSet = std::set<QString, QCompareCaseInsensitive>;
|
||||||
|
|
||||||
|
TldSet &tlds()
|
||||||
{
|
{
|
||||||
static QSet<QString> tlds = [] {
|
static TldSet tlds = [] {
|
||||||
QFile file(QStringLiteral(":/tlds.txt"));
|
QFile file(QStringLiteral(":/tlds.txt"));
|
||||||
file.open(QFile::ReadOnly);
|
file.open(QFile::ReadOnly);
|
||||||
QTextStream stream(&file);
|
QTextStream stream(&file);
|
||||||
|
@ -21,19 +28,12 @@ QSet<QString> &tlds()
|
||||||
#else
|
#else
|
||||||
stream.setCodec("UTF-8");
|
stream.setCodec("UTF-8");
|
||||||
#endif
|
#endif
|
||||||
int safetyMax = 20000;
|
|
||||||
|
|
||||||
QSet<QString> set;
|
TldSet set;
|
||||||
|
|
||||||
while (!stream.atEnd())
|
while (!stream.atEnd())
|
||||||
{
|
{
|
||||||
auto line = stream.readLine();
|
set.emplace(stream.readLine());
|
||||||
set.insert(line);
|
|
||||||
|
|
||||||
if (safetyMax-- == 0)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return set;
|
return set;
|
||||||
|
@ -43,7 +43,7 @@ QSet<QString> &tlds()
|
||||||
|
|
||||||
bool isValidTld(QStringView tld)
|
bool isValidTld(QStringView tld)
|
||||||
{
|
{
|
||||||
return tlds().contains(tld.toString().toLower());
|
return tlds().contains(tld);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isValidIpv4(QStringView host)
|
bool isValidIpv4(QStringView host)
|
||||||
|
@ -166,6 +166,8 @@ namespace chatterino::linkparser {
|
||||||
|
|
||||||
std::optional<Parsed> parse(const QString &source) noexcept
|
std::optional<Parsed> parse(const QString &source) noexcept
|
||||||
{
|
{
|
||||||
|
using SizeType = QString::size_type;
|
||||||
|
|
||||||
std::optional<Parsed> result;
|
std::optional<Parsed> result;
|
||||||
// This is not implemented with a regex to increase performance.
|
// This is not implemented with a regex to increase performance.
|
||||||
|
|
||||||
|
@ -201,11 +203,11 @@ std::optional<Parsed> parse(const QString &source) noexcept
|
||||||
QStringView host = remaining;
|
QStringView host = remaining;
|
||||||
QStringView rest;
|
QStringView rest;
|
||||||
bool lastWasDot = true;
|
bool lastWasDot = true;
|
||||||
int lastDotPos = -1;
|
SizeType lastDotPos = -1;
|
||||||
int nDots = 0;
|
SizeType nDots = 0;
|
||||||
|
|
||||||
// Extract the host
|
// Extract the host
|
||||||
for (int i = 0; i < remaining.size(); i++)
|
for (SizeType i = 0; i < remaining.size(); i++)
|
||||||
{
|
{
|
||||||
char16_t currentChar = remaining[i].unicode();
|
char16_t currentChar = remaining[i].unicode();
|
||||||
if (currentChar == u'.')
|
if (currentChar == u'.')
|
||||||
|
|
|
@ -72,6 +72,8 @@ TEST(LinkParser, parseDomainLinks)
|
||||||
{"", "chatterino.com", ":80"},
|
{"", "chatterino.com", ":80"},
|
||||||
{"", "wiki.chatterino.com", ":80"},
|
{"", "wiki.chatterino.com", ":80"},
|
||||||
{"", "wiki.chatterino.com", ":80/foo/bar"},
|
{"", "wiki.chatterino.com", ":80/foo/bar"},
|
||||||
|
{"", "wiki.chatterino.com", ":80?foo"},
|
||||||
|
{"", "wiki.chatterino.com", ":80#foo"},
|
||||||
{"", "wiki.chatterino.com", "/:80?foo/bar"},
|
{"", "wiki.chatterino.com", "/:80?foo/bar"},
|
||||||
{"", "wiki.chatterino.com", "/127.0.0.1"},
|
{"", "wiki.chatterino.com", "/127.0.0.1"},
|
||||||
{"", "a.b.c.chatterino.com"},
|
{"", "a.b.c.chatterino.com"},
|
||||||
|
@ -156,6 +158,7 @@ TEST(LinkParser, parseIpv4Links)
|
||||||
TEST(LinkParser, doesntParseInvalidIpv4Links)
|
TEST(LinkParser, doesntParseInvalidIpv4Links)
|
||||||
{
|
{
|
||||||
const QStringList inputs = {
|
const QStringList inputs = {
|
||||||
|
"196.162.a.1",
|
||||||
// U+0660 - in category "number digits"
|
// U+0660 - in category "number digits"
|
||||||
QStringLiteral("٠.٠.٠.٠"),
|
QStringLiteral("٠.٠.٠.٠"),
|
||||||
"https://127.0.0.",
|
"https://127.0.0.",
|
||||||
|
@ -186,6 +189,10 @@ TEST(LinkParser, doesntParseInvalidIpv4Links)
|
||||||
"196.162.8.1(",
|
"196.162.8.1(",
|
||||||
"196.162.8.1(!",
|
"196.162.8.1(!",
|
||||||
"127.1.1;.com",
|
"127.1.1;.com",
|
||||||
|
"127.0.-.1",
|
||||||
|
"127...",
|
||||||
|
"1.1.1.",
|
||||||
|
"1.1.1.:80",
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const auto &input : inputs)
|
for (const auto &input : inputs)
|
||||||
|
@ -223,6 +230,10 @@ TEST(LinkParser, doesntParseInvalidLinks)
|
||||||
"https://pn./",
|
"https://pn./",
|
||||||
"pn./",
|
"pn./",
|
||||||
"pn.",
|
"pn.",
|
||||||
|
"pn.:80",
|
||||||
|
"pn./foo",
|
||||||
|
"pn.#foo",
|
||||||
|
"pn.?foo",
|
||||||
"http/chatterino.com",
|
"http/chatterino.com",
|
||||||
"http/wiki.chatterino.com",
|
"http/wiki.chatterino.com",
|
||||||
"http:cat.com",
|
"http:cat.com",
|
||||||
|
|
Loading…
Reference in a new issue