From a2cbe6377dabd59c025d5bdf46c956a334b274a6 Mon Sep 17 00:00:00 2001 From: nerix Date: Tue, 23 Jul 2024 23:38:17 +0200 Subject: [PATCH] chore: improve link parser and its tests a bit (#5522) --- src/common/LinkParser.cpp | 34 ++++++++++++++++++---------------- tests/src/LinkParser.cpp | 11 +++++++++++ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/common/LinkParser.cpp b/src/common/LinkParser.cpp index 6858ff902..c6ec1ab2d 100644 --- a/src/common/LinkParser.cpp +++ b/src/common/LinkParser.cpp @@ -1,17 +1,24 @@ #define QT_NO_CAST_FROM_ASCII // avoids unexpected implicit casts #include "common/LinkParser.hpp" +#include "util/QCompareCaseInsensitive.hpp" + #include -#include #include #include #include +#include + namespace { -QSet &tlds() +using namespace chatterino; + +using TldSet = std::set; + +TldSet &tlds() { - static QSet tlds = [] { + static TldSet tlds = [] { QFile file(QStringLiteral(":/tlds.txt")); file.open(QFile::ReadOnly); QTextStream stream(&file); @@ -21,19 +28,12 @@ QSet &tlds() #else stream.setCodec("UTF-8"); #endif - int safetyMax = 20000; - QSet set; + TldSet set; while (!stream.atEnd()) { - auto line = stream.readLine(); - set.insert(line); - - if (safetyMax-- == 0) - { - break; - } + set.emplace(stream.readLine()); } return set; @@ -43,7 +43,7 @@ QSet &tlds() bool isValidTld(QStringView tld) { - return tlds().contains(tld.toString().toLower()); + return tlds().contains(tld); } bool isValidIpv4(QStringView host) @@ -166,6 +166,8 @@ namespace chatterino::linkparser { std::optional parse(const QString &source) noexcept { + using SizeType = QString::size_type; + std::optional result; // This is not implemented with a regex to increase performance. @@ -201,11 +203,11 @@ std::optional parse(const QString &source) noexcept QStringView host = remaining; QStringView rest; bool lastWasDot = true; - int lastDotPos = -1; - int nDots = 0; + SizeType lastDotPos = -1; + SizeType nDots = 0; // Extract the host - for (int i = 0; i < remaining.size(); i++) + for (SizeType i = 0; i < remaining.size(); i++) { char16_t currentChar = remaining[i].unicode(); if (currentChar == u'.') diff --git a/tests/src/LinkParser.cpp b/tests/src/LinkParser.cpp index 67fdff0b0..32a5ad340 100644 --- a/tests/src/LinkParser.cpp +++ b/tests/src/LinkParser.cpp @@ -72,6 +72,8 @@ TEST(LinkParser, parseDomainLinks) {"", "chatterino.com", ":80"}, {"", "wiki.chatterino.com", ":80"}, {"", "wiki.chatterino.com", ":80/foo/bar"}, + {"", "wiki.chatterino.com", ":80?foo"}, + {"", "wiki.chatterino.com", ":80#foo"}, {"", "wiki.chatterino.com", "/:80?foo/bar"}, {"", "wiki.chatterino.com", "/127.0.0.1"}, {"", "a.b.c.chatterino.com"}, @@ -156,6 +158,7 @@ TEST(LinkParser, parseIpv4Links) TEST(LinkParser, doesntParseInvalidIpv4Links) { const QStringList inputs = { + "196.162.a.1", // U+0660 - in category "number digits" QStringLiteral("٠.٠.٠.٠"), "https://127.0.0.", @@ -186,6 +189,10 @@ TEST(LinkParser, doesntParseInvalidIpv4Links) "196.162.8.1(", "196.162.8.1(!", "127.1.1;.com", + "127.0.-.1", + "127...", + "1.1.1.", + "1.1.1.:80", }; for (const auto &input : inputs) @@ -223,6 +230,10 @@ TEST(LinkParser, doesntParseInvalidLinks) "https://pn./", "pn./", "pn.", + "pn.:80", + "pn./foo", + "pn.#foo", + "pn.?foo", "http/chatterino.com", "http/wiki.chatterino.com", "http:cat.com",