// Review notes for this patch. These "//" lines sit before the first
// "diff --git" header and are not part of any hunk.
//
// NOTE(review): this copy of the patch appears to have lost its line breaks
// and whatever stood between angle brackets (the #include lines are bare and
// QSet has no template argument) - confirm against the real commit.
//
// Purpose
//   Per the diff headers this changes src/common/LinkParser.cpp and
//   src/common/LinkParser.hpp. The regex match in LinkParser::LinkParser
//   (removed "-" lines) is replaced by a hand-written, goto-driven scanner
//   (added "+" lines). In the header, QRegularExpressionMatch match_ is
//   replaced by bool hasMatch_ and QString match_. hasMatch() now returns
//   hasMatch_ and getCaptured() returns match_.
//
// Helpers added in the anonymous namespace
//   tlds()            Function-local static set, filled once by reading
//                     ":/tlds.txt" line by line (presumably a Qt resource
//                     path - confirm). The safetyMax counter, starting at
//                     20000, bounds the read loop.
//   isValidHostname() True when the host contains a '.' and the text after
//                     the last '.', lower-cased, is in tlds().
//   isValidIpv4()     Regex: four dot-separated groups of 1 to 3 digits.
//   isValidIpv6()     Compiled only under C_MATCH_IPV6_LINK. Regex: a
//                     bracketed run of the characters a-f, 0-9, ':' and '%'.
//
// Constructor flow
//   1. match_ is set to the input string.
//   2. An optional "https://" or "http://" prefix is stripped (hasHttp).
//   3. Host scan, character by character:
//        - a '.' at index 0 or right after another '.' jumps to error
//          (lastWasDot starts out true);
//        - ':' outside brackets ends the host and jumps to parsePort;
//        - '/', '?' and '#' end the host and jump to parsePath, parseQuery
//          and parseAnchor respectively;
//        - '[' is accepted only at index 0 (sets inIpv6); ']' clears inIpv6;
//        - if the input ends without a delimiter, a trailing '.' is an
//          error, otherwise control goes to done.
//   4. parsePort looks at no more than the first 5 characters: '/', '?' and
//      '#' jump to the path/query/anchor labels, any other non-digit is an
//      error.
//   5. parsePath / parseQuery / parseAnchor validate nothing and fall
//      through to done.
//   6. done: the host must pass isValidHostname() or isValidIpv4(), or,
//      under C_MATCH_IPV6_LINK and with an http(s) prefix, isValidIpv6().
//   7. error: sets hasMatch_ to false.
//
// NOTE(review) 1 - read of an unset member: the header hunk declares
//   "bool hasMatch_;" with no initializer, and the constructor has no
//   member-init list and no assignment to hasMatch_ before the
//   "if (this->hasMatch_)" guard at done. The only visible writes are inside
//   that guard and at error. The guard therefore tests a value nothing in
//   this patch has set. Suggested fix: give the member an initializer
//   ("bool hasMatch_{false};") and drop the outer guard at done so the host
//   check always runs.
// NOTE(review) 2: the result of file.open(QFile::ReadOnly) is not checked.
//   Presumably a failed open leaves the set empty, so every hostname would
//   be rejected - confirm and consider asserting/logging.
// NOTE(review) 3: match_ is assigned the input at the top of the constructor
//   and again at done, so getCaptured() returns the input even when
//   hasMatch_ ends up false. Confirm callers always test hasMatch() first.
// NOTE(review) 4: the trailing-dot check runs only when the host loop
//   reaches the end of the input. A host cut short by ':', '/', '?' or '#'
//   skips it.
// NOTE(review) 5: characters of the port past the first 5 are never
//   inspected, and an empty port (nothing after ':') also reaches done.
// NOTE(review) 6: the IPv4 regex accepts any 1-3 digit group, so values
//   above 255 pass.
// NOTE(review) 7: the helpers take "QStringRef &" (non-const) although no
//   visible statement modifies the argument. A const reference would
//   document that.
diff --git a/src/common/LinkParser.cpp b/src/common/LinkParser.cpp index 78a0e80c5..03b85a254 100644 --- a/src/common/LinkParser.cpp +++ b/src/common/LinkParser.cpp @@ -1,30 +1,204 @@ #include "common/LinkParser.hpp" #include +#include #include #include +#include #include namespace chatterino { +namespace { + QSet &tlds() + { + static QSet tlds = [] { + QFile file(":/tlds.txt"); + file.open(QFile::ReadOnly); + QTextStream stream(&file); + stream.setCodec("UTF-8"); + int safetyMax = 20000; + + QSet set; + + while (!stream.atEnd()) + { + auto line = stream.readLine(); + set.insert(line); + + if (safetyMax-- == 0) + break; + } + + return set; + }(); + return tlds; + } + + bool isValidHostname(QStringRef &host) + { + int index = host.lastIndexOf('.'); + + return index != -1 && + tlds().contains(host.mid(index + 1).toString().toLower()); + } + + bool isValidIpv4(QStringRef &host) + { + static auto exp = QRegularExpression("^\\d{1,3}(?:\\.\\d{1,3}){3}$"); + + return exp.match(host).hasMatch(); + } + +#ifdef C_MATCH_IPV6_LINK + bool isValidIpv6(QStringRef &host) + { + static auto exp = QRegularExpression("^\\[[a-f0-9:%]+\\]$"); + + return exp.match(host).hasMatch(); + } +#endif +} // namespace LinkParser::LinkParser(const QString &unparsedString) { - static QRegularExpression linkRegex( - "^(?:http(s)?:\\/\\/)?[\\w.-]+(?:\\.[\\w\\.-]+)+[\\w\\-\\._~:/" - "?#[\\]@!\\$&'\\(\\)\\*\\+,;=.]+$", - QRegularExpression::CaseInsensitiveOption); + this->match_ = unparsedString; - this->match_ = linkRegex.match(unparsedString); + // This is not implemented with a regex to increase performance. + // We keep removing parts of the url until there's either nothing left or we fail. + QStringRef l(&unparsedString); + + bool hasHttp = false; + + // Protocol `https?://` + if (l.startsWith("https://")) + { + hasHttp = true; + l = l.mid(8); + } + else if (l.startsWith("http://")) + { + hasHttp = true; + l = l.mid(7); + } + + // Http basic auth `user:password`. 
+ // Not supported for security reasons (misleading links) + + // Host `a.b.c.com` + QStringRef host = l; + bool lastWasDot = true; + bool inIpv6 = false; + + for (int i = 0; i < l.size(); i++) + { + if (l[i] == '.') + { + if (lastWasDot == true) // no double dots .. + goto error; + lastWasDot = true; + } + else + { + lastWasDot = false; + } + + if (l[i] == ':' && !inIpv6) + { + host = l.mid(0, i); + l = l.mid(i + 1); + goto parsePort; + } + else if (l[i] == '/') + { + host = l.mid(0, i); + l = l.mid(i + 1); + goto parsePath; + } + else if (l[i] == '?') + { + host = l.mid(0, i); + l = l.mid(i + 1); + goto parseQuery; + } + else if (l[i] == '#') + { + host = l.mid(0, i); + l = l.mid(i + 1); + goto parseAnchor; + } + + // ipv6 + if (l[i] == '[') + { + if (i == 0) + inIpv6 = true; + else + goto error; + } + else if (l[i] == ']') + { + inIpv6 = false; + } + } + + if (lastWasDot) + goto error; + else + goto done; + +parsePort: + // Port `:12345` + for (int i = 0; i < std::min(5, l.size()); i++) + { + if (l[i] == '/') + goto parsePath; + else if (l[i] == '?') + goto parseQuery; + else if (l[i] == '#') + goto parseAnchor; + + if (!l[i].isDigit()) + goto error; + } + + goto done; + +parsePath: +parseQuery: +parseAnchor: + // we accept everything in the path/query/anchor + +done: + // check host + if (this->hasMatch_) + { + this->hasMatch_ = isValidHostname(host) || isValidIpv4(host) +#ifdef C_MATCH_IPV6_LINK + + || (hasHttp && isValidIpv6(host)) +#endif + ; + + if (this->hasMatch_) + { + this->match_ = unparsedString; + } + } + + return; + +error: + hasMatch_ = false; } bool LinkParser::hasMatch() const { - return this->match_.hasMatch(); + return this->hasMatch_; } QString LinkParser::getCaptured() const { - return this->match_.captured(); + return this->match_; } } // namespace chatterino diff --git a/src/common/LinkParser.hpp b/src/common/LinkParser.hpp index 2d56cbde0..d72e1139c 100644 --- a/src/common/LinkParser.hpp +++ b/src/common/LinkParser.hpp @@ -14,7 +14,8 @@ 
public: QString getCaptured() const; private: - QRegularExpressionMatch match_; + bool hasMatch_; + QString match_; }; } // namespace chatterino