2018-06-28 00:24:21 +02:00
|
|
|
#include "common/LinkParser.hpp"
|
|
|
|
|
|
|
|
#include <QFile>
|
|
|
|
#include <QRegularExpression>
|
|
|
|
#include <QString>
|
|
|
|
#include <QTextStream>
|
|
|
|
|
|
|
|
namespace chatterino {
|
|
|
|
|
2018-06-28 00:48:25 +02:00
|
|
|
/// Matches `unparsedString` against the URL regex and stores the result in
/// `match_`.
///
/// The regex is compiled once, on first construction, and reused by every
/// later instance. Its TLD alternation is built from the `:/tlds.txt`
/// resource, which is read as UTF-8 with one TLD per line.
///
/// @param unparsedString  The text to test; the pattern is anchored with
///                        `^`/`$`, so the whole string has to be a link.
LinkParser::LinkParser(const QString &unparsedString)
{
    static const QRegularExpression linkRegex = [] {
        QString tldData;

        QFile tldFile(":/tlds.txt");
        if (tldFile.open(QFile::ReadOnly)) {
            QTextStream t1(&tldFile);
            t1.setCodec("UTF-8");

            // Read the TLDs in and join them with pipes. Going line by line
            // (instead of replacing every newline with a pipe) keeps blank
            // lines and a trailing newline from adding an empty alternative,
            // which would let the TLD group match nothing at all.
            while (!t1.atEnd()) {
                const QString tld = t1.readLine().trimmed();
                if (tld.isEmpty()) {
                    continue;
                }

                if (!tldData.isEmpty()) {
                    tldData += "|";
                }
                tldData += tld;
            }
        }

        // If the list could not be opened or held no entries, fall back to a
        // generic "two or more letters" TLD rather than an empty group.
        if (tldData.isEmpty()) {
            tldData = "[a-z\\x{00a1}-\\x{ffff}]{2,}";
        }

        const QString urlRegExp =
            "^"
            // protocol identifier
            "(?:(?:https?|ftps?)://)?"
            // user:pass authentication
            "(?:\\S+(?::\\S*)?@)?"
            "(?:"
            // IP address dotted notation octets
            // excludes loopback network 0.0.0.0
            // excludes reserved space >= 224.0.0.0
            // excludes network & broadcast addresses
            // (first & last IP address of each class)
            "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])"
            "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}"
            "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))"
            "|"
            // host name
            "(?:(?:[_a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)"
            // domain name
            "(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)*"
            // TLD identifier (alternation of the loaded TLDs, or the generic
            // fallback pattern set above)
            "(?:[\\.](?:" +
            tldData +
            "))"
            // optional trailing dot after the TLD
            "\\.?"
            ")"
            // port number
            "(?::\\d{2,5})?"
            // resource path
            "(?:[/?#]\\S*)?"
            "$";

        return QRegularExpression(urlRegExp, QRegularExpression::CaseInsensitiveOption);
    }();

    this->match_ = linkRegex.match(unparsedString);
}
|
|
|
|
|
|
|
|
} // namespace chatterino
|