2018-06-28 00:24:21 +02:00
|
|
|
#include "common/LinkParser.hpp"
|
|
|
|
|
|
|
|
#include <QFile>
|
|
|
|
#include <QRegularExpression>
|
|
|
|
#include <QString>
|
|
|
|
#include <QTextStream>
|
|
|
|
|
|
|
|
namespace chatterino {
|
|
|
|
|
2018-06-28 00:48:25 +02:00
|
|
|
/// Matches `unparsedString` against the link-detection regular expression and
/// stores the result in `match_`.
///
/// The regular expression is built once, on the first construction, from the
/// TLD list embedded as the Qt resource ":/tlds.txt" and is reused by every
/// later LinkParser instance.
///
/// @param unparsedString  The text to test for being a link.
LinkParser::LinkParser(const QString &unparsedString)
{
    static const QRegularExpression linkRegex = [] {
        // Alternation ("com|net|org|...") of every accepted top-level domain.
        QString tldAlternatives;

        QFile tldFile(":/tlds.txt");
        if (tldFile.open(QFile::ReadOnly)) {
            QTextStream tldStream(&tldFile);
            tldStream.setCodec("UTF-8");

            // Read the TLDs in and join them with pipes. Trimming first and
            // collapsing whole whitespace runs keeps a trailing newline or a
            // blank line from producing an empty alternative, which would
            // make the TLD group match nothing-after-the-dot.
            const QRegularExpression separatorRegex("\\s+");
            tldAlternatives = tldStream.readAll().trimmed();
            tldAlternatives.replace(separatorRegex, "|");
        } else {
            qWarning("LinkParser: could not open :/tlds.txt, "
                     "falling back to a generic TLD pattern");
        }

        if (tldAlternatives.isEmpty()) {
            // No usable TLD list: accept any alphabetic TLD of two or more
            // characters instead of an empty group that matches everything.
            tldAlternatives = "[a-z\\x{00a1}-\\x{ffff}]{2,}";
        }

        // NOTE(review): the top-level "|" splits the pattern into two
        // alternatives, so the trailing "$" only anchors the http/ftp branch;
        // the spotify branch matches as a prefix. Confirm this is intended.
        const QString hyperlinkRegExp =
            "^"
            // Identifier for spotify
            // ("(?x-mi:" turns case-insensitivity off for this group)
            "(?x-mi:(spotify:(?:"
            "(?:artist|album|track|user:[^:]+:playlist):"
            "[a-zA-Z0-9]+|user:[^:]+|search:"
            "(?:[-\\w$\\.+!*'(),]+|%[a-fA-F0-9]{2})+)))"
            // If nothing matches then just go on
            "|"
            "^"
            // Identifier for http and ftp
            "(?:(?:https?|ftps?)://)?"
            // user:pass authentication
            "(?:\\S+(?::\\S*)?@)?"
            "(?:"
            // IP address in dotted notation:
            //  - first octet 1-223: excludes 0.x.x.x and the reserved
            //    space >= 224.0.0.0
            //  - last octet 1-254: excludes network & broadcast addresses
            "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])"
            "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}"
            "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))"
            "|"
            // host name
            "(?:(?:[_a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)"
            // domain name
            "(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)*"
            // TLD identifier (optionally followed by a trailing dot)
            "(?:[\\.](?:" +
            tldAlternatives +
            "))"
            "\\.?"
            ")"
            // port number
            "(?::\\d{2,5})?"
            // resource path
            "(?:[/?#]\\S*)?"
            "$";

        return QRegularExpression(hyperlinkRegExp, QRegularExpression::CaseInsensitiveOption);
    }();

    this->match_ = linkRegex.match(unparsedString);
}
|
|
|
|
|
|
|
|
} // namespace chatterino
|