mirror of
https://github.com/Chatterino/chatterino2.git
synced 2024-11-13 19:49:51 +01:00
Initial rework of matchLink
This commit is contained in:
parent
6a93aa412e
commit
7fe8ff97e0
|
@ -66,6 +66,7 @@
|
||||||
<file>images/buttons/unban.png</file>
|
<file>images/buttons/unban.png</file>
|
||||||
<file>images/buttons/unmod.png</file>
|
<file>images/buttons/unmod.png</file>
|
||||||
<file>images/emote_dark.svg</file>
|
<file>images/emote_dark.svg</file>
|
||||||
|
<file>tlds.txt</file>
|
||||||
</qresource>
|
</qresource>
|
||||||
<qresource prefix="/qt/etc">
|
<qresource prefix="/qt/etc">
|
||||||
<file>qt.conf</file>
|
<file>qt.conf</file>
|
||||||
|
|
1693
resources/tlds.txt
Normal file
1693
resources/tlds.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -43,9 +43,44 @@ void MessageBuilder::appendTimestamp(const QTime &time)
|
||||||
|
|
||||||
QString MessageBuilder::matchLink(const QString &string)
|
QString MessageBuilder::matchLink(const QString &string)
|
||||||
{
|
{
|
||||||
static QRegularExpression linkRegex("[[:ascii:]]*\\.[a-zA-Z]+\\/?[[:ascii:]]*");
|
QFile tldFile(":/tlds.txt");
|
||||||
|
tldFile.open(QFile::ReadOnly);
|
||||||
|
QTextStream t1(&tldFile);
|
||||||
|
t1.setCodec("UTF-8");
|
||||||
|
QString tldData = t1.readAll();
|
||||||
|
tldData.replace("\n", "|");
|
||||||
|
const QString urlRegExp = "^"
|
||||||
|
// protocol identifier
|
||||||
|
"(?:(?:https?|ftps?)://)?"
|
||||||
|
// user:pass authentication
|
||||||
|
"(?:\\S+(?::\\S*)?@)?"
|
||||||
|
"(?:"
|
||||||
|
// IP address dotted notation octets
|
||||||
|
// excludes the 0.0.0.0/8 network (first octet is never 0)
|
||||||
|
// excludes reserved space >= 224.0.0.0
|
||||||
|
// excludes network & broadcast addresses
|
||||||
|
// (first & last IP address of each class)
|
||||||
|
"(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])"
|
||||||
|
"(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}"
|
||||||
|
"(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))"
|
||||||
|
"|"
|
||||||
|
// host name
|
||||||
|
"(?:(?:[_a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)"
|
||||||
|
// domain name
|
||||||
|
"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)*"
|
||||||
|
// TLD identifier
|
||||||
|
//"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}]{2,}))"
|
||||||
|
"(?:[\\.](?:" + tldData + "))"
|
||||||
|
"\\.?"
|
||||||
|
")"
|
||||||
|
// port number
|
||||||
|
"(?::\\d{2,5})?"
|
||||||
|
// resource path
|
||||||
|
"(?:[/?#]\\S*)?"
|
||||||
|
"$";
|
||||||
|
static QRegularExpression linkRegex(urlRegExp, QRegularExpression::CaseInsensitiveOption);
|
||||||
static QRegularExpression httpRegex("\\bhttps?://");
|
static QRegularExpression httpRegex("\\bhttps?://");
|
||||||
|
static QRegularExpression ftpRegex("\\bftps?://");
|
||||||
auto match = linkRegex.match(string);
|
auto match = linkRegex.match(string);
|
||||||
|
|
||||||
if (!match.hasMatch()) {
|
if (!match.hasMatch()) {
|
||||||
|
@ -55,7 +90,9 @@ QString MessageBuilder::matchLink(const QString &string)
|
||||||
QString captured = match.captured();
|
QString captured = match.captured();
|
||||||
|
|
||||||
if (!captured.contains(httpRegex)) {
|
if (!captured.contains(httpRegex)) {
|
||||||
captured.insert(0, "http://");
|
if (!captured.contains(ftpRegex)) {
|
||||||
|
captured.insert(0, "http://");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return captured;
|
return captured;
|
||||||
|
|
13
tools/get-tlds-update.sh
Normal file
13
tools/get-tlds-update.sh
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
#Download the official list of active TLDs from IANA
|
||||||
|
#Remove the first line that contains data not needed.
|
||||||
|
#Put everything that can be into lowercase.
|
||||||
|
#Output the result to a file.
|
||||||
|
curl -s 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt' | sed -e '1d' -e 's/\(.*\)/\L\1/' > tlds.txt
|
||||||
|
|
||||||
|
#Get the TLDs in punycode format.
|
||||||
|
#Convert the punycode to Unicode.
|
||||||
|
#Append the results to the current file.
|
||||||
|
sed -n -e '/^xn--/p' tlds.txt | idn2 -d >> tlds.txt
|
||||||
|
mv tlds.txt ../resources/tlds.txt
|
Loading…
Reference in a new issue