mirror of
https://github.com/Chatterino/chatterino2.git
synced 2024-11-21 22:24:07 +01:00
Initial rework of matchLink
This commit is contained in:
parent
6a93aa412e
commit
7fe8ff97e0
4 changed files with 1747 additions and 3 deletions
|
@ -66,6 +66,7 @@
|
|||
<file>images/buttons/unban.png</file>
|
||||
<file>images/buttons/unmod.png</file>
|
||||
<file>images/emote_dark.svg</file>
|
||||
<file>tlds.txt</file>
|
||||
</qresource>
|
||||
<qresource prefix="/qt/etc">
|
||||
<file>qt.conf</file>
|
||||
|
|
1693
resources/tlds.txt
Normal file
1693
resources/tlds.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -43,9 +43,44 @@ void MessageBuilder::appendTimestamp(const QTime &time)
|
|||
|
||||
QString MessageBuilder::matchLink(const QString &string)
|
||||
{
|
||||
static QRegularExpression linkRegex("[[:ascii:]]*\\.[a-zA-Z]+\\/?[[:ascii:]]*");
|
||||
QFile tldFile(":/tlds.txt");
|
||||
tldFile.open(QFile::ReadOnly);
|
||||
QTextStream t1(&tldFile);
|
||||
t1.setCodec("UTF-8");
|
||||
QString tldData = t1.readAll();
|
||||
tldData.replace("\n", "|");
|
||||
const QString urlRegExp = "^"
|
||||
// protocol identifier
|
||||
"(?:(?:https?|ftps?)://)?"
|
||||
// user:pass authentication
|
||||
"(?:\\S+(?::\\S*)?@)?"
|
||||
"(?:"
|
||||
// IP address dotted notation octets
|
||||
// excludes the 0.0.0.0/8 network (first octet must be 1-223)
|
||||
// excludes reserved space >= 224.0.0.0
|
||||
// excludes network & broadcast addresses
|
||||
// (first & last IP address of each class)
|
||||
"(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])"
|
||||
"(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}"
|
||||
"(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))"
|
||||
"|"
|
||||
// host name
|
||||
"(?:(?:[_a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)"
|
||||
// domain name
|
||||
"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}0-9]-*)*[a-z\\x{00a1}-\\x{ffff}0-9]+)*"
|
||||
// TLD identifier
|
||||
//"(?:\\.(?:[a-z\\x{00a1}-\\x{ffff}]{2,}))"
|
||||
"(?:[\\.](?:" + tldData + "))"
|
||||
"\\.?"
|
||||
")"
|
||||
// port number
|
||||
"(?::\\d{2,5})?"
|
||||
// resource path
|
||||
"(?:[/?#]\\S*)?"
|
||||
"$";
|
||||
static QRegularExpression linkRegex(urlRegExp, QRegularExpression::CaseInsensitiveOption);
|
||||
static QRegularExpression httpRegex("\\bhttps?://");
|
||||
|
||||
static QRegularExpression ftpRegex("\\bftps?://");
|
||||
auto match = linkRegex.match(string);
|
||||
|
||||
if (!match.hasMatch()) {
|
||||
|
@ -55,7 +90,9 @@ QString MessageBuilder::matchLink(const QString &string)
|
|||
QString captured = match.captured();
|
||||
|
||||
if (!captured.contains(httpRegex)) {
|
||||
captured.insert(0, "http://");
|
||||
if (!captured.contains(ftpRegex)) {
|
||||
captured.insert(0, "http://");
|
||||
}
|
||||
}
|
||||
|
||||
return captured;
|
||||
|
|
13
tools/get-tlds-update.sh
Normal file
13
tools/get-tlds-update.sh
Normal file
|
@ -0,0 +1,13 @@
|
|||
#!/bin/sh
#
# Regenerate the TLD list used for link matching.
#
# Downloads the official list of active TLDs from IANA, lowercases it,
# appends the Unicode form of every punycode ("xn--") entry and writes the
# result to ../resources/tlds.txt (path is relative to the current working
# directory, so run this from the tools/ directory).
#
# Requires: curl, idn2.

# Stop at the first failing command so a failed download or a missing idn2
# never replaces the existing resource file with partial data.
set -e

download=tlds.download.tmp
unicode=tlds.unicode.tmp
list=tlds.txt

# Remove the working files on every exit path; after a successful run the
# list has already been moved away and 'rm -f' simply ignores it.
trap 'rm -f "$download" "$unicode" "$list"' EXIT

# Download the official list of active TLDs from IANA.
# -f makes HTTP errors fail the command, -sS stays quiet but reports errors.
curl -fsS 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt' -o "$download"

# Remove the first line that contains data not needed.
# Put everything that can be into lowercase ('tr' instead of the GNU-only
# sed '\L' escape, so this also works with non-GNU sed).
sed -e '1d' "$download" | tr '[:upper:]' '[:lower:]' > "$list"

# Get the TLDs in punycode format and convert the punycode to Unicode.
# The output goes to a separate file first so the list is never read and
# appended to within the same pipeline.
sed -n -e '/^xn--/p' "$list" | idn2 -d > "$unicode"

# Append the results to the current file.
cat "$unicode" >> "$list"

mv "$list" ../resources/tlds.txt
|
Loading…
Reference in a new issue