mirror of
https://github.com/Chatterino/chatterino2.git
synced 2024-11-13 19:49:51 +01:00
refactor: move zero width replacement to a function (#5594)
This commit is contained in:
parent
e149be3820
commit
d0bcf35fdc
|
@ -90,6 +90,7 @@
|
|||
- Dev: The timer for `StreamerMode` is now destroyed on the correct thread. (#5571)
|
||||
- Dev: Cleanup some parts of the `magic_enum` adaptation for Qt. (#5587)
|
||||
- Dev: Refactored `static`s in headers to only be present once in the final app. (#5588)
|
||||
- Dev: Refactored legacy Unicode zero-width-joiner replacement. (#5594)
|
||||
- Dev: The JSON output when copying a message (<kbd>SHIFT</kbd> + right-click) is now more extensive. (#5600)
|
||||
|
||||
## 2.5.1
|
||||
|
|
|
@ -1,22 +1,13 @@
|
|||
#include "providers/recentmessages/Impl.hpp"
|
||||
|
||||
#include "common/Env.hpp"
|
||||
#include "common/QLogging.hpp"
|
||||
#include "messages/MessageBuilder.hpp"
|
||||
#include "providers/twitch/IrcMessageHandler.hpp"
|
||||
#include "providers/twitch/TwitchChannel.hpp"
|
||||
#include "util/FormatTime.hpp"
|
||||
#include "util/Helpers.hpp"
|
||||
|
||||
#include <QJsonArray>
|
||||
#include <QUrlQuery>
|
||||
|
||||
namespace {
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
const auto &LOG = chatterinoRecentMessages;
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace chatterino::recentmessages::detail {
|
||||
|
||||
// Parse the IRC messages returned in JSON form into Communi messages
|
||||
|
@ -33,11 +24,7 @@ std::vector<Communi::IrcMessage *> parseRecentMessages(
|
|||
|
||||
for (const auto &jsonMessage : jsonMessages)
|
||||
{
|
||||
auto content = jsonMessage.toString();
|
||||
|
||||
// For explanation of why this exists, see src/providers/twitch/TwitchChannel.hpp,
|
||||
// where these constants are defined
|
||||
content.replace(COMBINED_FIXER, ZERO_WIDTH_JOINER);
|
||||
auto content = unescapeZeroWidthJoiner(jsonMessage.toString());
|
||||
|
||||
auto *message =
|
||||
Communi::IrcMessage::fromData(content.toUtf8(), nullptr);
|
||||
|
|
|
@ -702,15 +702,8 @@ void IrcMessageHandler::handlePrivMessage(Communi::IrcPrivateMessage *message,
|
|||
}
|
||||
}
|
||||
|
||||
// This is for compatibility with older Chatterino versions. Twitch didn't use
|
||||
// to allow ZERO WIDTH JOINER unicode character, so Chatterino used ESCAPE_TAG
|
||||
// instead.
|
||||
// See https://github.com/Chatterino/chatterino2/issues/3384 and
|
||||
// https://mm2pl.github.io/emoji_rfc.pdf for more details
|
||||
this->addMessage(
|
||||
message, chan,
|
||||
message->content().replace(COMBINED_FIXER, ZERO_WIDTH_JOINER),
|
||||
twitchServer, false, message->isAction());
|
||||
this->addMessage(message, chan, unescapeZeroWidthJoiner(message->content()),
|
||||
twitchServer, false, message->isAction());
|
||||
|
||||
if (message->tags().contains(u"pinned-chat-paid-amount"_s))
|
||||
{
|
||||
|
@ -915,10 +908,9 @@ void IrcMessageHandler::handleWhisperMessage(Communi::IrcMessage *ircMessage)
|
|||
|
||||
auto *c = getApp()->getTwitch()->getWhispersChannel().get();
|
||||
|
||||
MessageBuilder builder(
|
||||
c, ircMessage, args,
|
||||
ircMessage->parameter(1).replace(COMBINED_FIXER, ZERO_WIDTH_JOINER),
|
||||
false);
|
||||
MessageBuilder builder(c, ircMessage, args,
|
||||
unescapeZeroWidthJoiner(ircMessage->parameter(1)),
|
||||
false);
|
||||
|
||||
if (builder.isIgnored())
|
||||
{
|
||||
|
|
|
@ -27,24 +27,6 @@
|
|||
|
||||
namespace chatterino {
|
||||
|
||||
// This is for compatibility with older Chatterino versions. Twitch didn't use
|
||||
// to allow ZERO WIDTH JOINER unicode character, so Chatterino used ESCAPE_TAG
|
||||
// instead.
|
||||
// See https://github.com/Chatterino/chatterino2/issues/3384 and
|
||||
// https://mm2pl.github.io/emoji_rfc.pdf for more details
|
||||
const QString ZERO_WIDTH_JOINER = QString(QChar(0x200D));
|
||||
|
||||
// Here be MSVC: Do NOT replace with "\U" literal, it will fail silently.
|
||||
namespace {
|
||||
const QChar ESCAPE_TAG_CHARS[2] = {QChar::highSurrogate(0xE0002),
|
||||
QChar::lowSurrogate(0xE0002)};
|
||||
}
|
||||
const QString ESCAPE_TAG = QString(ESCAPE_TAG_CHARS, 2);
|
||||
|
||||
const static QRegularExpression COMBINED_FIXER(
|
||||
QString("(?<!%1)%1").arg(ESCAPE_TAG),
|
||||
QRegularExpression::UseUnicodePropertiesOption);
|
||||
|
||||
enum class HighlightState;
|
||||
|
||||
struct Emote;
|
||||
|
|
|
@ -7,6 +7,18 @@
|
|||
#include <QRegularExpression>
|
||||
#include <QUuid>
|
||||
|
||||
namespace {
|
||||
|
||||
const QString ZERO_WIDTH_JOINER = QStringLiteral("\u200D");
|
||||
|
||||
// Note: \U requires /utf-8 for MSVC
|
||||
// See https://mm2pl.github.io/emoji_rfc.pdf
|
||||
const QRegularExpression ESCAPE_TAG_REGEX(
|
||||
QStringLiteral("(?<!\U000E0002)\U000E0002"),
|
||||
QRegularExpression::UseUnicodePropertiesOption);
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace chatterino {
|
||||
|
||||
namespace helpers::detail {
|
||||
|
@ -283,4 +295,10 @@ bool compareEmoteStrings(const QString &a, const QString &b)
|
|||
return k < 0;
|
||||
}
|
||||
|
||||
/// Replaces the ESCAPE TAG (U+E0002) occurrences matched by ESCAPE_TAG_REGEX
/// with ZERO_WIDTH_JOINER and returns the resulting text.
///
/// `escaped` is taken by value, so the caller's string is not modified.
QString unescapeZeroWidthJoiner(QString escaped)
{
    // replace() edits `escaped` in place and yields a reference to it
    return escaped.replace(ESCAPE_TAG_REGEX, ZERO_WIDTH_JOINER);
}
|
||||
|
||||
} // namespace chatterino
|
||||
|
|
|
@ -182,4 +182,11 @@ constexpr std::optional<std::decay_t<T>> makeConditionedOptional(bool condition,
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// @brief Unescapes zero width joiners (ZWJ; U+200D) from Twitch messages
|
||||
///
|
||||
/// Older Chatterino versions escape ZWJ with an ESCAPE TAG (U+E0002), following
|
||||
/// https://mm2pl.github.io/emoji_rfc.pdf. This function replaces every tag that
/// is not directly preceded by another tag with a ZWJ, so in a run of
/// consecutive tags only the first one is replaced.
|
||||
/// See also: https://github.com/Chatterino/chatterino2/issues/3384.
///
/// @param escaped Message text that may contain escape tags (taken by value)
/// @return The text with the matching escape tags replaced by ZWJ
|
||||
QString unescapeZeroWidthJoiner(QString escaped);
|
||||
|
||||
} // namespace chatterino
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include "Test.hpp"
|
||||
|
||||
#include <span>
|
||||
|
||||
using namespace chatterino;
|
||||
using namespace helpers::detail;
|
||||
|
||||
|
@ -500,3 +502,57 @@ TEST(Helpers, parseDurationToSeconds)
|
|||
<< c.output;
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that ESCAPE TAG (U+E0002) escapes are turned into ZWJ (U+200D),
// that existing ZWJs are left alone, and that only the first tag of a run of
// consecutive tags is replaced.
TEST(Helpers, unescapeZeroWidthJoiner)
{
    // Compile-time guard: the UTF-16 literals used below must be encoded the
    // way the test data assumes (BMP code unit / surrogate pair + terminator).
    static_assert(
        [] {
            constexpr std::span joiner = u"\u200D";
            static_assert(joiner.size() == 2);
            static_assert(joiner[0] == u'\x200D');
            static_assert(joiner[1] == u'\0');

            constexpr std::span tag = u"\U000E0002";
            static_assert(tag.size() == 3);
            static_assert(tag[0] == u'\xDB40');
            static_assert(tag[1] == u'\xDC02');
            static_assert(tag[2] == u'\0');

            return true;
        }(),
        "The compiler must support Unicode string literals");

    struct Case {
        QStringView escaped;
        QStringView expected;
    };

    const std::vector<Case> cases{
        // nothing to replace
        {u"foo bar", u"foo bar"},
        {u"", u""},
        {u"a", u"a"},
        // single escape tags
        {u"\U000E0002", u"\u200D"},
        {u"foo\U000E0002bar", u"foo\u200Dbar"},
        {u"foo \U000E0002 bar", u"foo \u200D bar"},
        {u"\U0001F468\U000E0002\U0001F33E", u"\U0001F468\u200D\U0001F33E"},
        // an existing ZWJ stays as it is
        {u"\U0001F468\u200D\U0001F33E", u"\U0001F468\u200D\U0001F33E"},
        // in a run of escape tags, only the first one is replaced
        {
            u"\U0001F468\U000E0002\U000E0002\U0001F33E",
            u"\U0001F468\u200D\U000E0002\U0001F33E",
        },
        {
            u"\U0001F468\U000E0002\U000E0002\U000E0002\U0001F33E",
            u"\U0001F468\u200D\U000E0002\U000E0002\U0001F33E",
        },
    };

    for (const auto &[escaped, expected] : cases)
    {
        const auto result = unescapeZeroWidthJoiner(escaped.toString());

        EXPECT_EQ(result, expected);
    }
}
|
||||
|
|
Loading…
Reference in a new issue