Fix emoji unified/non-qualified version for sending & parsing (#4840)

Co-authored-by: nerix <nerixdev@outlook.de>
This commit is contained in:
pajlada 2023-10-07 12:21:30 +02:00 committed by GitHub
parent ab4a0c054a
commit 774eaa14ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 269 additions and 116 deletions

View file

@ -6,6 +6,7 @@
- Minor: The account switcher is now styled to match your theme. (#4817) - Minor: The account switcher is now styled to match your theme. (#4817)
- Minor: Add an invisible resize handle to the bottom of frameless user info popups and reply thread popups. (#4795) - Minor: Add an invisible resize handle to the bottom of frameless user info popups and reply thread popups. (#4795)
- Minor: The installer now checks for the VC Runtime version and shows more info when it's outdated. (#4847) - Minor: The installer now checks for the VC Runtime version and shows more info when it's outdated. (#4847)
- Bugfix: Fixed an issue where certain emojis did not send to Twitch chat correctly. (#4840)
- Bugfix: Fixed capitalized channel names in log inclusion list not being logged. (#4848) - Bugfix: Fixed capitalized channel names in log inclusion list not being logged. (#4848)
- Bugfix: Trimmed custom streamlink paths on all platforms making sure you don't accidentally add spaces at the beginning or end of its path. (#4834) - Bugfix: Trimmed custom streamlink paths on all platforms making sure you don't accidentally add spaces at the beginning or end of its path. (#4834)
- Bugfix: Fixed a performance issue when displaying replies to certain messages. (#4807) - Bugfix: Fixed a performance issue when displaying replies to certain messages. (#4807)

View file

@ -55,3 +55,120 @@ static void BM_ShortcodeParsing(benchmark::State &state)
} }
BENCHMARK(BM_ShortcodeParsing); BENCHMARK(BM_ShortcodeParsing);
/// Benchmarks `Emojis::parse` over a small fixed set of inputs.
///
/// Every parse result is compared against the expected sequence of
/// text/emote parts; a mismatch is reported through qDebug so a broken
/// parser does not silently produce meaningless timings.
static void BM_EmojiParsing(benchmark::State &state)
{
    Emojis emojis;

    emojis.load();

    struct TestCase {
        QString input;
        std::vector<boost::variant<EmotePtr, QString>> expectedOutput;
    };

    const auto &emojiMap = emojis.getEmojis();
    std::shared_ptr<EmojiData> penguin;
    emojiMap.tryGet("1F427", penguin);
    if (penguin == nullptr)
    {
        // Without the penguin emoji every expectation below would
        // dereference a null pointer, so bail out with a clear error.
        state.SkipWithError("penguin emoji (1F427) was not loaded");
        return;
    }
    auto penguinEmoji = penguin->emote;

    const std::vector<TestCase> tests{
        {
            // 1 emoji
            "foo 🐧 bar",
            // expected output
            {
                "foo ",
                penguinEmoji,
                " bar",
            },
        },
        {
            // no emoji
            "foo bar",
            // expected output
            {
                "foo bar",
            },
        },
        {
            // many emoji
            "foo 🐧 bar 🐧🐧🐧🐧🐧",
            // expected output
            {
                "foo ",
                penguinEmoji,
                " bar ",
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
            },
        },
    };

    for (auto _ : state)
    {
        for (const auto &test : tests)
        {
            auto output = emojis.parse(test.input);

            // Use the four-iterator overload: it also compares the lengths,
            // so a longer output can never read past the end of
            // expectedOutput and a shorter one is not accepted as a prefix.
            bool areEqual = std::equal(output.begin(), output.end(),
                                       test.expectedOutput.begin(),
                                       test.expectedOutput.end());

            if (!areEqual)
            {
                qDebug() << "BAD BENCH";
                // Only the text parts are printed
                for (const auto &v : output)
                {
                    if (v.type() == typeid(QString))
                    {
                        qDebug() << "output:" << boost::get<QString>(v);
                    }
                }
            }
        }
    }
}
BENCHMARK(BM_EmojiParsing);
/// Benchmarks `Emojis::parse` for a single captured input.
///
/// Meant to be registered through BENCHMARK_CAPTURE. The captured arguments
/// are, in order:
///  0: the text to parse
///  1: the number of emote parts the parse result is expected to contain
///
/// A count mismatch is reported through qDebug.
template <class... Args>
static void BM_EmojiParsing2(benchmark::State &state, Args &&...args)
{
    Emojis emojis;

    emojis.load();

    // `args` are forwarding references, so forward them instead of
    // unconditionally moving from what may be the caller's lvalues.
    auto argsTuple = std::make_tuple(std::forward<Args>(args)...);

    // Convert the input to a QString once, up front, so the timed loop
    // measures parsing only and not a string conversion per iteration.
    const QString input = std::get<0>(argsTuple);
    const auto expectedNumEmojis = std::get<1>(argsTuple);

    for (auto _ : state)
    {
        auto output = emojis.parse(input);

        // Count only the emote parts; text parts are ignored
        int actualNumEmojis = 0;
        for (const auto &part : output)
        {
            if (part.type() == typeid(EmotePtr))
            {
                ++actualNumEmojis;
            }
        }

        if (actualNumEmojis != expectedNumEmojis)
        {
            qDebug() << "BAD BENCH, EXPECTED NUM EMOJIS IS WRONG"
                     << actualNumEmojis;
        }
    }
}
// Register BM_EmojiParsing2 once per input. The captured arguments are the
// text to parse followed by the number of emote parts that BM_EmojiParsing2
// expects to find in the parse result.
BENCHMARK_CAPTURE(BM_EmojiParsing2, one_emoji, "foo 🐧 bar", 1);
BENCHMARK_CAPTURE(BM_EmojiParsing2, two_emoji, "foo 🐧 bar 🐧", 2);
BENCHMARK_CAPTURE(
BM_EmojiParsing2, many_emoji,
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 "
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 "
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 ",
61);

View file

@ -12,22 +12,23 @@
#include <rapidjson/error/error.h> #include <rapidjson/error/error.h>
#include <rapidjson/rapidjson.h> #include <rapidjson/rapidjson.h>
#include <array> #include <map>
#include <memory> #include <memory>
namespace chatterino {
namespace { namespace {
auto toneNames = std::map<QString, QString>{ using namespace chatterino;
const std::map<QString, QString> TONE_NAMES{
{"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"}, {"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"},
{"1F3FE", "tone4"}, {"1F3FF", "tone5"}, {"1F3FE", "tone4"}, {"1F3FF", "tone5"},
}; };
void parseEmoji(const std::shared_ptr<EmojiData> &emojiData, void parseEmoji(const std::shared_ptr<EmojiData> &emojiData,
const rapidjson::Value &unparsedEmoji, const rapidjson::Value &unparsedEmoji,
QString shortCode = QString()) const QString &shortCode = {})
{ {
std::array<uint32_t, 9> unicodeBytes{}; std::vector<uint32_t> unicodeBytes{};
struct { struct {
bool apple; bool apple;
@ -42,16 +43,17 @@ namespace {
} }
else else
{ {
const auto &shortCodes = unparsedEmoji["short_names"]; // Load short codes from the suggested short_names
for (const auto &_shortCode : shortCodes.GetArray()) const auto &shortNames = unparsedEmoji["short_names"];
for (const auto &shortName : shortNames.GetArray())
{ {
emojiData->shortCodes.emplace_back(_shortCode.GetString()); emojiData->shortCodes.emplace_back(shortName.GetString());
} }
} }
rj::getSafe(unparsedEmoji, "non_qualified", rj::getSafe(unparsedEmoji, "non_qualified", emojiData->nonQualifiedCode);
emojiData->nonQualifiedCode);
rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode); rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode);
assert(!emojiData->unifiedCode.isEmpty());
rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple); rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple);
rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google); rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google);
@ -75,31 +77,48 @@ namespace {
emojiData->capabilities.insert("Facebook"); emojiData->capabilities.insert("Facebook");
} }
QStringList unicodeCharacters; QStringList unicodeCharacters = emojiData->unifiedCode.toLower().split('-');
if (!emojiData->nonQualifiedCode.isEmpty())
{
unicodeCharacters =
emojiData->nonQualifiedCode.toLower().split('-');
}
else
{
unicodeCharacters = emojiData->unifiedCode.toLower().split('-');
}
if (unicodeCharacters.length() < 1)
{
return;
}
int numUnicodeBytes = 0;
for (const QString &unicodeCharacter : unicodeCharacters) for (const QString &unicodeCharacter : unicodeCharacters)
{ {
unicodeBytes.at(numUnicodeBytes++) = bool ok{false};
QString(unicodeCharacter).toUInt(nullptr, 16); unicodeBytes.push_back(QString(unicodeCharacter).toUInt(&ok, 16));
if (!ok)
{
qCWarning(chatterinoEmoji)
<< "Failed to parse emoji" << emojiData->shortCodes;
return;
}
} }
emojiData->value = // We can safely do a narrowing static cast since unicodeBytes will never be a large number
QString::fromUcs4(unicodeBytes.data(), numUnicodeBytes); emojiData->value = QString::fromUcs4(unicodeBytes.data(),
static_cast<int>(unicodeBytes.size()));
if (!emojiData->nonQualifiedCode.isEmpty())
{
QStringList nonQualifiedCharacters =
emojiData->nonQualifiedCode.toLower().split('-');
std::vector<uint32_t> nonQualifiedBytes{};
for (const QString &unicodeCharacter : nonQualifiedCharacters)
{
bool ok{false};
nonQualifiedBytes.push_back(
QString(unicodeCharacter).toUInt(&ok, 16));
if (!ok)
{
qCWarning(chatterinoEmoji)
<< "Failed to parse emoji nonQualified"
<< emojiData->shortCodes;
return;
}
}
// We can safely do a narrowing static cast since nonQualifiedBytes will never contain a large number of code points
emojiData->nonQualified =
QString::fromUcs4(nonQualifiedBytes.data(),
static_cast<int>(nonQualifiedBytes.size()));
}
} }
// getToneNames takes tones and returns their names in the same order // getToneNames takes tones and returns their names in the same order
@ -111,8 +130,8 @@ namespace {
QStringList toneNameResults; QStringList toneNameResults;
for (const auto &tonePart : toneParts) for (const auto &tonePart : toneParts)
{ {
auto toneNameIt = toneNames.find(tonePart); auto toneNameIt = TONE_NAMES.find(tonePart);
if (toneNameIt == toneNames.end()) if (toneNameIt == TONE_NAMES.end())
{ {
qDebug() << "Tone with key" << tonePart qDebug() << "Tone with key" << tonePart
<< "does not exist in tone names map"; << "does not exist in tone names map";
@ -129,6 +148,8 @@ namespace {
} // namespace } // namespace
namespace chatterino {
void Emojis::load() void Emojis::load()
{ {
this->loadEmojis(); this->loadEmojis();
@ -219,6 +240,8 @@ void Emojis::loadEmojiSet()
getSettings()->emojiSet.connect([this](const auto &emojiSet) { getSettings()->emojiSet.connect([this](const auto &emojiSet) {
this->emojis.each([=](const auto &name, this->emojis.each([=](const auto &name,
std::shared_ptr<EmojiData> &emoji) { std::shared_ptr<EmojiData> &emoji) {
(void)name;
QString emojiSetToUse = emojiSet; QString emojiSetToUse = emojiSet;
// clang-format off // clang-format off
static std::map<QString, QString> emojiSets = { static std::map<QString, QString> emojiSets = {
@ -243,7 +266,7 @@ void Emojis::loadEmojiSet()
}; };
// clang-format on // clang-format on
if (emoji->capabilities.count(emojiSetToUse) == 0) if (!emoji->capabilities.contains(emojiSetToUse))
{ {
emojiSetToUse = "Twitter"; emojiSetToUse = "Twitter";
} }
@ -268,7 +291,7 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
const QString &text) const const QString &text) const
{ {
auto result = std::vector<boost::variant<EmotePtr, QString>>(); auto result = std::vector<boost::variant<EmotePtr, QString>>();
int lastParsedEmojiEndIndex = 0; QString::size_type lastParsedEmojiEndIndex = 0;
for (auto i = 0; i < text.length(); ++i) for (auto i = 0; i < text.length(); ++i)
{ {
@ -288,32 +311,22 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
const auto &possibleEmojis = it.value(); const auto &possibleEmojis = it.value();
int remainingCharacters = text.length() - i - 1; auto remainingCharacters = text.length() - i - 1;
std::shared_ptr<EmojiData> matchedEmoji; std::shared_ptr<EmojiData> matchedEmoji;
int matchedEmojiLength = 0; QString::size_type matchedEmojiLength = 0;
for (const std::shared_ptr<EmojiData> &emoji : possibleEmojis) for (const std::shared_ptr<EmojiData> &emoji : possibleEmojis)
{ {
int emojiExtraCharacters = emoji->value.length() - 1; auto emojiNonQualifiedExtraCharacters =
if (emojiExtraCharacters > remainingCharacters) emoji->nonQualified.length() - 1;
auto emojiExtraCharacters = emoji->value.length() - 1;
if (remainingCharacters >= emojiExtraCharacters)
{ {
// It cannot be this emoji, there's not enough space for it // look in emoji->value
continue; bool match = QStringView{emoji->value}.mid(1) ==
} QStringView{text}.mid(i + 1, emojiExtraCharacters);
bool match = true;
for (int j = 1; j < emoji->value.length(); ++j)
{
if (text.at(i + j) != emoji->value.at(j))
{
match = false;
break;
}
}
if (match) if (match)
{ {
@ -323,16 +336,34 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
break; break;
} }
} }
if (!emoji->nonQualified.isNull() &&
remainingCharacters >= emojiNonQualifiedExtraCharacters)
{
// This checking here relies on the fact that the nonQualified string
// always starts with the same byte as value (the unified string)
bool match = QStringView{emoji->nonQualified}.mid(1) ==
QStringView{text}.mid(
i + 1, emojiNonQualifiedExtraCharacters);
if (match)
{
matchedEmoji = emoji;
matchedEmojiLength = emoji->nonQualified.length();
break;
}
}
}
if (matchedEmojiLength == 0) if (matchedEmojiLength == 0)
{ {
continue; continue;
} }
int currentParsedEmojiFirstIndex = i; auto currentParsedEmojiFirstIndex = i;
int currentParsedEmojiEndIndex = i + (matchedEmojiLength); auto currentParsedEmojiEndIndex = i + (matchedEmojiLength);
int charactersFromLastParsedEmoji = auto charactersFromLastParsedEmoji =
currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex; currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex;
if (charactersFromLastParsedEmoji > 0) if (charactersFromLastParsedEmoji > 0)
@ -382,7 +413,7 @@ QString Emojis::replaceShortCodes(const QString &text) const
continue; continue;
} }
auto emojiData = emojiIt.value(); const auto &emojiData = emojiIt.value();
ret.replace(offset + match.capturedStart(), match.capturedLength(), ret.replace(offset + match.capturedStart(), match.capturedLength(),
emojiData->value); emojiData->value);

View file

@ -7,7 +7,6 @@
#include <QRegularExpression> #include <QRegularExpression>
#include <QVector> #include <QVector>
#include <map>
#include <set> #include <set>
#include <vector> #include <vector>
@ -21,6 +20,9 @@ struct EmojiData {
// :male:) // :male:)
QString value; QString value;
// actual byte-representation of the non qualified emoji
QString nonQualified;
// i.e. 204e-50a2 // i.e. 204e-50a2
QString unifiedCode; QString unifiedCode;
QString nonQualifiedCode; QString nonQualifiedCode;

View file

@ -3,6 +3,8 @@
#include "util/CombinePath.hpp" #include "util/CombinePath.hpp"
#include "util/Qt.hpp" #include "util/Qt.hpp"
#include <unordered_map>
namespace chatterino { namespace chatterino {
#if defined(Q_OS_UNIX) and !defined(Q_OS_DARWIN) #if defined(Q_OS_UNIX) and !defined(Q_OS_DARWIN)