Fix emoji unified/non-qualified version for sending & parsing (#4840)

Co-authored-by: nerix <nerixdev@outlook.de>
This commit is contained in:
pajlada 2023-10-07 12:21:30 +02:00 committed by GitHub
parent ab4a0c054a
commit 774eaa14ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 269 additions and 116 deletions

View file

@ -6,6 +6,7 @@
- Minor: The account switcher is now styled to match your theme. (#4817) - Minor: The account switcher is now styled to match your theme. (#4817)
- Minor: Add an invisible resize handle to the bottom of frameless user info popups and reply thread popups. (#4795) - Minor: Add an invisible resize handle to the bottom of frameless user info popups and reply thread popups. (#4795)
- Minor: The installer now checks for the VC Runtime version and shows more info when it's outdated. (#4847) - Minor: The installer now checks for the VC Runtime version and shows more info when it's outdated. (#4847)
- Bugfix: Fixed an issue where certain emojis did not send to Twitch chat correctly. (#4840)
- Bugfix: Fixed capitalized channel names in log inclusion list not being logged. (#4848) - Bugfix: Fixed capitalized channel names in log inclusion list not being logged. (#4848)
- Bugfix: Trimmed custom streamlink paths on all platforms making sure you don't accidentally add spaces at the beginning or end of its path. (#4834) - Bugfix: Trimmed custom streamlink paths on all platforms making sure you don't accidentally add spaces at the beginning or end of its path. (#4834)
- Bugfix: Fixed a performance issue when displaying replies to certain messages. (#4807) - Bugfix: Fixed a performance issue when displaying replies to certain messages. (#4807)

View file

@ -55,3 +55,120 @@ static void BM_ShortcodeParsing(benchmark::State &state)
} }
BENCHMARK(BM_ShortcodeParsing); BENCHMARK(BM_ShortcodeParsing);
/// Benchmarks Emojis::parse over a small, fixed set of inputs.
///
/// Every parse result is compared against the expected sequence of text and
/// emote parts; a mismatch is reported through qDebug so a broken parser does
/// not silently produce meaningless timings.
static void BM_EmojiParsing(benchmark::State &state)
{
    Emojis emojis;

    emojis.load();

    struct TestCase {
        QString input;
        std::vector<boost::variant<EmotePtr, QString>> expectedOutput;
    };

    const auto &emojiMap = emojis.getEmojis();
    std::shared_ptr<EmojiData> penguin;
    emojiMap.tryGet("1F427", penguin);
    if (!penguin)
    {
        // Without the penguin emoji none of the expected outputs can be built
        state.SkipWithError("Penguin emoji (1F427) was not loaded");
        return;
    }
    auto penguinEmoji = penguin->emote;

    std::vector<TestCase> tests{
        {
            // 1 emoji
            "foo 🐧 bar",
            // expected output
            {
                "foo ",
                penguinEmoji,
                " bar",
            },
        },
        {
            // no emoji
            "foo bar",
            // expected output
            {
                "foo bar",
            },
        },
        {
            // many emoji
            "foo 🐧 bar 🐧🐧🐧🐧🐧",
            // expected output
            {
                "foo ",
                penguinEmoji,
                " bar ",
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
            },
        },
    };

    for (auto _ : state)
    {
        for (const auto &test : tests)
        {
            auto output = emojis.parse(test.input);

            // Compare both full ranges: the three-iterator overload would read
            // past the end of expectedOutput if the parser returned too many
            // parts, and would accept an output that is only a prefix of it.
            bool areEqual = std::equal(output.begin(), output.end(),
                                       test.expectedOutput.begin(),
                                       test.expectedOutput.end());

            if (!areEqual)
            {
                qDebug() << "BAD BENCH";
                for (const auto &v : output)
                {
                    // Only the text parts are printed
                    if (v.type() == typeid(QString))
                    {
                        qDebug() << "output:" << boost::get<QString>(v);
                    }
                }
            }
        }
    }
}

BENCHMARK(BM_EmojiParsing);
/// Benchmarks Emojis::parse on a single captured input.
///
/// Expects exactly two captured arguments: the text to parse, followed by the
/// number of emojis the parse result should contain. A differing count is
/// reported through qDebug.
template <class... Args>
static void BM_EmojiParsing2(benchmark::State &state, Args &&...args)
{
    Emojis emojis;

    emojis.load();

    // args is a pack of forwarding references, so forward instead of moving
    auto argsTuple = std::make_tuple(std::forward<Args>(args)...);
    // Convert the input to a QString once, so the timed loop below measures
    // parsing only and not the string conversion
    const QString input = std::get<0>(argsTuple);
    const auto expectedNumEmojis = std::get<1>(argsTuple);

    for (auto _ : state)
    {
        auto output = emojis.parse(input);

        // Count the parts of the result that are emotes rather than text
        int actualNumEmojis = 0;
        for (const auto &part : output)
        {
            if (part.type() == typeid(EmotePtr))
            {
                ++actualNumEmojis;
            }
        }

        if (actualNumEmojis != expectedNumEmojis)
        {
            qDebug() << "BAD BENCH, EXPECTED NUM EMOJIS IS WRONG"
                     << actualNumEmojis;
        }
    }
}
// Register BM_EmojiParsing2 once per input.
// The captured arguments are the text to parse, followed by the number of
// emojis the benchmark expects to find in the parse result.
BENCHMARK_CAPTURE(BM_EmojiParsing2, one_emoji, "foo 🐧 bar", 1);
BENCHMARK_CAPTURE(BM_EmojiParsing2, two_emoji, "foo 🐧 bar 🐧", 2);
// A long input made up of the same emoji repeated, separated by spaces
BENCHMARK_CAPTURE(
BM_EmojiParsing2, many_emoji,
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 "
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 "
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 ",
61);

View file

@ -12,123 +12,144 @@
#include <rapidjson/error/error.h> #include <rapidjson/error/error.h>
#include <rapidjson/rapidjson.h> #include <rapidjson/rapidjson.h>
#include <array> #include <map>
#include <memory> #include <memory>
namespace chatterino {
namespace { namespace {
auto toneNames = std::map<QString, QString>{ using namespace chatterino;
{"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"},
{"1F3FE", "tone4"}, {"1F3FF", "tone5"},
};
void parseEmoji(const std::shared_ptr<EmojiData> &emojiData, const std::map<QString, QString> TONE_NAMES{
const rapidjson::Value &unparsedEmoji, {"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"},
QString shortCode = QString()) {"1F3FE", "tone4"}, {"1F3FF", "tone5"},
};
void parseEmoji(const std::shared_ptr<EmojiData> &emojiData,
const rapidjson::Value &unparsedEmoji,
const QString &shortCode = {})
{
std::vector<uint32_t> unicodeBytes{};
struct {
bool apple;
bool google;
bool twitter;
bool facebook;
} capabilities{};
if (!shortCode.isEmpty())
{ {
std::array<uint32_t, 9> unicodeBytes{}; emojiData->shortCodes.push_back(shortCode);
}
else
{
// Load short codes from the suggested short_names
const auto &shortNames = unparsedEmoji["short_names"];
for (const auto &shortName : shortNames.GetArray())
{
emojiData->shortCodes.emplace_back(shortName.GetString());
}
}
struct { rj::getSafe(unparsedEmoji, "non_qualified", emojiData->nonQualifiedCode);
bool apple; rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode);
bool google; assert(!emojiData->unifiedCode.isEmpty());
bool twitter;
bool facebook;
} capabilities{};
if (!shortCode.isEmpty()) rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple);
{ rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google);
emojiData->shortCodes.push_back(shortCode); rj::getSafe(unparsedEmoji, "has_img_twitter", capabilities.twitter);
} rj::getSafe(unparsedEmoji, "has_img_facebook", capabilities.facebook);
else
{
const auto &shortCodes = unparsedEmoji["short_names"];
for (const auto &_shortCode : shortCodes.GetArray())
{
emojiData->shortCodes.emplace_back(_shortCode.GetString());
}
}
rj::getSafe(unparsedEmoji, "non_qualified", if (capabilities.apple)
emojiData->nonQualifiedCode); {
rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode); emojiData->capabilities.insert("Apple");
}
if (capabilities.google)
{
emojiData->capabilities.insert("Google");
}
if (capabilities.twitter)
{
emojiData->capabilities.insert("Twitter");
}
if (capabilities.facebook)
{
emojiData->capabilities.insert("Facebook");
}
rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple); QStringList unicodeCharacters = emojiData->unifiedCode.toLower().split('-');
rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google);
rj::getSafe(unparsedEmoji, "has_img_twitter", capabilities.twitter);
rj::getSafe(unparsedEmoji, "has_img_facebook", capabilities.facebook);
if (capabilities.apple) for (const QString &unicodeCharacter : unicodeCharacters)
{ {
emojiData->capabilities.insert("Apple"); bool ok{false};
} unicodeBytes.push_back(QString(unicodeCharacter).toUInt(&ok, 16));
if (capabilities.google) if (!ok)
{
emojiData->capabilities.insert("Google");
}
if (capabilities.twitter)
{
emojiData->capabilities.insert("Twitter");
}
if (capabilities.facebook)
{
emojiData->capabilities.insert("Facebook");
}
QStringList unicodeCharacters;
if (!emojiData->nonQualifiedCode.isEmpty())
{
unicodeCharacters =
emojiData->nonQualifiedCode.toLower().split('-');
}
else
{
unicodeCharacters = emojiData->unifiedCode.toLower().split('-');
}
if (unicodeCharacters.length() < 1)
{ {
qCWarning(chatterinoEmoji)
<< "Failed to parse emoji" << emojiData->shortCodes;
return; return;
} }
int numUnicodeBytes = 0;
for (const QString &unicodeCharacter : unicodeCharacters)
{
unicodeBytes.at(numUnicodeBytes++) =
QString(unicodeCharacter).toUInt(nullptr, 16);
}
emojiData->value =
QString::fromUcs4(unicodeBytes.data(), numUnicodeBytes);
} }
// getToneNames takes a tones and returns their names in the same order // We can safely do a narrowing static cast since unicodeBytes will never be a large number
// The format of the tones is: "1F3FB-1F3FB" or "1F3FB" emojiData->value = QString::fromUcs4(unicodeBytes.data(),
// The output of the tone names is: "tone1-tone1" or "tone1" static_cast<int>(unicodeBytes.size()));
QString getToneNames(const QString &tones)
if (!emojiData->nonQualifiedCode.isEmpty())
{ {
auto toneParts = tones.split('-'); QStringList nonQualifiedCharacters =
QStringList toneNameResults; emojiData->nonQualifiedCode.toLower().split('-');
for (const auto &tonePart : toneParts) std::vector<uint32_t> nonQualifiedBytes{};
for (const QString &unicodeCharacter : nonQualifiedCharacters)
{ {
auto toneNameIt = toneNames.find(tonePart); bool ok{false};
if (toneNameIt == toneNames.end()) nonQualifiedBytes.push_back(
QString(unicodeCharacter).toUInt(&ok, 16));
if (!ok)
{ {
qDebug() << "Tone with key" << tonePart qCWarning(chatterinoEmoji)
<< "does not exist in tone names map"; << "Failed to parse emoji nonQualified"
continue; << emojiData->shortCodes;
return;
} }
toneNameResults.append(toneNameIt->second);
} }
assert(!toneNameResults.isEmpty()); // We can safely do a narrowing static cast since unicodeBytes will never be a large number
emojiData->nonQualified =
return toneNameResults.join('-'); QString::fromUcs4(nonQualifiedBytes.data(),
static_cast<int>(nonQualifiedBytes.size()));
} }
}
// getToneNames maps a dash-separated list of tones to their names,
// keeping the order of the input.
// Input format: "1F3FB-1F3FB" or "1F3FB"
// Output format: "tone1-tone1" or "tone1"
// Tones that are missing from TONE_NAMES are logged and left out of the result.
QString getToneNames(const QString &tones)
{
    QStringList names;

    const auto codes = tones.split('-');
    for (const auto &code : codes)
    {
        const auto found = TONE_NAMES.find(code);
        if (found != TONE_NAMES.end())
        {
            names.append(found->second);
        }
        else
        {
            qDebug() << "Tone with key" << code
                     << "does not exist in tone names map";
        }
    }

    // At least one of the given tones must have been recognized
    assert(!names.isEmpty());

    return names.join('-');
}
} // namespace } // namespace
namespace chatterino {
void Emojis::load() void Emojis::load()
{ {
this->loadEmojis(); this->loadEmojis();
@ -219,6 +240,8 @@ void Emojis::loadEmojiSet()
getSettings()->emojiSet.connect([this](const auto &emojiSet) { getSettings()->emojiSet.connect([this](const auto &emojiSet) {
this->emojis.each([=](const auto &name, this->emojis.each([=](const auto &name,
std::shared_ptr<EmojiData> &emoji) { std::shared_ptr<EmojiData> &emoji) {
(void)name;
QString emojiSetToUse = emojiSet; QString emojiSetToUse = emojiSet;
// clang-format off // clang-format off
static std::map<QString, QString> emojiSets = { static std::map<QString, QString> emojiSets = {
@ -243,7 +266,7 @@ void Emojis::loadEmojiSet()
}; };
// clang-format on // clang-format on
if (emoji->capabilities.count(emojiSetToUse) == 0) if (!emoji->capabilities.contains(emojiSetToUse))
{ {
emojiSetToUse = "Twitter"; emojiSetToUse = "Twitter";
} }
@ -268,7 +291,7 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
const QString &text) const const QString &text) const
{ {
auto result = std::vector<boost::variant<EmotePtr, QString>>(); auto result = std::vector<boost::variant<EmotePtr, QString>>();
int lastParsedEmojiEndIndex = 0; QString::size_type lastParsedEmojiEndIndex = 0;
for (auto i = 0; i < text.length(); ++i) for (auto i = 0; i < text.length(); ++i)
{ {
@ -288,39 +311,47 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
const auto &possibleEmojis = it.value(); const auto &possibleEmojis = it.value();
int remainingCharacters = text.length() - i - 1; auto remainingCharacters = text.length() - i - 1;
std::shared_ptr<EmojiData> matchedEmoji; std::shared_ptr<EmojiData> matchedEmoji;
int matchedEmojiLength = 0; QString::size_type matchedEmojiLength = 0;
for (const std::shared_ptr<EmojiData> &emoji : possibleEmojis) for (const std::shared_ptr<EmojiData> &emoji : possibleEmojis)
{ {
int emojiExtraCharacters = emoji->value.length() - 1; auto emojiNonQualifiedExtraCharacters =
if (emojiExtraCharacters > remainingCharacters) emoji->nonQualified.length() - 1;
auto emojiExtraCharacters = emoji->value.length() - 1;
if (remainingCharacters >= emojiExtraCharacters)
{ {
// It cannot be this emoji, there's not enough space for it // look in emoji->value
continue; bool match = QStringView{emoji->value}.mid(1) ==
} QStringView{text}.mid(i + 1, emojiExtraCharacters);
bool match = true; if (match)
for (int j = 1; j < emoji->value.length(); ++j)
{
if (text.at(i + j) != emoji->value.at(j))
{ {
match = false; matchedEmoji = emoji;
matchedEmojiLength = emoji->value.length();
break; break;
} }
} }
if (!emoji->nonQualified.isNull() &&
if (match) remainingCharacters >= emojiNonQualifiedExtraCharacters)
{ {
matchedEmoji = emoji; // This checking here relies on the fact that the nonQualified string
matchedEmojiLength = emoji->value.length(); // always starts with the same byte as value (the unified string)
bool match = QStringView{emoji->nonQualified}.mid(1) ==
QStringView{text}.mid(
i + 1, emojiNonQualifiedExtraCharacters);
break; if (match)
{
matchedEmoji = emoji;
matchedEmojiLength = emoji->nonQualified.length();
break;
}
} }
} }
@ -329,10 +360,10 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
continue; continue;
} }
int currentParsedEmojiFirstIndex = i; auto currentParsedEmojiFirstIndex = i;
int currentParsedEmojiEndIndex = i + (matchedEmojiLength); auto currentParsedEmojiEndIndex = i + (matchedEmojiLength);
int charactersFromLastParsedEmoji = auto charactersFromLastParsedEmoji =
currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex; currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex;
if (charactersFromLastParsedEmoji > 0) if (charactersFromLastParsedEmoji > 0)
@ -382,7 +413,7 @@ QString Emojis::replaceShortCodes(const QString &text) const
continue; continue;
} }
auto emojiData = emojiIt.value(); const auto &emojiData = emojiIt.value();
ret.replace(offset + match.capturedStart(), match.capturedLength(), ret.replace(offset + match.capturedStart(), match.capturedLength(),
emojiData->value); emojiData->value);

View file

@ -7,7 +7,6 @@
#include <QRegularExpression> #include <QRegularExpression>
#include <QVector> #include <QVector>
#include <map>
#include <set> #include <set>
#include <vector> #include <vector>
@ -21,6 +20,9 @@ struct EmojiData {
// :male:) // :male:)
QString value; QString value;
// actual byte-representation of the non qualified emoji
QString nonQualified;
// i.e. 204e-50a2 // i.e. 204e-50a2
QString unifiedCode; QString unifiedCode;
QString nonQualifiedCode; QString nonQualifiedCode;

View file

@ -3,6 +3,8 @@
#include "util/CombinePath.hpp" #include "util/CombinePath.hpp"
#include "util/Qt.hpp" #include "util/Qt.hpp"
#include <unordered_map>
namespace chatterino { namespace chatterino {
#if defined(Q_OS_UNIX) and !defined(Q_OS_DARWIN) #if defined(Q_OS_UNIX) and !defined(Q_OS_DARWIN)