Fix emoji unified/non-qualified version for sending & parsing (#4840)

Co-authored-by: nerix <nerixdev@outlook.de>
This commit is contained in:
pajlada 2023-10-07 12:21:30 +02:00 committed by GitHub
parent ab4a0c054a
commit 774eaa14ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 269 additions and 116 deletions

View file

@ -6,6 +6,7 @@
- Minor: The account switcher is now styled to match your theme. (#4817)
- Minor: Add an invisible resize handle to the bottom of frameless user info popups and reply thread popups. (#4795)
- Minor: The installer now checks for the VC Runtime version and shows more info when it's outdated. (#4847)
- Bugfix: Fixed an issue where certain emojis did not send to Twitch chat correctly. (#4840)
- Bugfix: Fixed capitalized channel names in log inclusion list not being logged. (#4848)
- Bugfix: Trimmed custom streamlink paths on all platforms, making sure you don't accidentally add spaces at the beginning or end of the path. (#4834)
- Bugfix: Fixed a performance issue when displaying replies to certain messages. (#4807)

View file

@ -55,3 +55,120 @@ static void BM_ShortcodeParsing(benchmark::State &state)
}
BENCHMARK(BM_ShortcodeParsing);
/// Benchmarks Emojis::parse over a small, fixed set of inputs.
///
/// Every iteration parses each test input and compares the parsed parts
/// against the expected sequence; a mismatch is reported through qDebug so a
/// broken parser does not silently produce meaningless timings.
static void BM_EmojiParsing(benchmark::State &state)
{
    Emojis emojis;

    emojis.load();

    struct TestCase {
        QString input;
        std::vector<boost::variant<EmotePtr, QString>> expectedOutput;
    };

    const auto &emojiMap = emojis.getEmojis();
    std::shared_ptr<EmojiData> penguin;
    emojiMap.tryGet("1F427", penguin);
    if (!penguin)
    {
        // The expected outputs below are built from the penguin emote, so
        // there is nothing meaningful to measure if the lookup failed.
        state.SkipWithError("Penguin emoji (1F427) was not loaded");
        return;
    }

    auto penguinEmoji = penguin->emote;

    const std::vector<TestCase> tests{
        {
            // 1 emoji
            "foo 🐧 bar",
            // expected output
            {
                "foo ",
                penguinEmoji,
                " bar",
            },
        },
        {
            // no emoji
            "foo bar",
            // expected output
            {
                "foo bar",
            },
        },
        {
            // many emoji
            "foo 🐧 bar 🐧🐧🐧🐧🐧",
            // expected output
            {
                "foo ",
                penguinEmoji,
                " bar ",
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
                penguinEmoji,
            },
        },
    };

    for (auto _ : state)
    {
        for (const auto &test : tests)
        {
            auto output = emojis.parse(test.input);

            // Use the four-iterator overload: it also compares the lengths,
            // so an output that is longer than expected never reads past the
            // end of expectedOutput and a shorter one is not reported equal.
            bool areEqual = std::equal(output.begin(), output.end(),
                                       test.expectedOutput.begin(),
                                       test.expectedOutput.end());

            if (!areEqual)
            {
                qDebug() << "BAD BENCH";
                // Only the text parts are printed
                for (const auto &v : output)
                {
                    if (v.type() == typeid(QString))
                    {
                        qDebug() << "output:" << boost::get<QString>(v);
                    }
                }
            }
        }
    }
}
BENCHMARK(BM_EmojiParsing);
/// Benchmarks Emojis::parse on a single captured input.
///
/// Expects exactly two captured arguments (see the BENCHMARK_CAPTURE
/// registrations): the text to parse, followed by the number of emojis that
/// text is expected to contain. A mismatching emoji count is reported through
/// qDebug on every iteration.
template <class... Args>
static void BM_EmojiParsing2(benchmark::State &state, Args &&...args)
{
    Emojis emojis;

    emojis.load();

    // `args` are forwarding references: forward them rather than moving them
    // unconditionally, so an lvalue passed by the caller is never moved-from.
    auto argsTuple = std::make_tuple(std::forward<Args>(args)...);
    const auto input = std::get<0>(argsTuple);
    const auto expectedNumEmojis = std::get<1>(argsTuple);

    for (auto _ : state)
    {
        auto output = emojis.parse(input);

        // Count the parts that were parsed into an emote
        int actualNumEmojis = 0;
        for (const auto &part : output)
        {
            if (part.type() == typeid(EmotePtr))
            {
                ++actualNumEmojis;
            }
        }

        if (actualNumEmojis != expectedNumEmojis)
        {
            qDebug() << "BAD BENCH, EXPECTED NUM EMOJIS IS WRONG"
                     << actualNumEmojis;
        }
    }
}
// Register BM_EmojiParsing2 once per test case. The captured arguments are
// the text to parse followed by the number of emojis expected in that text.
BENCHMARK_CAPTURE(BM_EmojiParsing2, one_emoji, "foo 🐧 bar", 1);
BENCHMARK_CAPTURE(BM_EmojiParsing2, two_emoji, "foo 🐧 bar 🐧", 2);
BENCHMARK_CAPTURE(
BM_EmojiParsing2, many_emoji,
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 "
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 "
"😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 ",
61);

View file

@ -12,123 +12,144 @@
#include <rapidjson/error/error.h>
#include <rapidjson/rapidjson.h>
#include <array>
#include <map>
#include <memory>
namespace chatterino {
namespace {
auto toneNames = std::map<QString, QString>{
{"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"},
{"1F3FE", "tone4"}, {"1F3FF", "tone5"},
};
using namespace chatterino;
void parseEmoji(const std::shared_ptr<EmojiData> &emojiData,
const rapidjson::Value &unparsedEmoji,
QString shortCode = QString())
// Maps a tone code (upper-case hex, e.g. "1F3FB") to its tone name
// (e.g. "tone1"). Looked up by getToneNames.
const std::map<QString, QString> TONE_NAMES{
{"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"},
{"1F3FE", "tone4"}, {"1F3FF", "tone5"},
};
void parseEmoji(const std::shared_ptr<EmojiData> &emojiData,
const rapidjson::Value &unparsedEmoji,
const QString &shortCode = {})
{
std::vector<uint32_t> unicodeBytes{};
struct {
bool apple;
bool google;
bool twitter;
bool facebook;
} capabilities{};
if (!shortCode.isEmpty())
{
std::array<uint32_t, 9> unicodeBytes{};
emojiData->shortCodes.push_back(shortCode);
}
else
{
// Load short codes from the suggested short_names
const auto &shortNames = unparsedEmoji["short_names"];
for (const auto &shortName : shortNames.GetArray())
{
emojiData->shortCodes.emplace_back(shortName.GetString());
}
}
struct {
bool apple;
bool google;
bool twitter;
bool facebook;
} capabilities{};
rj::getSafe(unparsedEmoji, "non_qualified", emojiData->nonQualifiedCode);
rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode);
assert(!emojiData->unifiedCode.isEmpty());
if (!shortCode.isEmpty())
{
emojiData->shortCodes.push_back(shortCode);
}
else
{
const auto &shortCodes = unparsedEmoji["short_names"];
for (const auto &_shortCode : shortCodes.GetArray())
{
emojiData->shortCodes.emplace_back(_shortCode.GetString());
}
}
rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple);
rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google);
rj::getSafe(unparsedEmoji, "has_img_twitter", capabilities.twitter);
rj::getSafe(unparsedEmoji, "has_img_facebook", capabilities.facebook);
rj::getSafe(unparsedEmoji, "non_qualified",
emojiData->nonQualifiedCode);
rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode);
if (capabilities.apple)
{
emojiData->capabilities.insert("Apple");
}
if (capabilities.google)
{
emojiData->capabilities.insert("Google");
}
if (capabilities.twitter)
{
emojiData->capabilities.insert("Twitter");
}
if (capabilities.facebook)
{
emojiData->capabilities.insert("Facebook");
}
rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple);
rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google);
rj::getSafe(unparsedEmoji, "has_img_twitter", capabilities.twitter);
rj::getSafe(unparsedEmoji, "has_img_facebook", capabilities.facebook);
QStringList unicodeCharacters = emojiData->unifiedCode.toLower().split('-');
if (capabilities.apple)
{
emojiData->capabilities.insert("Apple");
}
if (capabilities.google)
{
emojiData->capabilities.insert("Google");
}
if (capabilities.twitter)
{
emojiData->capabilities.insert("Twitter");
}
if (capabilities.facebook)
{
emojiData->capabilities.insert("Facebook");
}
QStringList unicodeCharacters;
if (!emojiData->nonQualifiedCode.isEmpty())
{
unicodeCharacters =
emojiData->nonQualifiedCode.toLower().split('-');
}
else
{
unicodeCharacters = emojiData->unifiedCode.toLower().split('-');
}
if (unicodeCharacters.length() < 1)
for (const QString &unicodeCharacter : unicodeCharacters)
{
bool ok{false};
unicodeBytes.push_back(QString(unicodeCharacter).toUInt(&ok, 16));
if (!ok)
{
qCWarning(chatterinoEmoji)
<< "Failed to parse emoji" << emojiData->shortCodes;
return;
}
int numUnicodeBytes = 0;
for (const QString &unicodeCharacter : unicodeCharacters)
{
unicodeBytes.at(numUnicodeBytes++) =
QString(unicodeCharacter).toUInt(nullptr, 16);
}
emojiData->value =
QString::fromUcs4(unicodeBytes.data(), numUnicodeBytes);
}
// getToneNames takes a tones and returns their names in the same order
// The format of the tones is: "1F3FB-1F3FB" or "1F3FB"
// The output of the tone names is: "tone1-tone1" or "tone1"
QString getToneNames(const QString &tones)
// We can safely do a narrowing static cast since unicodeBytes will never be a large number
emojiData->value = QString::fromUcs4(unicodeBytes.data(),
static_cast<int>(unicodeBytes.size()));
if (!emojiData->nonQualifiedCode.isEmpty())
{
auto toneParts = tones.split('-');
QStringList toneNameResults;
for (const auto &tonePart : toneParts)
QStringList nonQualifiedCharacters =
emojiData->nonQualifiedCode.toLower().split('-');
std::vector<uint32_t> nonQualifiedBytes{};
for (const QString &unicodeCharacter : nonQualifiedCharacters)
{
auto toneNameIt = toneNames.find(tonePart);
if (toneNameIt == toneNames.end())
bool ok{false};
nonQualifiedBytes.push_back(
QString(unicodeCharacter).toUInt(&ok, 16));
if (!ok)
{
qDebug() << "Tone with key" << tonePart
<< "does not exist in tone names map";
continue;
qCWarning(chatterinoEmoji)
<< "Failed to parse emoji nonQualified"
<< emojiData->shortCodes;
return;
}
toneNameResults.append(toneNameIt->second);
}
assert(!toneNameResults.isEmpty());
return toneNameResults.join('-');
// We can safely do a narrowing static cast since nonQualifiedBytes will never hold a large number of elements
emojiData->nonQualified =
QString::fromUcs4(nonQualifiedBytes.data(),
static_cast<int>(nonQualifiedBytes.size()));
}
}
// getToneNames translates every tone in `tones` into its name, keeping the
// original order.
// Input format:  "1F3FB-1F3FB" or "1F3FB"
// Output format: "tone1-tone1" or "tone1"
// A tone without an entry in TONE_NAMES is logged and left out of the result.
QString getToneNames(const QString &tones)
{
    const auto codes = tones.split('-');
    QStringList names;

    for (const auto &code : codes)
    {
        const auto found = TONE_NAMES.find(code);
        if (found != TONE_NAMES.end())
        {
            names.append(found->second);
        }
        else
        {
            qDebug() << "Tone with key" << code
                     << "does not exist in tone names map";
        }
    }

    // At least one tone must have been recognised
    assert(!names.isEmpty());

    return names.join('-');
}
} // namespace
namespace chatterino {
void Emojis::load()
{
this->loadEmojis();
@ -219,6 +240,8 @@ void Emojis::loadEmojiSet()
getSettings()->emojiSet.connect([this](const auto &emojiSet) {
this->emojis.each([=](const auto &name,
std::shared_ptr<EmojiData> &emoji) {
(void)name;
QString emojiSetToUse = emojiSet;
// clang-format off
static std::map<QString, QString> emojiSets = {
@ -243,7 +266,7 @@ void Emojis::loadEmojiSet()
};
// clang-format on
if (emoji->capabilities.count(emojiSetToUse) == 0)
if (!emoji->capabilities.contains(emojiSetToUse))
{
emojiSetToUse = "Twitter";
}
@ -268,7 +291,7 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
const QString &text) const
{
auto result = std::vector<boost::variant<EmotePtr, QString>>();
int lastParsedEmojiEndIndex = 0;
QString::size_type lastParsedEmojiEndIndex = 0;
for (auto i = 0; i < text.length(); ++i)
{
@ -288,39 +311,47 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
const auto &possibleEmojis = it.value();
int remainingCharacters = text.length() - i - 1;
auto remainingCharacters = text.length() - i - 1;
std::shared_ptr<EmojiData> matchedEmoji;
int matchedEmojiLength = 0;
QString::size_type matchedEmojiLength = 0;
for (const std::shared_ptr<EmojiData> &emoji : possibleEmojis)
{
int emojiExtraCharacters = emoji->value.length() - 1;
if (emojiExtraCharacters > remainingCharacters)
auto emojiNonQualifiedExtraCharacters =
emoji->nonQualified.length() - 1;
auto emojiExtraCharacters = emoji->value.length() - 1;
if (remainingCharacters >= emojiExtraCharacters)
{
// It cannot be this emoji, there's not enough space for it
continue;
}
// look in emoji->value
bool match = QStringView{emoji->value}.mid(1) ==
QStringView{text}.mid(i + 1, emojiExtraCharacters);
bool match = true;
for (int j = 1; j < emoji->value.length(); ++j)
{
if (text.at(i + j) != emoji->value.at(j))
if (match)
{
match = false;
matchedEmoji = emoji;
matchedEmojiLength = emoji->value.length();
break;
}
}
if (match)
if (!emoji->nonQualified.isNull() &&
remainingCharacters >= emojiNonQualifiedExtraCharacters)
{
matchedEmoji = emoji;
matchedEmojiLength = emoji->value.length();
// This checking here relies on the fact that the nonQualified string
// always starts with the same byte as value (the unified string)
bool match = QStringView{emoji->nonQualified}.mid(1) ==
QStringView{text}.mid(
i + 1, emojiNonQualifiedExtraCharacters);
break;
if (match)
{
matchedEmoji = emoji;
matchedEmojiLength = emoji->nonQualified.length();
break;
}
}
}
@ -329,10 +360,10 @@ std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
continue;
}
int currentParsedEmojiFirstIndex = i;
int currentParsedEmojiEndIndex = i + (matchedEmojiLength);
auto currentParsedEmojiFirstIndex = i;
auto currentParsedEmojiEndIndex = i + (matchedEmojiLength);
int charactersFromLastParsedEmoji =
auto charactersFromLastParsedEmoji =
currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex;
if (charactersFromLastParsedEmoji > 0)
@ -382,7 +413,7 @@ QString Emojis::replaceShortCodes(const QString &text) const
continue;
}
auto emojiData = emojiIt.value();
const auto &emojiData = emojiIt.value();
ret.replace(offset + match.capturedStart(), match.capturedLength(),
emojiData->value);

View file

@ -7,7 +7,6 @@
#include <QRegularExpression>
#include <QVector>
#include <map>
#include <set>
#include <vector>
@ -21,6 +20,9 @@ struct EmojiData {
// :male:)
QString value;
// actual byte-representation of the non qualified emoji
QString nonQualified;
// e.g. 204e-50a2
QString unifiedCode;
QString nonQualifiedCode;

View file

@ -3,6 +3,8 @@
#include "util/CombinePath.hpp"
#include "util/Qt.hpp"
#include <unordered_map>
namespace chatterino {
#if defined(Q_OS_UNIX) and !defined(Q_OS_DARWIN)