mirror-chatterino2/src/providers/emoji/Emojis.cpp

400 lines
12 KiB
C++
Raw Normal View History

2018-06-26 14:09:39 +02:00
#include "providers/emoji/Emojis.hpp"
2018-06-05 18:53:49 +02:00
2018-06-26 14:09:39 +02:00
#include "Application.hpp"
#include "messages/Emote.hpp"
2018-06-28 19:46:45 +02:00
#include "singletons/Settings.hpp"
2018-08-02 14:23:27 +02:00
#include <rapidjson/error/en.h>
#include <rapidjson/error/error.h>
#include <rapidjson/rapidjson.h>
2018-06-20 19:10:54 +02:00
#include <QFile>
2018-08-02 14:23:27 +02:00
#include <boost/variant.hpp>
#include <memory>
#include "common/QLogging.hpp"
2018-06-20 19:10:54 +02:00
2018-06-05 18:53:49 +02:00
namespace chatterino {
namespace {
2021-02-13 19:17:22 +01:00
// Maps the hexadecimal code point of a skin-tone modifier (the '-'-separated
// parts handed to getToneNames) to the tone name that is used when building
// the short code of a skin variation.
// The table is only ever read, so it is const.
const auto toneNames = std::map<QString, QString>{
    {"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"},
    {"1F3FE", "tone4"}, {"1F3FF", "tone5"},
};
2018-08-15 22:46:20 +02:00
/**
 * Fills `emojiData` from one JSON entry of the emoji data set.
 *
 * Written fields: shortCodes, nonQualifiedCode, unifiedCode, capabilities and
 * value (the emoji itself as a UTF-16 QString).
 *
 * @param emojiData      Object that receives the parsed data.
 * @param unparsedEmoji  JSON value describing the emoji. When `shortCode` is
 *                       empty it must contain a "short_names" array.
 * @param shortCode      If non-empty, it becomes the only short code of the
 *                       emoji and "short_names" is not read.
 */
void parseEmoji(const std::shared_ptr<EmojiData> &emojiData,
                const rapidjson::Value &unparsedEmoji,
                QString shortCode = QString())
{
    // Default to "not available" so a key that could not be read never leaves
    // a flag uninitialized.
    // NOTE(review): assumes rj::getSafe leaves its output untouched when the
    // key is missing or has the wrong type - confirm against its definition.
    struct {
        bool apple = false;
        bool google = false;
        bool twitter = false;
        bool facebook = false;
    } capabilities;

    if (!shortCode.isEmpty())
    {
        emojiData->shortCodes.push_back(shortCode);
    }
    else
    {
        const auto &shortCodes = unparsedEmoji["short_names"];
        for (const auto &_shortCode : shortCodes.GetArray())
        {
            emojiData->shortCodes.emplace_back(_shortCode.GetString());
        }
    }

    rj::getSafe(unparsedEmoji, "non_qualified",
                emojiData->nonQualifiedCode);
    rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode);

    rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple);
    rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google);
    rj::getSafe(unparsedEmoji, "has_img_twitter", capabilities.twitter);
    rj::getSafe(unparsedEmoji, "has_img_facebook", capabilities.facebook);

    if (capabilities.apple)
    {
        emojiData->capabilities.insert("Apple");
    }
    if (capabilities.google)
    {
        emojiData->capabilities.insert("Google");
    }
    if (capabilities.twitter)
    {
        emojiData->capabilities.insert("Twitter");
    }
    if (capabilities.facebook)
    {
        emojiData->capabilities.insert("Facebook");
    }

    // The non-qualified code is preferred; the unified code is the fallback
    const QString &code = emojiData->nonQualifiedCode.isEmpty()
                              ? emojiData->unifiedCode
                              : emojiData->nonQualifiedCode;
    const QStringList unicodeCharacters = code.toLower().split('-');

    if (unicodeCharacters.isEmpty())
    {
        return;
    }

    // The code is a '-'-separated list of hexadecimal code points.
    // A growable buffer is used because the number of code points in a
    // sequence is not bounded by this function.
    std::vector<uint32_t> codePoints;
    codePoints.reserve(static_cast<std::size_t>(unicodeCharacters.size()));
    for (const QString &unicodeCharacter : unicodeCharacters)
    {
        codePoints.push_back(unicodeCharacter.toUInt(nullptr, 16));
    }

    emojiData->value = QString::fromUcs4(codePoints.data(),
                                         static_cast<int>(codePoints.size()));
}
2021-02-13 19:17:22 +01:00
// getToneNames takes a list of tones and returns their names in the same order
// The format of the tones is: "1F3FB-1F3FB" or "1F3FB"
// The output of the tone names is: "tone1-tone1" or "tone1"
// Tones that are missing from the toneNames map are logged and left out
QString getToneNames(const QString &tones)
{
    // const so the range-for below cannot detach the implicitly shared list
    const auto toneParts = tones.split('-');

    QStringList toneNameResults;
    for (const auto &tonePart : toneParts)
    {
        const auto toneNameIt = toneNames.find(tonePart);
        if (toneNameIt == toneNames.end())
        {
            qCDebug(chatterinoEmoji)
                << "Tone with key" << tonePart
                << "does not exist in tone names map";
            continue;
        }

        toneNameResults.append(toneNameIt->second);
    }

    // At least one of the given tones is expected to be known
    assert(!toneNameResults.isEmpty());

    return toneNameResults.join('-');
}
} // namespace
2018-06-05 18:53:49 +02:00
/// Runs the three loading stages in order: read the emoji data, sort the
/// lookup tables, then build the emotes for the emoji set.
void Emojis::load()
{
    // 1. Fill the lookup tables from the emoji data
    this->loadEmojis();

    // 2. Order the tables that were just filled
    this->sortEmojis();

    // 3. Create the emote of every loaded emoji
    this->loadEmojiSet();
}
void Emojis::loadEmojis()
{
2022-03-19 11:30:01 +01:00
// Current version: https://github.com/iamcal/emoji-data/blob/v14.0.0/emoji.json (Emoji version 14.0 (2022))
QFile file(":/emoji.json");
file.open(QFile::ReadOnly);
QTextStream s1(&file);
QString data = s1.readAll();
rapidjson::Document root;
rapidjson::ParseResult result = root.Parse(data.toUtf8(), data.length());
2018-10-21 13:43:02 +02:00
if (result.Code() != rapidjson::kParseErrorNone)
{
qCWarning(chatterinoEmoji)
<< "JSON parse error:" << rapidjson::GetParseError_En(result.Code())
<< "(" << result.Offset() << ")";
return;
}
2018-10-21 13:43:02 +02:00
for (const auto &unparsedEmoji : root.GetArray())
{
auto emojiData = std::make_shared<EmojiData>();
parseEmoji(emojiData, unparsedEmoji);
2018-10-21 13:43:02 +02:00
for (const auto &shortCode : emojiData->shortCodes)
{
2018-07-06 19:23:47 +02:00
this->emojiShortCodeToEmoji_.insert(shortCode, emojiData);
this->shortCodes.emplace_back(shortCode);
}
2018-07-06 19:23:47 +02:00
this->emojiFirstByte_[emojiData->value.at(0)].append(emojiData);
this->emojis.insert(emojiData->unifiedCode, emojiData);
2018-10-21 13:43:02 +02:00
if (unparsedEmoji.HasMember("skin_variations"))
{
2018-08-06 21:17:03 +02:00
for (const auto &skinVariation :
2018-10-21 13:43:02 +02:00
unparsedEmoji["skin_variations"].GetObject())
{
2021-02-13 19:17:22 +01:00
auto toneName = getToneNames(skinVariation.name.GetString());
const auto &variation = skinVariation.value;
auto variationEmojiData = std::make_shared<EmojiData>();
parseEmoji(variationEmojiData, variation,
2021-02-13 19:17:22 +01:00
emojiData->shortCodes[0] + "_" + toneName);
2018-08-06 21:17:03 +02:00
this->emojiShortCodeToEmoji_.insert(
variationEmojiData->shortCodes[0], variationEmojiData);
this->shortCodes.push_back(variationEmojiData->shortCodes[0]);
2018-08-06 21:17:03 +02:00
this->emojiFirstByte_[variationEmojiData->value.at(0)].append(
variationEmojiData);
2018-08-06 21:17:03 +02:00
this->emojis.insert(variationEmojiData->unifiedCode,
variationEmojiData);
}
}
}
}
/// Sorts every emojiFirstByte_ bucket so that emojis with a longer value come
/// first, and sorts shortCodes in ascending order. Both sorts are stable.
void Emojis::sortEmojis()
{
    const auto longerValueFirst = [](const auto &a, const auto &b) {
        return a->value.length() > b->value.length();
    };

    for (auto &bucket : this->emojiFirstByte_)
    {
        std::stable_sort(bucket.begin(), bucket.end(), longerValueFirst);
    }

    // Ascending order, i.e. the elements' operator<
    std::stable_sort(this->shortCodes.begin(), this->shortCodes.end());
}
void Emojis::loadEmojiSet()
{
2021-02-13 19:17:22 +01:00
#ifndef CHATTERINO_TEST
getSettings()->emojiSet.connect([=](const auto &emojiSet) {
2021-02-13 19:17:22 +01:00
#else
const QString emojiSet = "twitter";
#endif
2018-08-06 21:17:03 +02:00
this->emojis.each([=](const auto &name,
std::shared_ptr<EmojiData> &emoji) {
QString emojiSetToUse = emojiSet;
// clang-format off
static std::map<QString, QString> emojiSets = {
2021-02-13 19:17:22 +01:00
// JSDELIVR
2018-06-20 20:30:54 +02:00
// {"Twitter", "https://cdn.jsdelivr.net/npm/emoji-datasource-twitter@4.0.4/img/twitter/64/"},
// {"Facebook", "https://cdn.jsdelivr.net/npm/emoji-datasource-facebook@4.0.4/img/facebook/64/"},
2021-02-13 19:17:22 +01:00
// {"Apple", "https://cdn.jsdelivr.net/npm/emoji-datasource-apple@5.0.1/img/apple/64/"},
2018-06-20 20:30:54 +02:00
// {"Google", "https://cdn.jsdelivr.net/npm/emoji-datasource-google@4.0.4/img/google/64/"},
// {"Messenger", "https://cdn.jsdelivr.net/npm/emoji-datasource-messenger@4.0.4/img/messenger/64/"},
2021-02-13 19:17:22 +01:00
// OBRODAI
{"Twitter", "https://pajbot.com/static/emoji-v2/img/twitter/64/"},
{"Facebook", "https://pajbot.com/static/emoji-v2/img/facebook/64/"},
{"Apple", "https://pajbot.com/static/emoji-v2/img/apple/64/"},
{"Google", "https://pajbot.com/static/emoji-v2/img/google/64/"},
// Cloudflare+B2 bucket
// {"Twitter", "https://chatterino2-emoji-cdn.pajlada.se/file/c2-emojis/emojis-v1/twitter/64/"},
// {"Facebook", "https://chatterino2-emoji-cdn.pajlada.se/file/c2-emojis/emojis-v1/facebook/64/"},
// {"Apple", "https://chatterino2-emoji-cdn.pajlada.se/file/c2-emojis/emojis-v1/apple/64/"},
// {"Google", "https://chatterino2-emoji-cdn.pajlada.se/file/c2-emojis/emojis-v1/google/64/"},
};
// clang-format on
2018-10-21 13:43:02 +02:00
if (emoji->capabilities.count(emojiSetToUse) == 0)
{
2021-02-13 19:17:22 +01:00
emojiSetToUse = "Twitter";
}
2021-02-13 19:17:22 +01:00
QString code = emoji->unifiedCode.toLower();
QString urlPrefix =
"https://pajbot.com/static/emoji-v2/img/twitter/64/";
auto it = emojiSets.find(emojiSetToUse);
2018-10-21 13:43:02 +02:00
if (it != emojiSets.end())
{
urlPrefix = it->second;
}
QString url = urlPrefix + code + ".png";
2018-08-06 21:17:03 +02:00
emoji->emote = std::make_shared<Emote>(Emote{
EmoteName{emoji->value}, ImageSet{Image::fromUrl({url}, 0.35)},
Tooltip{":" + emoji->shortCodes[0] + ":<br/>Emoji"}, Url{}});
});
2021-02-13 19:17:22 +01:00
#ifndef CHATTERINO_TEST
});
2021-02-13 19:17:22 +01:00
#endif
}
2018-08-06 21:17:03 +02:00
std::vector<boost::variant<EmotePtr, QString>> Emojis::parse(
const QString &text)
2018-06-05 18:53:49 +02:00
{
2018-08-02 14:23:27 +02:00
auto result = std::vector<boost::variant<EmotePtr, QString>>();
2018-06-05 18:53:49 +02:00
int lastParsedEmojiEndIndex = 0;
2018-10-21 13:43:02 +02:00
for (auto i = 0; i < text.length(); ++i)
{
2018-06-05 18:53:49 +02:00
const QChar character = text.at(i);
2018-10-21 13:43:02 +02:00
if (character.isLowSurrogate())
{
2018-06-05 18:53:49 +02:00
continue;
}
2018-07-06 19:23:47 +02:00
auto it = this->emojiFirstByte_.find(character);
2018-10-21 13:43:02 +02:00
if (it == this->emojiFirstByte_.end())
{
2018-06-05 18:53:49 +02:00
// No emoji starts with this character
continue;
}
const auto &possibleEmojis = it.value();
2018-06-05 18:53:49 +02:00
int remainingCharacters = text.length() - i - 1;
std::shared_ptr<EmojiData> matchedEmoji;
2018-06-05 18:53:49 +02:00
int matchedEmojiLength = 0;
2018-10-21 13:43:02 +02:00
for (const std::shared_ptr<EmojiData> &emoji : possibleEmojis)
{
int emojiExtraCharacters = emoji->value.length() - 1;
2018-10-21 13:43:02 +02:00
if (emojiExtraCharacters > remainingCharacters)
{
2018-06-05 18:53:49 +02:00
// It cannot be this emoji, there's not enough space for it
continue;
}
bool match = true;
2018-10-21 13:43:02 +02:00
for (int j = 1; j < emoji->value.length(); ++j)
{
if (text.at(i + j) != emoji->value.at(j))
{
2018-06-05 18:53:49 +02:00
match = false;
break;
}
}
2018-10-21 13:43:02 +02:00
if (match)
{
2018-06-05 18:53:49 +02:00
matchedEmoji = emoji;
matchedEmojiLength = emoji->value.length();
2018-06-05 18:53:49 +02:00
break;
}
}
2018-10-21 13:43:02 +02:00
if (matchedEmojiLength == 0)
{
2018-06-05 18:53:49 +02:00
continue;
}
int currentParsedEmojiFirstIndex = i;
int currentParsedEmojiEndIndex = i + (matchedEmojiLength);
2018-08-06 21:17:03 +02:00
int charactersFromLastParsedEmoji =
currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex;
2018-06-05 18:53:49 +02:00
2018-10-21 13:43:02 +02:00
if (charactersFromLastParsedEmoji > 0)
{
2018-06-05 18:53:49 +02:00
// Add characters inbetween emojis
2018-08-06 21:17:03 +02:00
result.emplace_back(text.mid(lastParsedEmojiEndIndex,
charactersFromLastParsedEmoji));
2018-06-05 18:53:49 +02:00
}
// Push the emoji as a word to parsedWords
2018-08-02 14:23:27 +02:00
result.emplace_back(matchedEmoji->emote);
2018-06-05 18:53:49 +02:00
lastParsedEmojiEndIndex = currentParsedEmojiEndIndex;
i += matchedEmojiLength - 1;
}
2018-10-21 13:43:02 +02:00
if (lastParsedEmojiEndIndex < text.length())
{
2018-06-05 18:53:49 +02:00
// Add remaining characters
2018-08-02 14:23:27 +02:00
result.emplace_back(text.mid(lastParsedEmojiEndIndex));
2018-06-05 18:53:49 +02:00
}
2018-08-02 14:23:27 +02:00
return result;
2018-06-05 18:53:49 +02:00
}
/// Returns a copy of `text` in which every match of findShortCodesRegex_ that
/// names a known short code is replaced by the value of that emoji.
///
/// The lookup key is the lower-cased match without its first and last
/// character. Matches that are not a known short code are kept unchanged.
QString Emojis::replaceShortCodes(const QString &text)
{
    QString replaced;
    // Everything in `text` before this position is already in `replaced`
    decltype(text.size()) copiedUpTo = 0;

    auto matches = this->findShortCodesRegex_.globalMatch(text);
    while (matches.hasNext())
    {
        const auto match = matches.next();

        const QString wholeMatch = match.captured();
        const QString shortCode =
            wholeMatch.toLower().mid(1, wholeMatch.size() - 2);

        const auto known = this->emojiShortCodeToEmoji_.constFind(shortCode);
        if (known == this->emojiShortCodeToEmoji_.constEnd())
        {
            // Unknown short code, its text is copied with the next segment
            continue;
        }

        // Text since the previous replacement, then the emoji itself
        replaced += text.mid(copiedUpTo, match.capturedStart() - copiedUpTo);
        replaced += known.value()->value;

        copiedUpTo = match.capturedEnd();
    }

    replaced += text.mid(copiedUpTo);

    return replaced;
}
} // namespace chatterino