From 36ef8fb99d85746f216e62fa1ade51d543214df5 Mon Sep 17 00:00:00 2001
From: nerix
Date: Sat, 27 Jan 2024 15:46:11 +0100
Subject: [PATCH] fix: support captures in ignores (#5126)

---
Review note: in the regex branch of processIgnorePhrases the search offset
now advances by the length of the expanded replacement instead of the length
of the unexpanded replacement template, so back-references that change the
length no longer cause skipped or re-scanned text.

 CHANGELOG.md                                  |   6 +-
 src/providers/twitch/TwitchMessageBuilder.cpp | 168 ++++++++++++++++--
 src/providers/twitch/TwitchMessageBuilder.hpp |   7 +-
 tests/src/TwitchMessageBuilder.cpp            | 140 +++++++++++++++
 4 files changed, 298 insertions(+), 23 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4773ba81..f7e4900a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -53,10 +53,10 @@
 - Bugfix: Fixed thread popup window missing messages for nested threads. (#4923)
 - Bugfix: Fixed an occasional crash for channel point redemptions with text input. (#4949)
 - Bugfix: Fixed triple click on message also selecting moderation buttons. (#4961)
-- Bugfix: Fixed a freeze from a bad regex in _Ignores_. (#4965)
+- Bugfix: Fixed a freeze from a bad regex in _Ignores_. (#4965, #5126)
 - Bugfix: Fixed badge highlight changes not immediately being reflected. (#5110)
-- Bugfix: Fixed some emotes not appearing when using _Ignores_. (#4965)
-- Bugfix: Fixed lookahead/-behind not working in _Ignores_. (#4965)
+- Bugfix: Fixed some emotes not appearing when using _Ignores_. (#4965, #5126)
+- Bugfix: Fixed lookahead/-behind not working in _Ignores_. (#4965, #5126)
 - Bugfix: Fixed Image Uploader accidentally deleting images with some hosts when link resolver was enabled. (#4971)
 - Bugfix: Fixed rare crash with Image Uploader when closing a split right after starting an upload. (#4971)
 - Bugfix: Fixed an issue on macOS where the image uploader would keep prompting the user even after they clicked "Yes, don't ask again". (#5011)
diff --git a/src/providers/twitch/TwitchMessageBuilder.cpp b/src/providers/twitch/TwitchMessageBuilder.cpp
index 960c65ada..4d99241a8 100644
--- a/src/providers/twitch/TwitchMessageBuilder.cpp
+++ b/src/providers/twitch/TwitchMessageBuilder.cpp
@@ -269,6 +269,128 @@ namespace {
         builder->message().badgeInfos = badgeInfos;
     }
 
+    /**
+     * Computes (only) the replacement of @a match in @a source.
+     * The parts before and after the match in @a source are ignored.
+     *
+     * Occurrences of \b{\\1}, \b{\\2}, ..., in @a replacement are replaced
+     * with the string captured by the corresponding capturing group.
+     * This function should only be used if the regex contains capturing groups.
+     *
+     * Since Qt doesn't provide a way of replacing a single match with some replacement
+     * while supporting both capturing groups and lookahead/-behind in the regex,
+     * this is included here. It's essentially the implementation of
+     * QString::replace(const QRegularExpression &, const QString &).
+     * @see https://github.com/qt/qtbase/blob/97bb0ecfe628b5bb78e798563212adf02129c6f6/src/corelib/text/qstring.cpp#L4594-L4703
+     */
+    QString makeRegexReplacement(QStringView source,
+                                 const QRegularExpression &regex,
+                                 const QRegularExpressionMatch &match,
+                                 const QString &replacement)
+    {
+        using SizeType = QString::size_type;
+        struct QStringCapture {
+            SizeType pos;
+            SizeType len;
+            int captureNumber;
+        };
+
+        qsizetype numCaptures = regex.captureCount();
+
+        // 1. build the backreferences list, holding where the backreferences
+        //    are in the replacement string
+        QVarLengthArray<QStringCapture> backReferences;
+
+        SizeType replacementLength = replacement.size();
+        for (SizeType i = 0; i < replacementLength - 1; i++)
+        {
+            if (replacement[i] != u'\\')
+            {
+                continue;
+            }
+
+            int no = replacement[i + 1].digitValue();
+            if (no <= 0 || no > numCaptures)
+            {
+                continue;
+            }
+
+            QStringCapture backReference{.pos = i, .len = 2};
+
+            if (i < replacementLength - 2)
+            {
+                int secondDigit = replacement[i + 2].digitValue();
+                if (secondDigit != -1 &&
+                    ((no * 10) + secondDigit) <= numCaptures)
+                {
+                    no = (no * 10) + secondDigit;
+                    ++backReference.len;
+                }
+            }
+
+            backReference.captureNumber = no;
+            backReferences.append(backReference);
+        }
+
+        // 2. iterate on the matches.
+        // For every match, copy the replacement string in chunks
+        // with the proper replacements for the backreferences
+
+        // length of the new string, with all the replacements
+        SizeType newLength = 0;
+        QVarLengthArray<QStringView> chunks;
+        QStringView replacementView{replacement};
+
+        // Initially: empty, as we only care about the replacement
+        SizeType len = 0;
+        SizeType lastEnd = 0;
+        for (const QStringCapture &backReference :
+             std::as_const(backReferences))
+        {
+            // part of "replacement" before the backreference
+            len = backReference.pos - lastEnd;
+            if (len > 0)
+            {
+                chunks << replacementView.mid(lastEnd, len);
+                newLength += len;
+            }
+
+            // backreference itself
+            len = match.capturedLength(backReference.captureNumber);
+            if (len > 0)
+            {
+                chunks << source.mid(
+                    match.capturedStart(backReference.captureNumber), len);
+                newLength += len;
+            }
+
+            lastEnd = backReference.pos + backReference.len;
+        }
+
+        // add the last part of the replacement string
+        len = replacementView.size() - lastEnd;
+        if (len > 0)
+        {
+            chunks << replacementView.mid(lastEnd, len);
+            newLength += len;
+        }
+
+        // 3. assemble the chunks together
+        QString dst;
+        dst.reserve(newLength);
+        for (const QStringView &chunk : std::as_const(chunks))
+        {
+#if QT_VERSION < QT_VERSION_CHECK(5, 15, 2)
+            static_assert(sizeof(QChar) == sizeof(decltype(*chunk.utf16())));
+            dst.append(reinterpret_cast<const QChar *>(chunk.utf16()),
+                       chunk.length());
+#else
+            dst += chunk;
+#endif
+        }
+        return dst;
+    }
+
 }  // namespace
 
 TwitchMessageBuilder::TwitchMessageBuilder(
@@ -419,7 +541,9 @@ MessagePtr TwitchMessageBuilder::build()
         this->tags, this->originalMessage_, this->messageOffset_);
 
     // This runs through all ignored phrases and runs its replacements on this->originalMessage_
-    this->runIgnoreReplaces(twitchEmotes);
+    TwitchMessageBuilder::processIgnorePhrases(
+        *getSettings()->ignoredMessages.readOnly(), this->originalMessage_,
+        twitchEmotes);
 
     std::sort(twitchEmotes.begin(), twitchEmotes.end(),
               [](const auto &a, const auto &b) {
@@ -960,12 +1084,12 @@ void TwitchMessageBuilder::appendUsername()
     }
 }
 
-void TwitchMessageBuilder::runIgnoreReplaces(
+void TwitchMessageBuilder::processIgnorePhrases(
+    const std::vector<IgnorePhrase> &phrases, QString &originalMessage,
     std::vector<TwitchEmoteOccurrence> &twitchEmotes)
 {
     using SizeType = QString::size_type;
 
-    auto phrases = getSettings()->ignoredMessages.readOnly();
     auto removeEmotesInRange = [&twitchEmotes](SizeType pos, SizeType len) {
         // all emotes outside the range come before `it`
         // all emotes in the range start at `it`
@@ -1034,20 +1158,20 @@ void TwitchMessageBuilder::runIgnoreReplaces(
     auto replaceMessageAt = [&](const IgnorePhrase &phrase, SizeType from,
                                 SizeType length, const QString &replacement) {
         auto removedEmotes = removeEmotesInRange(from, length);
-        this->originalMessage_.replace(from, length, replacement);
+        originalMessage.replace(from, length, replacement);
         auto wordStart = from;
         while (wordStart > 0)
         {
-            if (this->originalMessage_[wordStart - 1] == ' ')
+            if (originalMessage[wordStart - 1] == ' ')
             {
                 break;
             }
             --wordStart;
         }
         auto wordEnd = from + replacement.length();
-        while (wordEnd < this->originalMessage_.length())
+        while (wordEnd < originalMessage.length())
         {
-            if (this->originalMessage_[wordEnd] == ' ')
+            if (originalMessage[wordEnd] == ' ')
             {
                 break;
             }
@@ -1058,11 +1182,11 @@ void TwitchMessageBuilder::runIgnoreReplaces(
                           static_cast<int>(replacement.length() - length));
 
 #if QT_VERSION >= QT_VERSION_CHECK(5, 15, 0)
-        auto midExtendedRef = QStringView{this->originalMessage_}.mid(
-            wordStart, wordEnd - wordStart);
+        auto midExtendedRef =
+            QStringView{originalMessage}.mid(wordStart, wordEnd - wordStart);
 #else
         auto midExtendedRef =
-            this->originalMessage_.midRef(wordStart, wordEnd - wordStart);
+            originalMessage.midRef(wordStart, wordEnd - wordStart);
 #endif
 
         for (auto &emote : removedEmotes)
@@ -1088,7 +1212,7 @@ void TwitchMessageBuilder::runIgnoreReplaces(
         addReplEmotes(phrase, midExtendedRef, wordStart);
     };
 
-    for (const auto &phrase : *phrases)
+    for (const auto &phrase : phrases)
     {
         if (phrase.isBlock())
         {
@@ -1110,16 +1234,24 @@ void TwitchMessageBuilder::runIgnoreReplaces(
             QRegularExpressionMatch match;
             size_t iterations = 0;
             SizeType from = 0;
-            while ((from = this->originalMessage_.indexOf(regex, from,
-                                                          &match)) != -1)
+            while ((from = originalMessage.indexOf(regex, from, &match)) != -1)
             {
+                auto replacement = phrase.getReplace();
+                if (regex.captureCount() > 0)
+                {
+                    replacement = makeRegexReplacement(originalMessage, regex,
+                                                       match, replacement);
+                }
+
                 replaceMessageAt(phrase, from, match.capturedLength(),
-                                 phrase.getReplace());
-                from += phrase.getReplace().length();
+                                 replacement);
+                // Skip the text that was actually inserted; with captures its
+                // length can differ from the unexpanded replacement template.
+                from += replacement.length();
                 iterations++;
                 if (iterations >= 128)
                 {
-                    this->originalMessage_ =
+                    originalMessage =
                         u"Too many replacements - check your ignores!"_s;
                     return;
                 }
@@ -1129,8 +1261,8 @@ void TwitchMessageBuilder::runIgnoreReplaces(
         }
 
         SizeType from = 0;
-        while ((from = this->originalMessage_.indexOf(
-                    pattern, from, phrase.caseSensitivity())) != -1)
+        while ((from = originalMessage.indexOf(pattern, from,
+                                               phrase.caseSensitivity())) != -1)
         {
             replaceMessageAt(phrase, from, pattern.length(),
                              phrase.getReplace());
diff --git a/src/providers/twitch/TwitchMessageBuilder.hpp b/src/providers/twitch/TwitchMessageBuilder.hpp
index a9fb15f4f..dd38fc790 100644
--- a/src/providers/twitch/TwitchMessageBuilder.hpp
+++ b/src/providers/twitch/TwitchMessageBuilder.hpp
@@ -20,6 +20,7 @@ using EmotePtr = std::shared_ptr<const Emote>;
 class Channel;
 class TwitchChannel;
 class MessageThread;
+class IgnorePhrase;
 struct HelixVip;
 using HelixModerator = HelixVip;
 struct ChannelPointReward;
@@ -108,6 +109,10 @@ public:
         const QVariantMap &tags, const QString &originalMessage,
         int messageOffset);
 
+    static void processIgnorePhrases(
+        const std::vector<IgnorePhrase> &phrases, QString &originalMessage,
+        std::vector<TwitchEmoteOccurrence> &twitchEmotes);
+
 private:
     void parseUsernameColor() override;
     void parseUsername() override;
@@ -118,8 +123,6 @@ private:
     void parseThread();
     void appendUsername();
 
-    void runIgnoreReplaces(std::vector<TwitchEmoteOccurrence> &twitchEmotes);
-
     Outcome tryAppendEmote(const EmoteName &name) override;
 
     void addWords(const QStringList &words,
diff --git a/tests/src/TwitchMessageBuilder.cpp b/tests/src/TwitchMessageBuilder.cpp
index 7bd18e90c..6e9989410 100644
--- a/tests/src/TwitchMessageBuilder.cpp
+++ b/tests/src/TwitchMessageBuilder.cpp
@@ -3,6 +3,7 @@
 #include "common/Channel.hpp"
 #include "controllers/accounts/AccountController.hpp"
 #include "controllers/highlights/HighlightController.hpp"
+#include "controllers/ignores/IgnorePhrase.hpp"
 #include "messages/MessageBuilder.hpp"
 #include "mocks/Channel.hpp"
 #include "mocks/ChatterinoBadges.hpp"
@@ -478,3 +479,142 @@ TEST_F(TestTwitchMessageBuilder, ParseMessage)
         delete privmsg;
     }
 }
+
+TEST_F(TestTwitchMessageBuilder, IgnoresReplace)
+{
+    struct TestCase {
+        std::vector<IgnorePhrase> phrases;
+        QString input;
+        std::vector<TwitchEmoteOccurrence> twitchEmotes;
+        QString expectedMessage;
+        std::vector<TwitchEmoteOccurrence> expectedTwitchEmotes;
+    };
+
+    auto *twitchEmotes = this->mockApplication->getEmotes()->getTwitchEmotes();
+
+    auto emoteAt = [&](int at, const QString &name) {
+        return TwitchEmoteOccurrence{
+            .start = at,
+            .end = static_cast<int>(at + name.size() - 1),
+            .ptr =
+                twitchEmotes->getOrCreateEmote(EmoteId{name}, EmoteName{name}),
+            .name = EmoteName{name},
+        };
+    };
+
+    auto regularReplace = [](auto pattern, auto replace,
+                             bool caseSensitive = true) {
+        return IgnorePhrase(pattern, false, false, replace, caseSensitive);
+    };
+    auto regexReplace = [](auto pattern, auto regex,
+                           bool caseSensitive = true) {
+        return IgnorePhrase(pattern, true, false, regex, caseSensitive);
+    };
+
+    std::vector<TestCase> testCases{
+        {
+            {regularReplace("foo1", "baz1")},
+            "foo1 Kappa",
+            {emoteAt(4, "Kappa")},
+            "baz1 Kappa",
+            {emoteAt(4, "Kappa")},
+        },
+        {
+            {regularReplace("foo1", "baz1", false)},
+            "FoO1 Kappa",
+            {emoteAt(4, "Kappa")},
+            "baz1 Kappa",
+            {emoteAt(4, "Kappa")},
+        },
+        {
+            {regexReplace("f(o+)1", "baz1[\\1]")},
+            "foo1 Kappa",
+            {emoteAt(4, "Kappa")},
+            "baz1[oo] Kappa",
+            {emoteAt(8, "Kappa")},
+        },
+
+        {
+            {regexReplace("f(o+)1", R"(baz1[\0][\1][\2])")},
+            "foo1 Kappa",
+            {emoteAt(4, "Kappa")},
+            "baz1[\\0][oo][\\2] Kappa",
+            {emoteAt(16, "Kappa")},
+        },
+        {
+            {regexReplace("f(o+)(\\d+)", "baz1[\\1+\\2]")},
+            "foo123 Kappa",
+            {emoteAt(6, "Kappa")},
+            "baz1[oo+123] Kappa",
+            {emoteAt(12, "Kappa")},
+        },
+        {
+            {regexReplace("(?<=foo)(\\d+)", "[\\1]")},
+            "foo123 Kappa",
+            {emoteAt(6, "Kappa")},
+            "foo[123] Kappa",
+            {emoteAt(8, "Kappa")},
+        },
+        {
+            {regexReplace("a(?=a| )", "b")},
+            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+            "aaaa"
+            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa "
+            "Kappa",
+            {emoteAt(127, "Kappa")},
+            "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
+            "bbbb"
+            "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb "
+            "Kappa",
+            {emoteAt(127, "Kappa")},
+        },
+        {
+            {regexReplace("abc", "def", false)},
+            "AbC Kappa",
+            {emoteAt(3, "Kappa")},
+            "def Kappa",
+            {emoteAt(3, "Kappa")},
+        },
+        {
+            {
+                regexReplace("abc", "def", false),
+                regularReplace("def", "ghi"),
+            },
+            "AbC Kappa",
+            {emoteAt(3, "Kappa")},
+            "ghi Kappa",
+            {emoteAt(3, "Kappa")},
+        },
+        {
+            {
+                regexReplace("a(?=a| )", "b"),
+                regexReplace("b(?=b| )", "c"),
+            },
+            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+            "aaaa"
+            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa "
+            "Kappa",
+            {emoteAt(127, "Kappa")},
+            "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
+            "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc "
+            "Kappa",
+            {emoteAt(127, "Kappa")},
+        },
+    };
+
+    for (const auto &test : testCases)
+    {
+        auto message = test.input;
+        auto emotes = test.twitchEmotes;
+        TwitchMessageBuilder::processIgnorePhrases(test.phrases, message,
+                                                   emotes);
+
+        EXPECT_EQ(message, test.expectedMessage)
+            << "Message not equal for input '" << test.input.toStdString()
+            << "' - expected: '" << test.expectedMessage.toStdString()
+            << "' got: '" << message.toStdString() << "'";
+        EXPECT_EQ(emotes, test.expectedTwitchEmotes)
+            << "Twitch emotes not equal for input '" << test.input.toStdString()
+            << "' and output '" << message.toStdString() << "'";
+    }
+}