mirror of
https://gitdab.com/cadence/out-of-your-element.git
synced 2025-09-10 12:22:50 +02:00
New unicode emoji processor
This commit is contained in:
parent
14574b4e2c
commit
f42eb6495f
7 changed files with 4015 additions and 46 deletions
|
@ -20,7 +20,7 @@ async function addReaction(event) {
|
|||
if (!messageID) return // Nothing can be done if the parent message was never bridged.
|
||||
|
||||
const key = event.content["m.relates_to"].key
|
||||
const discordPreferredEncoding = emoji.encodeEmoji(key, event.content.shortcode)
|
||||
const discordPreferredEncoding = await emoji.encodeEmoji(key, event.content.shortcode)
|
||||
if (!discordPreferredEncoding) return
|
||||
|
||||
await discord.snow.channel.createReaction(channelID, messageID, discordPreferredEncoding) // acting as the discord bot itself
|
||||
|
|
|
@ -1,58 +1,98 @@
|
|||
// @ts-check
|
||||
|
||||
const assert = require("assert").strict
|
||||
const Ty = require("../../types")
|
||||
const fsp = require("fs").promises
|
||||
const {join} = require("path")
|
||||
// Promise for the list of emoji strings stored one per line in emojis.txt next to this module.
// Accept both LF and CRLF line endings and drop blank lines (such as the one produced by a trailing newline),
// so that no entry carries a stray "\r" and the empty string is never considered a known emoji.
const emojisp = fsp.readFile(join(__dirname, "emojis.txt"), "utf8").then(content => content.split(/\r?\n/).filter(line => line !== ""))
|
||||
|
||||
const passthrough = require("../../passthrough")
|
||||
const {sync, select} = passthrough
|
||||
const {select} = passthrough
|
||||
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {string | null | undefined} shortcode
|
||||
* @returns {string?}
|
||||
*/
|
||||
function encodeEmoji(input, shortcode) {
|
||||
let discordPreferredEncoding
|
||||
if (input.startsWith("mxc://")) {
|
||||
// Custom emoji
|
||||
let row = select("emoji", ["emoji_id", "name"], {mxc_url: input}).get()
|
||||
if (!row && shortcode) {
|
||||
// Use the name to try to find a known emoji with the same name.
|
||||
const name = shortcode.replace(/^:|:$/g, "")
|
||||
row = select("emoji", ["emoji_id", "name"], {name: name}).get()
|
||||
}
|
||||
if (!row) {
|
||||
// We don't have this emoji and there's no realistic way to just-in-time upload a new emoji somewhere.
|
||||
// Sucks!
|
||||
return null
|
||||
}
|
||||
// Cool, we got an exact or a candidate emoji.
|
||||
discordPreferredEncoding = encodeURIComponent(`${row.name}:${row.emoji_id}`)
|
||||
} else {
|
||||
// Default emoji
|
||||
// https://github.com/discord/discord-api-docs/issues/2723#issuecomment-807022205 ????????????
|
||||
const encoded = encodeURIComponent(input)
|
||||
const encodedTrimmed = encoded.replace(/%EF%B8%8F/g, "")
|
||||
|
||||
const forceTrimmedList = [
|
||||
"%F0%9F%91%8D", // 👍
|
||||
"%F0%9F%91%8E", // 👎️
|
||||
"%E2%AD%90", // ⭐
|
||||
"%F0%9F%90%88", // 🐈
|
||||
"%E2%9D%93", // ❓
|
||||
"%F0%9F%8F%86", // 🏆️
|
||||
"%F0%9F%93%9A", // 📚️
|
||||
"%F0%9F%90%9F", // 🐟️
|
||||
]
|
||||
|
||||
discordPreferredEncoding =
|
||||
( forceTrimmedList.includes(encodedTrimmed) ? encodedTrimmed
|
||||
: encodedTrimmed !== encoded && [...input].length === 2 ? encoded
|
||||
: encodedTrimmed)
|
||||
|
||||
console.log("add reaction from matrix:", input, encoded, encodedTrimmed, "chosen:", discordPreferredEncoding)
|
||||
/**
 * Resolve a custom (mxc://) emoji to the URL-encoded `name:emoji_id` form by looking it up in the `emoji` table.
 * The row is looked up by its mxc URL first; if that finds nothing and a shortcode was given,
 * a row whose name equals the shortcode (without its surrounding colons) is used instead.
 * @param {string} input mxc:// URL of the emoji
 * @param {string | null | undefined} shortcode
 * @returns {string?} URL encoded, or null when no matching row exists
 */
function encodeCustomEmoji(input, shortcode) {
	const exactMatch = select("emoji", ["emoji_id", "name"], {mxc_url: input}).get()

	// Only fall back to a lookup by name when the exact lookup failed and there is a name to look for.
	const found = exactMatch || !shortcode
		? exactMatch
		: select("emoji", ["emoji_id", "name"], {name: shortcode.replace(/^:|:$/g, "")}).get()

	// We don't have this emoji and there's no realistic way to just-in-time upload a new emoji somewhere.
	if (!found) return null

	return encodeURIComponent(`${found.name}:${found.emoji_id}`)
}
|
||||
|
||||
/**
 * Convert a Unicode emoji reaction key into its URL-encoded form, but only if it (or a close variant of it)
 * appears in the known emoji list (`emojisp`). Variants are produced by removing or inserting U+FE0E/U+FE0F
 * variation selectors. Anything that cannot be matched to a list entry yields null.
 * @param {string} input
 * @returns {Promise<string?>} URL encoded!
 */
async function encodeDefaultEmoji(input) {
	// Nothing to match for an empty key. (A blank line in the list must not make "" look like an emoji.)
	if (!input) return null

	// Shortcut: If there are ASCII letters then it's not an emoji, it's a freeform Matrix text reaction.
	// (Regional indicator letters are not ASCII. ASCII digits might be part of an emoji.)
	if (/[A-Za-z]/.test(input)) return null

	// encodeURIComponent throws URIError on lone surrogates. Such a string can't be a valid emoji, so reject it instead of throwing.
	let encoded
	try {
		encoded = encodeURIComponent(input)
	} catch (e) {
		if (e instanceof URIError) return null
		throw e
	}

	// Check against the dataset
	const emojis = await emojisp

	// Best case scenario: they reacted with an exact replica of a valid emoji.
	if (emojis.includes(input)) return encoded

	// Maybe it has some extraneous \ufe0f or \ufe0e (at the end or in the middle), and it'll be valid if they're removed.
	const trimmed = input.replace(/\ufe0e|\ufe0f/g, "")
	if (!trimmed) return null // The key consisted of variation selectors only.
	const trimmedEncoded = encodeURIComponent(trimmed)
	if (trimmed !== input && emojis.includes(trimmed)) return trimmedEncoded

	// Okay, well, maybe it needs exactly one \ufe0f at the end, and it'll be valid if that's added.
	// This is tried on the stripped form, so it also covers keys that carried the wrong selector (\ufe0e) or a doubled one.
	const appended = trimmed + "\ufe0f"
	if (emojis.includes(appended)) return encodeURIComponent(appended)

	// Hmm, so adding or removing that from the end didn't help, but maybe there needs to be one in the middle? We can try some heuristics.
	// These heuristics come from executing scripts/emoji-surrogates-statistics.js.
	// They are keyed on the length of the URL-encoded stripped form. All indexes below are UTF-16 code unit indexes.
	if (trimmedEncoded.length <= 21 && /^[*#0-9]/.test(trimmed)) {
		// Keycap: digit, * or #, then \ufe0f, then the rest (the combining keycap).
		const keycap = trimmed[0] + "\ufe0f" + trimmed.slice(1)
		if (emojis.includes(keycap)) return encodeURIComponent(keycap)
	} else if (trimmedEncoded.length === 27 && trimmed[0] === "\u26f9") {
		// Person bouncing ball (U+26F9) with a gender sign: \ufe0f after the first character and at the end.
		const balling = trimmed[0] + "\ufe0f" + trimmed.slice(1) + "\ufe0f"
		if (emojis.includes(balling)) return encodeURIComponent(balling)
	} else if (trimmedEncoded.length === 30) {
		// Either a single-code-unit first character that needs \ufe0f right after it (broken chain, mending heart, heart on fire)...
		const thriving = trimmed[0] + "\ufe0f" + trimmed.slice(1)
		if (emojis.includes(thriving)) return encodeURIComponent(thriving)
		// ...or a surrogate-pair first character that needs \ufe0f after it and at the end (transgender flag, golfer, detective, weight lifter with gender sign).
		const powerful = trimmed.slice(0, 2) + "\ufe0f" + trimmed.slice(2) + "\ufe0f"
		if (emojis.includes(powerful)) return encodeURIComponent(powerful)
	} else if (trimmedEncoded.length === 51 && trimmed[3] === "\u2764") {
		// Couple with heart: the heart (U+2764) at index 3 needs \ufe0f after it.
		const yellowRomance = trimmed.slice(0, 3) + "\u2764\ufe0f" + trimmed.slice(4)
		if (emojis.includes(yellowRomance)) return encodeURIComponent(yellowRomance)
	}

	// There are a few more longer sequences that are not handled by the heuristics above.
	return null
}
|
||||
|
||||
/**
 * Encode a Matrix reaction key for use as a Discord reaction.
 * Keys starting with "mxc://" are handled by encodeCustomEmoji; every other key is handled by encodeDefaultEmoji.
 * @param {string} input
 * @param {string | null | undefined} shortcode only passed on to the custom emoji lookup
 * @returns {Promise<string?>} whatever the chosen encoder returned
 */
async function encodeEmoji(input, shortcode) {
	if (input.startsWith("mxc://")) {
		return encodeCustomEmoji(input, shortcode)
	}
	return encodeDefaultEmoji(input)
}
|
||||
|
||||
module.exports.encodeEmoji = encodeEmoji
|
||||
|
|
52
src/m2d/converters/emoji.test.js
Normal file
52
src/m2d/converters/emoji.test.js
Normal file
|
@ -0,0 +1,52 @@
|
|||
// @ts-check
|
||||
|
||||
const {test} = require("supertape")
|
||||
const {encodeEmoji} = require("./emoji")
|
||||
|
||||
// Each case is [test name, reaction key received from Matrix, expected result of encodeEmoji (null = no usable emoji)].
// Keys whose meaning depends on invisible code points are written with escapes, so that the presence or absence of
// U+FE0F (variation selector-16), U+200D (zero width joiner) and U+20E3 (combining enclosing keycap) is visible in review.
/** @type {[string, string, string | null][]} */
const cases = [
	["emoji: valid", "🦄", "%F0%9F%A6%84"],
	["emoji: freeform text", "ha", null],
	["emoji: suspicious unicode", "Ⓐ", null],
	// white smiling face without a selector
	["emoji: needs u+fe0f added", "\u263a", "%E2%98%BA%EF%B8%8F"],
	// star followed by a selector
	["emoji: needs u+fe0f removed", "\u2b50\ufe0f", "%E2%AD%90"],
	// "3" + combining keycap
	["emoji: number key needs u+fe0f in the middle", "3\u20e3", "3%EF%B8%8F%E2%83%A3"],
	// "#" + combining keycap
	["emoji: hash key needs u+fe0f in the middle", "#\u20e3", "%23%EF%B8%8F%E2%83%A3"],
	// chains + ZWJ + collision
	["emoji: broken chains needs u+fe0f in the middle", "\u26d3\u200d\ud83d\udca5", "%E2%9B%93%EF%B8%8F%E2%80%8D%F0%9F%92%A5"],
	// person bouncing ball + ZWJ + female sign
	["emoji: balling needs u+fe0f in the middle", "\u26f9\u200d\u2640", "%E2%9B%B9%EF%B8%8F%E2%80%8D%E2%99%80%EF%B8%8F"],
	// white flag + ZWJ + transgender symbol
	["emoji: trans flag needs u+fe0f in the middle", "\ud83c\udff3\u200d\u26a7", "%F0%9F%8F%B3%EF%B8%8F%E2%80%8D%E2%9A%A7%EF%B8%8F"],
	// detective + ZWJ + female sign
	["emoji: spy needs u+fe0f in the middle", "\ud83d\udd75\u200d\u2640", "%F0%9F%95%B5%EF%B8%8F%E2%80%8D%E2%99%80%EF%B8%8F"],
	// woman + ZWJ + heart + ZWJ + woman
	["emoji: couple needs u+fe0f in the middle", "\ud83d\udc69\u200d\u2764\u200d\ud83d\udc69", "%F0%9F%91%A9%E2%80%8D%E2%9D%A4%EF%B8%8F%E2%80%8D%F0%9F%91%A9"],
]

for (const [name, input, expected] of cases) {
	test(name, async t => {
		t.equal(await encodeEmoji(input, null), expected)
	})
}
|
3799
src/m2d/converters/emojis.txt
Normal file
3799
src/m2d/converters/emojis.txt
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue