cinny/src/util/markdown.js

319 lines
9.2 KiB
JavaScript
Raw Normal View History

import SimpleMarkdown from '@khanacademy/simple-markdown';
const {
defaultRules, parserFor, outputFor, anyScopeRegex, blockRegex, inlineRegex,
sanitizeText, sanitizeUrl,
} = SimpleMarkdown;
function htmlTag(tagName, content, attributes, isClosed) {
let s = '';
Object.entries(attributes || {}).forEach(([k, v]) => {
if (v !== undefined) {
s += ` ${sanitizeText(k)}`;
if (v !== null) s += `="${sanitizeText(v)}"`;
}
});
s = `<${tagName}${s}>`;
if (isClosed === false) {
return s;
}
return `${s}${content}</${tagName}>`;
}
function mathHtml(wrap, node) {
return htmlTag(wrap, htmlTag('code', sanitizeText(node.content)), { 'data-mx-maths': node.content });
}
const emojiRegex = /^:([\w-]+):/;
const plainRules = {
Array: {
...defaultRules.Array,
plain: (arr, output, state) => arr.map((node) => output(node, state)).join(''),
},
userMention: {
order: defaultRules.em.order - 0.9,
match: inlineRegex(/^(@\S+:\S+)/),
parse: (capture, _, state) => ({
content: state.userNames[capture[1]] ? `@${state.userNames[capture[1]]}` : capture[1],
id: capture[1],
}),
plain: (node) => node.content,
html: (node) => htmlTag('a', sanitizeText(node.content), {
href: `https://matrix.to/#/${encodeURIComponent(node.id)}`,
}),
},
roomMention: {
order: defaultRules.em.order - 0.8,
match: inlineRegex(/^(#\S+:\S+)/), // TODO: Handle line beginning with roomMention (instead of heading)
parse: (capture) => ({ content: capture[1], id: capture[1] }),
plain: (node) => node.content,
html: (node) => htmlTag('a', sanitizeText(node.content), {
href: `https://matrix.to/#/${encodeURIComponent(node.id)}`,
}),
},
emoji: {
order: defaultRules.em.order - 0.1,
match: (source, state) => {
if (!state.inline) return null;
const capture = emojiRegex.exec(source);
if (!capture) return null;
const emoji = state.emojis.get(capture[1]);
if (emoji) return capture;
return null;
},
parse: (capture, _, state) => ({ content: capture[1], emoji: state.emojis.get(capture[1]) }),
plain: ({ emoji }) => (emoji.mxc
? `:${emoji.shortcode}:`
: emoji.unicode),
html: ({ emoji }) => (emoji.mxc
? htmlTag('img', null, {
'data-mx-emoticon': null,
src: emoji.mxc,
alt: `:${emoji.shortcode}:`,
title: `:${emoji.shortcode}:`,
height: 32,
}, false)
: emoji.unicode),
},
newline: {
...defaultRules.newline,
plain: () => '\n',
},
paragraph: {
...defaultRules.paragraph,
plain: (node, output, state) => `${output(node.content, state)}\n\n`,
html: (node, output, state) => htmlTag('p', output(node.content, state)),
},
br: {
...defaultRules.br,
match: anyScopeRegex(/^ *\n/),
plain: () => '\n',
},
text: {
...defaultRules.text,
match: anyScopeRegex(/^[\s\S]+?(?=[^0-9A-Za-z\s\u00c0-\uffff]| *\n|\w+:\S|$)/),
plain: (node) => node.content,
},
};
const markdownRules = {
...defaultRules,
...plainRules,
heading: {
...defaultRules.heading,
plain: (node, output, state) => {
const out = output(node.content, state);
if (node.level <= 2) {
return `${out}\n${(node.level === 1 ? '=' : '-').repeat(out.length)}\n\n`;
}
return `${'#'.repeat(node.level)} ${out}\n\n`;
},
},
hr: {
...defaultRules.hr,
plain: () => '---\n\n',
},
codeBlock: {
...defaultRules.codeBlock,
plain: (node) => `\`\`\`${node.lang || ''}\n${node.content}\n\`\`\``,
},
fence: {
...defaultRules.fence,
match: blockRegex(/^ *(`{3,}|~{3,}) *(?:(\S+) *)?\n([\s\S]+?)\n?\1 *(?:\n *)*\n/),
},
blockQuote: {
...defaultRules.blockQuote,
plain: (node, output, state) => `> ${output(node.content, state).trim().replace(/\n/g, '\n> ')}\n\n`,
},
list: {
...defaultRules.list,
plain: (node, output, state) => `${node.items.map((item, i) => {
const prefix = node.ordered ? `${node.start + i + 1}. ` : '* ';
return prefix + output(item, state).replace(/\n/g, `\n${' '.repeat(prefix.length)}`);
}).join('\n')}\n`,
},
def: undefined,
table: {
...defaultRules.table,
plain: (node, output, state) => {
const header = node.header.map((content) => output(content, state));
function lineWidth(i) {
switch (node.align[i]) {
case 'left':
case 'right':
return 2;
case 'center':
return 3;
default:
return 1;
}
}
const colWidth = header.map((s, i) => Math.max(s.length, lineWidth(i)));
const cells = node.cells.map((row) => row.map((content, i) => {
const s = output(content, state);
if (s.length > colWidth[i]) {
colWidth[i] = s.length;
}
return s;
}));
function pad(s, i) {
switch (node.align[i]) {
case 'right':
return s.padStart(colWidth[i]);
case 'center':
return s
.padStart(s.length + Math.floor((colWidth[i] - s.length) / 2))
.padEnd(colWidth[i]);
default:
return s.padEnd(colWidth[i]);
}
}
const line = colWidth.map((len, i) => {
switch (node.align[i]) {
case 'left':
return `:${'-'.repeat(len - 1)}`;
case 'center':
return `:${'-'.repeat(len - 2)}:`;
case 'right':
return `${'-'.repeat(len - 1)}:`;
default:
return '-'.repeat(len);
}
});
const table = [
header.map(pad),
line,
...cells.map((row) => row.map(pad))];
return table.map((row) => `| ${row.join(' | ')} |\n`).join('');
},
},
displayMath: {
order: defaultRules.table.order + 0.1,
match: blockRegex(/^ *\$\$ *\n?([\s\S]+?)\n?\$\$ *(?:\n *)*\n/),
parse: (capture) => ({ content: capture[1] }),
plain: (node) => (node.content.includes('\n')
? `$$\n${node.content}\n$$\n`
: `$$${node.content}$$\n`),
html: (node) => mathHtml('div', node),
},
shrug: {
order: defaultRules.escape.order - 0.1,
match: inlineRegex(/^¯\\_\(ツ\)_\/¯/),
parse: (capture) => ({ type: 'text', content: capture[0] }),
},
escape: {
...defaultRules.escape,
plain: (node, output, state) => `\\${output(node.content, state)}`,
},
tableSeparator: {
...defaultRules.tableSeparator,
plain: () => ' | ',
},
link: {
...defaultRules.link,
plain: (node, output, state) => {
const out = output(node.content, state);
const target = sanitizeUrl(node.target) || '';
if (out !== target || node.title) {
return `[${out}](${target}${node.title ? ` "${node.title}"` : ''})`;
}
return out;
},
html: (node, output, state) => htmlTag('a', output(node.content, state), {
href: sanitizeUrl(node.target) || '',
title: node.title,
}),
},
image: {
...defaultRules.image,
plain: (node) => `![${node.alt}](${sanitizeUrl(node.target) || ''}${node.title ? ` "${node.title}"` : ''})`,
html: (node) => htmlTag('img', '', {
src: sanitizeUrl(node.target) || '',
alt: node.alt,
title: node.title,
}, false),
},
reflink: undefined,
refimage: undefined,
em: {
...defaultRules.em,
plain: (node, output, state) => `_${output(node.content, state)}_`,
},
strong: {
...defaultRules.strong,
plain: (node, output, state) => `**${output(node.content, state)}**`,
},
u: {
...defaultRules.u,
plain: (node, output, state) => `__${output(node.content, state)}__`,
},
del: {
...defaultRules.del,
plain: (node, output, state) => `~~${output(node.content, state)}~~`,
},
inlineCode: {
...defaultRules.inlineCode,
plain: (node) => `\`${node.content}\``,
},
spoiler: {
order: defaultRules.inlineCode.order + 0.1,
match: inlineRegex(/^\|\|([\s\S]+?)\|\|(?:\(([\s\S]+?)\))?/),
parse: (capture, parse, state) => ({
content: parse(capture[1], state),
reason: capture[2],
}),
plain: (node, output, state) => `[spoiler${node.reason ? `: ${node.reason}` : ''}](${output(node.content, state)})`,
html: (node, output, state) => htmlTag(
'span',
output(node.content, state),
{ 'data-mx-spoiler': node.reason || null },
),
},
inlineMath: {
order: defaultRules.del.order + 0.2,
match: inlineRegex(/^\$(\S[\s\S]+?\S|\S)\$(?!\d)/),
parse: (capture) => ({ content: capture[1] }),
plain: (node) => `$${node.content}$`,
html: (node) => mathHtml('span', node),
},
};
function genOut(rules) {
const parser = parserFor(rules);
const plainOut = outputFor(rules, 'plain');
const htmlOut = outputFor(rules, 'html');
return (source, state) => {
let content = parser(source, state);
if (content.length === 1 && content[0].type === 'paragraph') {
content = content[0].content;
}
const plain = plainOut(content, state).trim();
const html = htmlOut(content, state);
const plainHtml = html.replace(/<br>/g, '\n').replace(/<\/p><p>/g, '\n\n').replace(/<\/?p>/g, '');
const onlyPlain = sanitizeText(plain) === plainHtml;
return {
onlyPlain,
plain,
html,
};
};
}
export const plain = genOut(plainRules);
export const markdown = genOut(markdownRules);