feat(client,challenge-parser): update fill-in-the-blank to support Chinese (#63741)

This commit is contained in:
Huyen Nguyen
2025-11-25 11:02:22 -08:00
committed by GitHub
parent b6fff6e2b7
commit 33325b9002
24 changed files with 964 additions and 176 deletions
@@ -0,0 +1,9 @@
# --fillInTheBlank--
## --sentence--
`BLANK BLANK`
## --blanks--
`你 (nǐ)`
@@ -0,0 +1,17 @@
# --fillInTheBlank--
## --sentence--
`我 (wǒ) BLANK UI 设计师 (shè jì shī) 。`
## --blanks--
`是 (shì)`
### --feedback--
Feedback text.
# --explanation--
Explanation text.
@@ -0,0 +1,9 @@
# --fillInTheBlank--
## --sentence--
`你好 (nǐ hǎo)`
## --blanks--
`你`
@@ -0,0 +1,9 @@
# --fillInTheBlank--
## --sentence--
`BLANK hǎo`
## --blanks--
`nǐ`
@@ -0,0 +1,9 @@
# --fillInTheBlank--
## --sentence--
`BLANK好`
## --blanks--
`你 (nǐ)`
@@ -0,0 +1,13 @@
# --fillInTheBlank--
## --sentence--
`BLANK 好 (hǎo) BLANK`
## --blanks--
`你`
---
`nǐ`
@@ -0,0 +1,46 @@
---
lang: zh-CN
inputType: pinyin-to-hanzi
---
# --fillInTheBlank--
## --sentence--
`BLANK BLANKBLANK 是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)`
## --blanks--
`你 (nǐ)`
### --feedback--
Feedback text containing `汉字 (hàn zì)`.
---
`好 (hǎo)`
### --feedback--
This means "good" or "well".
---
`我 (wǒ)`
### --feedback--
This means "I".
---
`叫 (jiào)`
### --feedback--
This means "to be called".
# --explanation--
Explanation text containing `汉字 (hàn zì)`.
@@ -49,4 +49,6 @@ Feedback text.
# --explanation--
Wang Hua uses `请问 (qǐng wèn)` to politely start her question.
`我是 (wǒ shì) Web 开发者 (kāi fā zhě)。` I am a web developer.
`你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua),请问你叫什么名字 (qǐng wèn nǐ jiào shén me míng zi)` Hello, I am Wang Hua, may I ask what your name is?
@@ -3,8 +3,10 @@ const find = require('unist-util-find');
const visit = require('unist-util-visit');
const { getSection } = require('./utils/get-section');
const getAllBefore = require('./utils/before-heading');
const mdastToHtml = require('./utils/mdast-to-html');
const {
createMdastToHtml,
parseHanziPinyinPairs
} = require('./utils/i18n-stringify');
const { splitOnThematicBreak } = require('./utils/split-on-thematic-break');
const NOT_IN_PARAGRAPHS = `Each inline code block in the fillInTheBlank sentence section must in its own paragraph
@@ -40,19 +42,102 @@ function plugin() {
if (fillInTheBlankNodes.length > 0) {
const fillInTheBlankTree = root(fillInTheBlankNodes);
validateBlanksCount(fillInTheBlankTree);
validateBlanksSectionCount(fillInTheBlankTree);
const sentenceNodes = getSection(fillInTheBlankTree, '--sentence--');
const blanksNodes = getSection(fillInTheBlankTree, '--blanks--');
const fillInTheBlank = getfillInTheBlank(sentenceNodes, blanksNodes);
const lang = file.data.lang;
const inputType = file.data.inputType;
const toHtml = createMdastToHtml(lang);
file.data.fillInTheBlank = fillInTheBlank;
file.data.fillInTheBlank = getFillInTheBlank(sentenceNodes, blanksNodes);
/**
 * Builds the fill-in-the-blank data from the `--sentence--` and `--blanks--`
 * section nodes.
 *
 * Reads `lang`, `inputType` and `toHtml` from the enclosing scope.
 *
 * @param {Array<object>} sentenceNodes - Nodes of the sentence section; every
 *   child of each node must be an `inlineCode` node.
 * @param {Array<object>} blanksNodes - Nodes of the blanks section, passed to
 *   `getBlanks`.
 * @returns {{ sentence: string, blanks: Array<object>, inputType?: string }}
 *   The rendered sentence, the parsed blanks and, when set, the input type.
 * @throws {Error} If a sentence child is not inline code, if the sentence or
 *   blanks are missing, if the number of BLANK tokens differs from the number
 *   of answers, or if `inputType` is 'pinyin-to-hanzi' and an answer does not
 *   parse to exactly one hanzi (pinyin) pair.
 */
function getFillInTheBlank(sentenceNodes, blanksNodes) {
  const sentenceWithoutCodeBlocks = sentenceNodes.map(node => {
    node.children.forEach(child => {
      if (child.type === 'text' && child.value.trim() === '')
        throw Error(NOT_IN_PARAGRAPHS);
      if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK);
    });

    // For Chinese hanzi-pinyin, keep as inlineCode so handler generates ruby elements
    if (lang === 'zh-CN') {
      const hasChinesePairs = node.children.some(
        child =>
          child.type === 'inlineCode' &&
          parseHanziPinyinPairs(child.value).length > 0
      );
      if (hasChinesePairs) {
        return node;
      }
    }

    // Convert inlineCode to text for non-Chinese content
    const children = node.children.map(child => ({
      ...child,
      type: 'text'
    }));
    return { ...node, children };
  });

  const sentence = toHtml(sentenceWithoutCodeBlocks);
  const blanks = getBlanks(blanksNodes);

  if (!sentence)
    throw Error('sentence is missing from fill in the blank');
  if (!blanks) throw Error('blanks are missing from fill in the blank');

  // String.prototype.match returns null (not an empty array) when nothing
  // matches, so a sentence without any BLANK token must be counted as zero
  // instead of crashing with a TypeError on `.length`.
  const blankTokenCount = (sentence.match(/BLANK/g) || []).length;
  if (blankTokenCount !== blanks.length)
    throw Error(`Number of BLANKs doesn't match the number of answers.`);

  // For 'pinyin-to-hanzi' inputType, all answers must be of type 'hanzi-pinyin'.
  // This validation ensures compatibility with the pinyin input in the UI,
  // where users type pinyin and the system automatically converts it to hanzi
  // if the input value matches the expected pinyin from the answer.
  if (inputType === 'pinyin-to-hanzi') {
    const allAnswersAreHanziPinyin = blanks.every(
      blank => parseHanziPinyinPairs(blank.answer).length === 1
    );
    if (!allAnswersAreHanziPinyin) {
      throw Error(
        `When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format.`
      );
    }
  }

  return { sentence, blanks, ...(inputType && { inputType }) };
}
/**
 * Turns the nodes of the `--blanks--` section into answer/feedback records.
 *
 * The nodes are split into groups with `splitOnThematicBreak`; each group
 * yields one record. When a group contains a `--feedback--` marker, the answer
 * is read from the nodes before the marker and the feedback section is
 * rendered with `toHtml` (taken from the enclosing scope); otherwise the
 * feedback is `null`.
 *
 * @param {Array<object>} blanksNodes - Nodes of the blanks section.
 * @returns {Array<{ answer: string, feedback: string | null }>} One record per group.
 */
function getBlanks(blanksNodes) {
  const toBlank = group => {
    const groupTree = root(group);
    const feedbackMarker = find(groupTree, { value: '--feedback--' });

    if (!feedbackMarker) {
      return {
        answer: group[0].children[0].value,
        feedback: null
      };
    }

    const answerNodes = getAllBefore(groupTree, '--feedback--');
    const feedbackNodes = getSection(groupTree, '--feedback--');
    return {
      answer: answerNodes[0].children[0].value,
      feedback: toHtml(feedbackNodes)
    };
  };

  return splitOnThematicBreak(blanksNodes).map(group => toBlank(group));
}
}
}
}
function validateBlanksCount(fillInTheBlankTree) {
function validateBlanksSectionCount(fillInTheBlankTree) {
let blanksCount = 0;
visit(fillInTheBlankTree, { value: '--blanks--' }, () => {
blanksCount++;
@@ -64,49 +149,4 @@ function validateBlanksCount(fillInTheBlankTree) {
);
}
/**
 * Builds the fill-in-the-blank data from the `--sentence--` and `--blanks--`
 * section nodes.
 *
 * @param {Array<object>} sentenceNodes - Nodes of the sentence section; every
 *   child of each node must be an `inlineCode` node.
 * @param {Array<object>} blanksNodes - Nodes of the blanks section, passed to
 *   `getBlanks`.
 * @returns {{ sentence: string, blanks: Array<object> }} The rendered sentence
 *   and the parsed blanks.
 * @throws {Error} If a sentence child is not inline code, if the sentence or
 *   blanks are missing, or if the number of BLANK tokens differs from the
 *   number of blanks.
 */
function getfillInTheBlank(sentenceNodes, blanksNodes) {
  const sentenceWithoutCodeBlocks = sentenceNodes.map(node => {
    node.children.forEach(child => {
      if (child.type === 'text' && child.value.trim() === '')
        throw Error(NOT_IN_PARAGRAPHS);
      if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK);
    });

    // Retype the inline code children as text so they are not rendered as code
    const children = node.children.map(child => ({ ...child, type: 'text' }));
    return { ...node, children };
  });

  const sentence = mdastToHtml(sentenceWithoutCodeBlocks);
  const blanks = getBlanks(blanksNodes);

  if (!sentence) throw Error('sentence is missing from fill in the blank');
  if (!blanks) throw Error('blanks are missing from fill in the blank');

  // String.prototype.match returns null (not an empty array) when nothing
  // matches, so a sentence without any BLANK token must be counted as zero
  // instead of crashing with a TypeError on `.length`.
  const blankTokenCount = (sentence.match(/BLANK/g) || []).length;
  if (blankTokenCount !== blanks.length)
    throw Error(
      `Number of underscores in sentence doesn't match the number of blanks`
    );

  return { sentence, blanks };
}
/**
 * Converts the nodes of the `--blanks--` section into answer/feedback records.
 *
 * Each group produced by `splitOnThematicBreak` becomes one record. A group
 * with a `--feedback--` marker takes its answer from the nodes before the
 * marker and its feedback from the marker's section rendered by `mdastToHtml`;
 * a group without the marker gets `feedback: null`.
 *
 * @param {Array<object>} blanksNodes - Nodes of the blanks section.
 * @returns {Array<{ answer: string, feedback: string | null }>} One record per group.
 */
function getBlanks(blanksNodes) {
  const groups = splitOnThematicBreak(blanksNodes);

  return groups.map(group => {
    const groupTree = root(group);

    // No feedback marker: the answer is the first child of the group's first node
    if (!find(groupTree, { value: '--feedback--' })) {
      return { answer: group[0].children[0].value, feedback: null };
    }

    const nodesBeforeFeedback = getAllBefore(groupTree, '--feedback--');
    const feedbackSection = getSection(groupTree, '--feedback--');
    const answer = nodesBeforeFeedback[0].children[0].value;
    const feedback = mdastToHtml(feedbackSection);
    return { answer, feedback };
  });
}
module.exports = plugin;
@@ -8,7 +8,13 @@ describe('fill-in-the-blanks plugin', () => {
mockFillInTheBlankTwoSentencesAST,
mockFillInTheBlankBadSentence,
mockFillInTheBlankBadParagraph,
mockFillInTheBlankMultipleBlanks;
mockFillInTheBlankMultipleBlanks,
mockChineseFillInTheBlankAST,
mockChineseFillInTheBlankNoPinyinAST,
mockChineseFillInTheBlankNoHanziAST,
mockChineseFillInTheBlankWrongAnswerFormatAST,
mockChineseFillInTheBlankBlankAnswerMismatchAST,
mockChineseFillInTheBlankLatinAST;
const plugin = addFillInTheBlankQuestion();
let file = { data: {} };
@@ -29,6 +35,24 @@ describe('fill-in-the-blanks plugin', () => {
mockFillInTheBlankMultipleBlanks = await parseFixture(
'with-fill-in-the-blank-many-blanks.md'
);
mockChineseFillInTheBlankAST = await parseFixture(
'with-chinese-fill-in-the-blank.md'
);
mockChineseFillInTheBlankNoPinyinAST = await parseFixture(
'with-chinese-fill-in-the-blank-no-pinyin.md'
);
mockChineseFillInTheBlankNoHanziAST = await parseFixture(
'with-chinese-fill-in-the-blank-no-hanzi.md'
);
mockChineseFillInTheBlankWrongAnswerFormatAST = await parseFixture(
'with-chinese-fill-in-the-blank-wrong-answer-format.md'
);
mockChineseFillInTheBlankBlankAnswerMismatchAST = await parseFixture(
'with-chinese-fill-in-the-blank-blank-answer-mismatch.md'
);
mockChineseFillInTheBlankLatinAST = await parseFixture(
'with-chinese-fill-in-the-blank-latin.md'
);
});
beforeEach(() => {
@@ -55,15 +79,15 @@ describe('fill-in-the-blanks plugin', () => {
expect(Array.isArray(testObject.blanks)).toBe(true);
expect(testObject.blanks.length).toBe(3);
expect(testObject.blanks[0]).toHaveProperty('answer');
expect(typeof testObject.blanks[0].answer).toBe('string');
expect(testObject.blanks[0].answer).toEqual('are');
expect(testObject.blanks[0]).toHaveProperty('feedback');
expect(typeof testObject.blanks[0].feedback).toBe('string');
expect(testObject.blanks[1]).toHaveProperty('answer');
expect(typeof testObject.blanks[1].answer).toBe('string');
expect(testObject.blanks[1].answer).toEqual('right');
expect(testObject.blanks[1]).toHaveProperty('feedback');
expect(typeof testObject.blanks[1].feedback).toBe('string');
expect(testObject.blanks[2]).toHaveProperty('answer');
expect(typeof testObject.blanks[2].answer).toBe('string');
expect(testObject.blanks[2].answer).toEqual('Nice');
expect(testObject.blanks[2]).toHaveProperty('feedback');
expect(testObject.blanks[2].feedback).toBeNull();
});
@@ -167,4 +191,86 @@ Example of good formatting:
'<p>The verb <code>to be</code> is an irregular verb. When conjugated with the pronoun <code>you</code>, <code>be</code> becomes <code>are</code>. For example: <code>You are an English learner.</code></p>'
});
});
it('should parse Chinese fill-in-the-blank sentence and answer correctly if they are in `hanzi (pinyin)` format', () => {
file.data.lang = 'zh-CN';
file.data.inputType = 'pinyin-to-hanzi';
plugin(mockChineseFillInTheBlankAST, file);
const testObject = file.data.fillInTheBlank;
expect(testObject.inputType).toBe('pinyin-to-hanzi');
expect(testObject.sentence).toBe(
'<p>BLANK BLANKBLANK <ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby><ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby> BLANK <ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby></p>'
);
expect(testObject.blanks.length).toBe(4);
expect(testObject.blanks[0].answer).toEqual('你 (nǐ)');
expect(testObject.blanks[0].feedback).toBe(
'<p>Feedback text containing <ruby>汉字<rp>(</rp><rt>hàn zì</rt><rp>)</rp></ruby>.</p>'
);
expect(testObject.blanks[1].answer).toEqual('好 (hǎo)');
expect(testObject.blanks[1].feedback).toBe(
'<p>This means "good" or "well".</p>'
);
expect(testObject.blanks[2].answer).toEqual('我 (wǒ)');
expect(testObject.blanks[2].feedback).toBe('<p>This means "I".</p>');
expect(testObject.blanks[3].answer).toEqual('叫 (jiào)');
expect(testObject.blanks[3].feedback).toBe(
'<p>This means "to be called".</p>'
);
});
it('should return sentence as plain text when sentence does not contain pinyin', () => {
file.data.lang = 'zh-CN';
plugin(mockChineseFillInTheBlankNoPinyinAST, file);
const testObject = file.data.fillInTheBlank;
expect(testObject.sentence).toBe('<p>BLANK好</p>');
expect(testObject.blanks[0].answer).toEqual('你 (nǐ)');
});
it('should return sentence as plain text when sentence does not contain hanzi', () => {
file.data.lang = 'zh-CN';
plugin(mockChineseFillInTheBlankNoHanziAST, file);
const testObject = file.data.fillInTheBlank;
expect(testObject.sentence).toBe('<p>BLANK hǎo</p>');
expect(testObject.blanks[0].answer).toEqual('nǐ');
});
it("should throw if the number of blanks in the sentence doesn't match the number of answers", () => {
file.data.lang = 'zh-CN';
expect(() => {
plugin(mockChineseFillInTheBlankBlankAnswerMismatchAST, file);
}).toThrow(`Number of BLANKs doesn't match the number of answers.`);
});
it('should throw error when inputType is pinyin-to-hanzi but answer is not in hanzi-pinyin format', () => {
file.data.lang = 'zh-CN';
file.data.inputType = 'pinyin-to-hanzi';
expect(() => {
plugin(mockChineseFillInTheBlankWrongAnswerFormatAST, file);
}).toThrow(
"When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format."
);
});
it('should separate BLANK and adjacent Latin text in Chinese sentences', () => {
file.data.lang = 'zh-CN';
plugin(mockChineseFillInTheBlankLatinAST, file);
const testObject = file.data.fillInTheBlank;
expect(testObject.sentence).toBe(
'<p><ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby> BLANK UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby> 。</p>'
);
expect(testObject.blanks.length).toBe(1);
expect(testObject.blanks[0].answer).toEqual('是 (shì)');
expect(testObject.blanks[0].feedback).toBe('<p>Feedback text.</p>');
});
});
@@ -172,7 +172,7 @@ describe('add-text', () => {
'<section id="instructions">\n<p>Instructions containing <ruby>汉字<rp>(</rp><rt>hàn zì</rt><rp>)</rp></ruby>.</p>\n</section>'
);
expect(zhFile.data.explanation).toBe(
'<section id="explanation">\n<p>Wang Hua uses <ruby>请问<rp>(</rp><rt>qǐng wèn</rt><rp>)</rp></ruby> to politely start her question.</p>\n</section>'
'<section id="explanation">\n<p><ruby>我是<rp>(</rp><rt>wǒ shì</rt><rp>)</rp></ruby> Web <ruby>开发者<rp>(</rp><rt>kāi fā zhě</rt><rp>)</rp></ruby>。 I am a web developer.</p>\n<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby><ruby>我是王华<rp>(</rp><rt>wǒ shì Wang Hua</rt><rp>)</rp></ruby><ruby>请问你叫什么名字<rp>(</rp><rt>qǐng wèn nǐ jiào shén me míng zi</rt><rp>)</rp></ruby> Hello, I am Wang Hua, may I ask what your name is?</p>\n</section>'
);
});
});
@@ -1,61 +1,94 @@
const mdastToHTML = require('./mdast-to-html');
/**
* Parses Chinese text in format: hanzi (pinyin)
* @param {string} text - Text in format: hanzi (pinyin)
* @returns {{ hanzi: string, pinyin: string } | null} Parsed hanzi and pinyin, or null if not matching
*/
function parseChinesePattern(text) {
const match = text.match(/^(.+?)\s*\((.+?)\)$/);
// Captures hanzi (pinyin) pairs (hanzi, optional whitespace, then pinyin parentheses)
const HANZI_PINYIN_PAIR = '([\u4e00-\u9fff]+)\\s*\\(([^)]+)\\)';
if (!match) {
return null;
// Matches the BLANK placeholder
const BLANK_TOKEN = 'BLANK';
// Matches Chinese and English punctuation
const PUNCTUATION = '[,。?!!?,;:;:、]+';
// Matches Latin text with spaces
const OTHER_TEXT = '([a-zA-Z\\s]+)';
const HANZI_PINYIN_REGEX = new RegExp(
`${HANZI_PINYIN_PAIR}|${BLANK_TOKEN}|${PUNCTUATION}|${OTHER_TEXT}`,
'g'
);
/**
* Parses all hanzi-pinyin pairs from text
* @param {string} text - Text potentially containing multiple hanzi (pinyin) patterns
* @returns {Array<{hanzi: string, pinyin: string}>} Array of parsed pairs
*/
function parseHanziPinyinPairs(text) {
const pairs = [];
const regex = new RegExp(HANZI_PINYIN_REGEX);
let match;
while ((match = regex.exec(text)) !== null) {
if (match[1] && match[2]) {
pairs.push({
hanzi: match[1].trim(),
pinyin: match[2].trim()
});
}
}
return {
hanzi: match[1].trim(),
pinyin: match[2].trim()
};
return pairs;
}
/**
* Custom handler for Chinese inline code to render as ruby elements
* Matches hanzi-pinyin pairs, BLANK, and punctuation as separate elements
* @param {object} state - The state object from mdast-util-to-hast
* @param {object} node - The inlineCode node
* @returns {object} Hast element node
* @returns {object|Array<object>} Hast element node or array of nodes
*/
function chineseInlineCodeHandler(state, node) {
const parsed = parseChinesePattern(node.value);
const rubyPairs = parseHanziPinyinPairs(node.value);
if (parsed) {
return {
type: 'element',
tagName: 'ruby',
properties: {},
children: [
{ type: 'text', value: parsed.hanzi },
{
if (rubyPairs.length > 0) {
const matches = [...node.value.matchAll(HANZI_PINYIN_REGEX)];
const nodes = matches.map(fullMatch => {
if (fullMatch[1] && fullMatch[2]) {
return {
type: 'element',
tagName: 'rp',
tagName: 'ruby',
properties: {},
children: [{ type: 'text', value: '(' }]
},
{
type: 'element',
tagName: 'rt',
properties: {},
children: [{ type: 'text', value: parsed.pinyin }]
},
{
type: 'element',
tagName: 'rp',
properties: {},
children: [{ type: 'text', value: ')' }]
}
]
};
children: [
{ type: 'text', value: fullMatch[1].trim() },
{
type: 'element',
tagName: 'rp',
properties: {},
children: [{ type: 'text', value: '(' }]
},
{
type: 'element',
tagName: 'rt',
properties: {},
children: [{ type: 'text', value: fullMatch[2].trim() }]
},
{
type: 'element',
tagName: 'rp',
properties: {},
children: [{ type: 'text', value: ')' }]
}
]
};
}
// Other captures (BLANK, punctuation, other text including spaces) should preserve exactly
return { type: 'text', value: fullMatch[0] };
});
return nodes.length === 1 ? nodes[0] : nodes;
}
// If static text, return code
return {
type: 'element',
// TODO: change this to span
@@ -75,4 +108,7 @@ const rubyOptions = {
const createMdastToHtml = lang =>
lang == 'zh-CN' ? x => mdastToHTML(x, rubyOptions) : mdastToHTML;
module.exports = { parseChinesePattern, createMdastToHtml };
module.exports = {
parseHanziPinyinPairs,
createMdastToHtml
};
@@ -1,44 +1,56 @@
import { describe, it, expect } from 'vitest';
import { createMdastToHtml, parseChinesePattern } from './i18n-stringify';
import { createMdastToHtml, parseHanziPinyinPairs } from './i18n-stringify';
describe('parseChinesePattern', () => {
it('should parse Chinese text with hanzi and pinyin', () => {
const result = parseChinesePattern('你好 (nǐ hǎo)');
expect(result).toEqual({
describe('parseHanziPinyinPairs', () => {
it('should parse single hanzi-pinyin pair', () => {
const withSpaceSeparator = parseHanziPinyinPairs('你好 (nǐ hǎo)');
expect(withSpaceSeparator).toHaveLength(1);
expect(withSpaceSeparator[0]).toMatchObject({
hanzi: '你好',
pinyin: 'nǐ hǎo'
});
const withoutSpaceSeparator = parseHanziPinyinPairs('你好(nǐ hǎo)');
expect(withoutSpaceSeparator).toHaveLength(1);
expect(withoutSpaceSeparator[0]).toMatchObject({
hanzi: '你好',
pinyin: 'nǐ hǎo'
});
});
it('should handle text without spaces before parentheses', () => {
const result = parseChinesePattern('你好(nǐ hǎo)');
expect(result).toEqual({
it('should parse multiple hanzi-pinyin pairs', () => {
const withSpaceSeparator = parseHanziPinyinPairs(
'你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua)'
);
expect(withSpaceSeparator).toHaveLength(2);
expect(withSpaceSeparator[0]).toMatchObject({
hanzi: '你好',
pinyin: 'nǐ hǎo'
});
});
expect(withSpaceSeparator[1]).toMatchObject({
hanzi: '我是王华',
pinyin: 'wǒ shì Wang Hua'
});
it('should handle text with multiple spaces', () => {
const result = parseChinesePattern('你好 (nǐ hǎo)');
expect(result).toEqual({
const withoutSpaceSeparator = parseHanziPinyinPairs(
'你好(nǐ hǎo),我是王华(wǒ shì Wang Hua)'
);
expect(withoutSpaceSeparator).toHaveLength(2);
expect(withoutSpaceSeparator[0]).toMatchObject({
hanzi: '你好',
pinyin: 'nǐ hǎo'
});
expect(withoutSpaceSeparator[1]).toMatchObject({
hanzi: '我是王华',
pinyin: 'wǒ shì Wang Hua'
});
});
it('should return null for text without parentheses', () => {
const result = parseChinesePattern('你好');
expect(result).toBeNull();
});
it('should return null for text with only opening parenthesis', () => {
const result = parseChinesePattern('你好 (nǐ hǎo');
expect(result).toBeNull();
});
it('should return null for empty string', () => {
const result = parseChinesePattern('');
expect(result).toBeNull();
it('should return empty array for text without pairs', () => {
const result = parseHanziPinyinPairs('你好');
expect(result).toHaveLength(0);
});
});
@@ -99,6 +111,93 @@ describe('createMdastToHtml', () => {
);
});
it('should render BLANK tokens and punctuation marks as plain text', () => {
const toHtml = createMdastToHtml('zh-CN');
const withoutSpacesAroundBlanks = [
{
type: 'paragraph',
children: [
{
type: 'inlineCode',
value:
'你好 (nǐ hǎo)BLANK是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ)BLANK什么名字 (shén me míng zi)'
}
]
}
];
expect(toHtml(withoutSpacesAroundBlanks)).toBe(
'<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>BLANK<ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby><ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby>BLANK<ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby></p>'
);
const withSpacesAroundBlanks = [
{
type: 'paragraph',
children: [
{
type: 'inlineCode',
value:
'你好 (nǐ hǎo) BLANK 是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)'
}
]
}
];
expect(toHtml(withSpacesAroundBlanks)).toBe(
'<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby> BLANK <ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby><ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby> BLANK <ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby></p>'
);
});
it('should render Latin words as plain text while applying ruby to hanzi-pinyin pairs', () => {
const toHtml = createMdastToHtml('zh-CN');
const nodes = [
{
type: 'paragraph',
children: [
{
type: 'inlineCode',
value: '我是 (wǒ shì) UI 设计师 (shè jì shī)'
}
]
}
];
const actual = toHtml(nodes);
expect(actual).toBe(
'<p><ruby>我是<rp>(</rp><rt>wǒ shì</rt><rp>)</rp></ruby> UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby></p>'
);
});
it('should handle BLANK token and Latin word mix', () => {
const toHtml = createMdastToHtml('zh-CN');
const nodes = [
{
type: 'paragraph',
children: [
{
type: 'inlineCode',
value: '我 (wǒ) BLANK UI 设计师 (shè jì shī)'
}
]
}
];
const actual = toHtml(nodes);
expect(actual).toBe(
'<p><ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby> BLANK UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby></p>'
);
});
it('should render multiple adjacent BLANK tokens in Chinese sentence', () => {
const toHtml = createMdastToHtml('zh-CN');
const nodes = [
{
type: 'paragraph',
children: [{ type: 'inlineCode', value: 'BLANK BLANK,你好 (nǐ hǎo)' }]
}
];
const actual = toHtml(nodes);
expect(actual).toBe(
'<p>BLANK BLANK<ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby></p>'
);
});
it('should fallback to code element if pattern does not match', () => {
const toHtml = createMdastToHtml('zh-CN');
const nodes = [
@@ -126,4 +225,16 @@ describe('createMdastToHtml', () => {
const actual = toHtml(nodes);
expect(actual).toBe('<p><code>请问 (qǐng wèn)</code></p>');
});
it('should render as regular code when lang is not defined', () => {
const toHtml = createMdastToHtml();
const nodes = [
{
type: 'paragraph',
children: [{ type: 'inlineCode', value: '请问 (qǐng wèn)' }]
}
];
const actual = toHtml(nodes);
expect(actual).toBe('<p><code>请问 (qǐng wèn)</code></p>');
});
});