mirror of
https://github.com/freeCodeCamp/freeCodeCamp.git
synced 2026-05-28 18:26:54 +00:00
feat(client,challenge-parser): update fill-in-the-blank to support Chinese (#63741)
This commit is contained in:
+9
@@ -0,0 +1,9 @@
|
||||
# --fillInTheBlank--
|
||||
|
||||
## --sentence--
|
||||
|
||||
`BLANK BLANK`
|
||||
|
||||
## --blanks--
|
||||
|
||||
`你 (nǐ)`
|
||||
@@ -0,0 +1,17 @@
|
||||
# --fillInTheBlank--
|
||||
|
||||
## --sentence--
|
||||
|
||||
`我 (wǒ) BLANK UI 设计师 (shè jì shī) 。`
|
||||
|
||||
## --blanks--
|
||||
|
||||
`是 (shì)`
|
||||
|
||||
### --feedback--
|
||||
|
||||
Feedback text.
|
||||
|
||||
# --explanation--
|
||||
|
||||
Explanation text.
|
||||
@@ -0,0 +1,9 @@
|
||||
# --fillInTheBlank--
|
||||
|
||||
## --sentence--
|
||||
|
||||
`你好 (nǐ hǎo)`
|
||||
|
||||
## --blanks--
|
||||
|
||||
`你`
|
||||
@@ -0,0 +1,9 @@
|
||||
# --fillInTheBlank--
|
||||
|
||||
## --sentence--
|
||||
|
||||
`BLANK hǎo`
|
||||
|
||||
## --blanks--
|
||||
|
||||
`nǐ`
|
||||
@@ -0,0 +1,9 @@
|
||||
# --fillInTheBlank--
|
||||
|
||||
## --sentence--
|
||||
|
||||
`BLANK好`
|
||||
|
||||
## --blanks--
|
||||
|
||||
`你 (nǐ)`
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
# --fillInTheBlank--
|
||||
|
||||
## --sentence--
|
||||
|
||||
`BLANK 好 (hǎo) BLANK`
|
||||
|
||||
## --blanks--
|
||||
|
||||
`你`
|
||||
|
||||
---
|
||||
|
||||
`nǐ`
|
||||
@@ -0,0 +1,46 @@
|
||||
---
|
||||
lang: zh-CN
|
||||
inputType: pinyin-to-hanzi
|
||||
---
|
||||
|
||||
# --fillInTheBlank--
|
||||
|
||||
## --sentence--
|
||||
|
||||
`BLANK BLANK,BLANK 是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)?`
|
||||
|
||||
## --blanks--
|
||||
|
||||
`你 (nǐ)`
|
||||
|
||||
### --feedback--
|
||||
|
||||
Feedback text containing `汉字 (hàn zì)`.
|
||||
|
||||
---
|
||||
|
||||
`好 (hǎo)`
|
||||
|
||||
### --feedback--
|
||||
|
||||
This means "good" or "well".
|
||||
|
||||
---
|
||||
|
||||
`我 (wǒ)`
|
||||
|
||||
### --feedback--
|
||||
|
||||
This means "I".
|
||||
|
||||
---
|
||||
|
||||
`叫 (jiào)`
|
||||
|
||||
### --feedback--
|
||||
|
||||
This means "to be called".
|
||||
|
||||
# --explanation--
|
||||
|
||||
Explanation text containing `汉字 (hàn zì)`.
|
||||
@@ -49,4 +49,6 @@ Feedback text.
|
||||
|
||||
# --explanation--
|
||||
|
||||
Wang Hua uses `请问 (qǐng wèn)` to politely start her question.
|
||||
`我是 (wǒ shì) Web 开发者 (kāi fā zhě)。` – I am a web developer.
|
||||
|
||||
`你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua),请问你叫什么名字 (qǐng wèn nǐ jiào shén me míng zi)?` – Hello, I am Wang Hua, may I ask what your name is?
|
||||
@@ -3,8 +3,10 @@ const find = require('unist-util-find');
|
||||
const visit = require('unist-util-visit');
|
||||
const { getSection } = require('./utils/get-section');
|
||||
const getAllBefore = require('./utils/before-heading');
|
||||
const mdastToHtml = require('./utils/mdast-to-html');
|
||||
|
||||
const {
|
||||
createMdastToHtml,
|
||||
parseHanziPinyinPairs
|
||||
} = require('./utils/i18n-stringify');
|
||||
const { splitOnThematicBreak } = require('./utils/split-on-thematic-break');
|
||||
|
||||
const NOT_IN_PARAGRAPHS = `Each inline code block in the fillInTheBlank sentence section must in its own paragraph
|
||||
@@ -40,19 +42,102 @@ function plugin() {
|
||||
if (fillInTheBlankNodes.length > 0) {
|
||||
const fillInTheBlankTree = root(fillInTheBlankNodes);
|
||||
|
||||
validateBlanksCount(fillInTheBlankTree);
|
||||
validateBlanksSectionCount(fillInTheBlankTree);
|
||||
|
||||
const sentenceNodes = getSection(fillInTheBlankTree, '--sentence--');
|
||||
const blanksNodes = getSection(fillInTheBlankTree, '--blanks--');
|
||||
|
||||
const fillInTheBlank = getfillInTheBlank(sentenceNodes, blanksNodes);
|
||||
const lang = file.data.lang;
|
||||
const inputType = file.data.inputType;
|
||||
const toHtml = createMdastToHtml(lang);
|
||||
|
||||
file.data.fillInTheBlank = fillInTheBlank;
|
||||
file.data.fillInTheBlank = getFillInTheBlank(sentenceNodes, blanksNodes);
|
||||
|
||||
/**
 * Builds the fill-in-the-blank data from the `--sentence--` and `--blanks--`
 * sections of a challenge.
 *
 * Reads `lang`, `inputType` and `toHtml` from the enclosing scope.
 *
 * @param {Array<object>} sentenceNodes - mdast nodes of the sentence section;
 *   every child of every node must be an `inlineCode` node.
 * @param {Array<object>} blanksNodes - mdast nodes of the blanks section.
 * @returns {object} `{ sentence, blanks }`, plus `inputType` when it is set.
 * @throws {Error} When the sentence is not made of inline code in its own
 *   paragraph, when the sentence is empty, when the number of BLANK tokens
 *   differs from the number of answers, or when `inputType` is
 *   'pinyin-to-hanzi' and an answer is not a single `hanzi (pinyin)` pair.
 */
function getFillInTheBlank(sentenceNodes, blanksNodes) {
  const sentenceWithoutCodeBlocks = sentenceNodes.map(node => {
    node.children.forEach(child => {
      if (child.type === 'text' && child.value.trim() === '')
        throw Error(NOT_IN_PARAGRAPHS);
      if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK);
    });

    // For Chinese hanzi-pinyin, keep as inlineCode so handler generates ruby elements
    if (lang === 'zh-CN') {
      const hasChinesePairs = node.children.some(
        child =>
          child.type === 'inlineCode' &&
          parseHanziPinyinPairs(child.value).length > 0
      );

      if (hasChinesePairs) {
        return node;
      }
    }

    // Convert inlineCode to text for non-Chinese content
    const children = node.children.map(child => ({
      ...child,
      type: 'text'
    }));
    return { ...node, children };
  });

  const sentence = toHtml(sentenceWithoutCodeBlocks);
  const blanks = getBlanks(blanksNodes);

  if (!sentence)
    throw Error('sentence is missing from fill in the blank');
  if (!blanks) throw Error('blanks are missing from fill in the blank');

  // `String.prototype.match` with a global regex returns null (not an empty
  // array) when nothing matches, so a sentence without any BLANK token must
  // be counted as zero instead of dereferencing null.
  const blankTokenCount = (sentence.match(/BLANK/g) || []).length;
  if (blankTokenCount !== blanks.length)
    throw Error(`Number of BLANKs doesn't match the number of answers.`);

  // For 'pinyin-to-hanzi' inputType, all answers must be of type 'hanzi-pinyin'.
  // This validation ensures compatibility with the pinyin input in the UI,
  // where users type pinyin and the system automatically converts it to hanzi
  // if the input value matches the expected pinyin from the answer.
  if (inputType === 'pinyin-to-hanzi') {
    const allAnswersAreHanziPinyin = blanks.every(
      blank => parseHanziPinyinPairs(blank.answer).length === 1
    );

    if (!allAnswersAreHanziPinyin) {
      throw Error(
        `When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format.`
      );
    }
  }

  return { sentence, blanks, ...(inputType && { inputType }) };
}
|
||||
|
||||
/**
 * Turns the `--blanks--` section into a list of answers.
 *
 * The section is split on thematic breaks; each resulting group yields one
 * entry. The answer is the value of the first child of the group's first
 * node. When the group contains a `--feedback--` marker, the nodes of that
 * section are rendered with `toHtml` (taken from the enclosing scope),
 * otherwise the feedback is `null`.
 *
 * @param {Array<object>} blanksNodes - mdast nodes of the blanks section.
 * @returns {Array<object>} One `{ answer, feedback }` entry per group.
 */
function getBlanks(blanksNodes) {
  const toBlank = group => {
    const groupTree = root(group);
    const feedbackMarker = find(groupTree, { value: '--feedback--' });

    if (!feedbackMarker) {
      return {
        answer: group[0].children[0].value,
        feedback: null
      };
    }

    // Everything before the marker is the answer; the rest is its feedback.
    const answerNodes = getAllBefore(groupTree, '--feedback--');
    const feedbackNodes = getSection(groupTree, '--feedback--');

    return {
      answer: answerNodes[0].children[0].value,
      feedback: toHtml(feedbackNodes)
    };
  };

  return splitOnThematicBreak(blanksNodes).map(toBlank);
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function validateBlanksCount(fillInTheBlankTree) {
|
||||
function validateBlanksSectionCount(fillInTheBlankTree) {
|
||||
let blanksCount = 0;
|
||||
visit(fillInTheBlankTree, { value: '--blanks--' }, () => {
|
||||
blanksCount++;
|
||||
@@ -64,49 +149,4 @@ function validateBlanksCount(fillInTheBlankTree) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Builds the fill-in-the-blank data from the `--sentence--` and `--blanks--`
 * sections of a challenge.
 *
 * @param {Array<object>} sentenceNodes - mdast nodes of the sentence section;
 *   every child of every node must be an `inlineCode` node.
 * @param {Array<object>} blanksNodes - mdast nodes of the blanks section.
 * @returns {object} `{ sentence, blanks }`.
 * @throws {Error} When the sentence is not made of inline code in its own
 *   paragraph, when the sentence is empty, or when the number of BLANK
 *   tokens differs from the number of blanks.
 */
function getfillInTheBlank(sentenceNodes, blanksNodes) {
  const sentenceWithoutCodeBlocks = sentenceNodes.map(node => {
    node.children.forEach(child => {
      if (child.type === 'text' && child.value.trim() === '')
        throw Error(NOT_IN_PARAGRAPHS);
      if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK);
    });

    // Render the sentence as plain text rather than as <code> elements.
    const children = node.children.map(child => ({ ...child, type: 'text' }));
    return { ...node, children };
  });
  const sentence = mdastToHtml(sentenceWithoutCodeBlocks);
  const blanks = getBlanks(blanksNodes);

  if (!sentence) throw Error('sentence is missing from fill in the blank');
  if (!blanks) throw Error('blanks are missing from fill in the blank');

  // `match` with a global regex yields null when there is no BLANK token at
  // all; count that as zero so the mismatch is reported with the intended
  // error instead of a TypeError.
  const blankTokenCount = (sentence.match(/BLANK/g) || []).length;
  if (blankTokenCount !== blanks.length)
    throw Error(
      `Number of underscores in sentence doesn't match the number of blanks`
    );

  return { sentence, blanks };
}
|
||||
|
||||
/**
 * Turns the `--blanks--` section into a list of answers.
 *
 * The section is split on thematic breaks; each resulting group yields one
 * entry. The answer is the value of the first child of the group's first
 * node. When the group contains a `--feedback--` marker, the nodes of that
 * section are rendered with `mdastToHtml`, otherwise the feedback is `null`.
 *
 * @param {Array<object>} blanksNodes - mdast nodes of the blanks section.
 * @returns {Array<object>} One `{ answer, feedback }` entry per group.
 */
function getBlanks(blanksNodes) {
  const toBlank = group => {
    const groupTree = root(group);
    const feedbackMarker = find(groupTree, { value: '--feedback--' });

    if (!feedbackMarker) {
      return { answer: group[0].children[0].value, feedback: null };
    }

    // Everything before the marker is the answer; the rest is its feedback.
    const answerNodes = getAllBefore(groupTree, '--feedback--');
    const feedbackNodes = getSection(groupTree, '--feedback--');

    return {
      answer: answerNodes[0].children[0].value,
      feedback: mdastToHtml(feedbackNodes)
    };
  };

  return splitOnThematicBreak(blanksNodes).map(toBlank);
}
|
||||
|
||||
module.exports = plugin;
|
||||
|
||||
@@ -8,7 +8,13 @@ describe('fill-in-the-blanks plugin', () => {
|
||||
mockFillInTheBlankTwoSentencesAST,
|
||||
mockFillInTheBlankBadSentence,
|
||||
mockFillInTheBlankBadParagraph,
|
||||
mockFillInTheBlankMultipleBlanks;
|
||||
mockFillInTheBlankMultipleBlanks,
|
||||
mockChineseFillInTheBlankAST,
|
||||
mockChineseFillInTheBlankNoPinyinAST,
|
||||
mockChineseFillInTheBlankNoHanziAST,
|
||||
mockChineseFillInTheBlankWrongAnswerFormatAST,
|
||||
mockChineseFillInTheBlankBlankAnswerMismatchAST,
|
||||
mockChineseFillInTheBlankLatinAST;
|
||||
const plugin = addFillInTheBlankQuestion();
|
||||
let file = { data: {} };
|
||||
|
||||
@@ -29,6 +35,24 @@ describe('fill-in-the-blanks plugin', () => {
|
||||
mockFillInTheBlankMultipleBlanks = await parseFixture(
|
||||
'with-fill-in-the-blank-many-blanks.md'
|
||||
);
|
||||
mockChineseFillInTheBlankAST = await parseFixture(
|
||||
'with-chinese-fill-in-the-blank.md'
|
||||
);
|
||||
mockChineseFillInTheBlankNoPinyinAST = await parseFixture(
|
||||
'with-chinese-fill-in-the-blank-no-pinyin.md'
|
||||
);
|
||||
mockChineseFillInTheBlankNoHanziAST = await parseFixture(
|
||||
'with-chinese-fill-in-the-blank-no-hanzi.md'
|
||||
);
|
||||
mockChineseFillInTheBlankWrongAnswerFormatAST = await parseFixture(
|
||||
'with-chinese-fill-in-the-blank-wrong-answer-format.md'
|
||||
);
|
||||
mockChineseFillInTheBlankBlankAnswerMismatchAST = await parseFixture(
|
||||
'with-chinese-fill-in-the-blank-blank-answer-mismatch.md'
|
||||
);
|
||||
mockChineseFillInTheBlankLatinAST = await parseFixture(
|
||||
'with-chinese-fill-in-the-blank-latin.md'
|
||||
);
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
@@ -55,15 +79,15 @@ describe('fill-in-the-blanks plugin', () => {
|
||||
expect(Array.isArray(testObject.blanks)).toBe(true);
|
||||
expect(testObject.blanks.length).toBe(3);
|
||||
expect(testObject.blanks[0]).toHaveProperty('answer');
|
||||
expect(typeof testObject.blanks[0].answer).toBe('string');
|
||||
expect(testObject.blanks[0].answer).toEqual('are');
|
||||
expect(testObject.blanks[0]).toHaveProperty('feedback');
|
||||
expect(typeof testObject.blanks[0].feedback).toBe('string');
|
||||
expect(testObject.blanks[1]).toHaveProperty('answer');
|
||||
expect(typeof testObject.blanks[1].answer).toBe('string');
|
||||
expect(testObject.blanks[1].answer).toEqual('right');
|
||||
expect(testObject.blanks[1]).toHaveProperty('feedback');
|
||||
expect(typeof testObject.blanks[1].feedback).toBe('string');
|
||||
expect(testObject.blanks[2]).toHaveProperty('answer');
|
||||
expect(typeof testObject.blanks[2].answer).toBe('string');
|
||||
expect(testObject.blanks[2].answer).toEqual('Nice');
|
||||
expect(testObject.blanks[2]).toHaveProperty('feedback');
|
||||
expect(testObject.blanks[2].feedback).toBeNull();
|
||||
});
|
||||
@@ -167,4 +191,86 @@ Example of good formatting:
|
||||
'<p>The verb <code>to be</code> is an irregular verb. When conjugated with the pronoun <code>you</code>, <code>be</code> becomes <code>are</code>. For example: <code>You are an English learner.</code></p>'
|
||||
});
|
||||
});
|
||||
|
||||
it('should parse Chinese fill-in-the-blank sentence and answer correctly if they are in `hanzi (pinyin)` format', () => {
|
||||
file.data.lang = 'zh-CN';
|
||||
file.data.inputType = 'pinyin-to-hanzi';
|
||||
plugin(mockChineseFillInTheBlankAST, file);
|
||||
const testObject = file.data.fillInTheBlank;
|
||||
|
||||
expect(testObject.inputType).toBe('pinyin-to-hanzi');
|
||||
|
||||
expect(testObject.sentence).toBe(
|
||||
'<p>BLANK BLANK,BLANK <ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby>,<ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby> BLANK <ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby>?</p>'
|
||||
);
|
||||
expect(testObject.blanks.length).toBe(4);
|
||||
|
||||
expect(testObject.blanks[0].answer).toEqual('你 (nǐ)');
|
||||
expect(testObject.blanks[0].feedback).toBe(
|
||||
'<p>Feedback text containing <ruby>汉字<rp>(</rp><rt>hàn zì</rt><rp>)</rp></ruby>.</p>'
|
||||
);
|
||||
|
||||
expect(testObject.blanks[1].answer).toEqual('好 (hǎo)');
|
||||
expect(testObject.blanks[1].feedback).toBe(
|
||||
'<p>This means "good" or "well".</p>'
|
||||
);
|
||||
|
||||
expect(testObject.blanks[2].answer).toEqual('我 (wǒ)');
|
||||
expect(testObject.blanks[2].feedback).toBe('<p>This means "I".</p>');
|
||||
|
||||
expect(testObject.blanks[3].answer).toEqual('叫 (jiào)');
|
||||
expect(testObject.blanks[3].feedback).toBe(
|
||||
'<p>This means "to be called".</p>'
|
||||
);
|
||||
});
|
||||
|
||||
it('should return sentence as plain text when sentence does not contain pinyin', () => {
|
||||
file.data.lang = 'zh-CN';
|
||||
plugin(mockChineseFillInTheBlankNoPinyinAST, file);
|
||||
const testObject = file.data.fillInTheBlank;
|
||||
|
||||
expect(testObject.sentence).toBe('<p>BLANK好</p>');
|
||||
expect(testObject.blanks[0].answer).toEqual('你 (nǐ)');
|
||||
});
|
||||
|
||||
it('should return sentence as plain text when sentence does not contain hanzi', () => {
|
||||
file.data.lang = 'zh-CN';
|
||||
plugin(mockChineseFillInTheBlankNoHanziAST, file);
|
||||
const testObject = file.data.fillInTheBlank;
|
||||
|
||||
expect(testObject.sentence).toBe('<p>BLANK hǎo</p>');
|
||||
expect(testObject.blanks[0].answer).toEqual('nǐ');
|
||||
});
|
||||
|
||||
it("should throw if the number of blanks in the sentence doesn't match the number of answers", () => {
|
||||
file.data.lang = 'zh-CN';
|
||||
expect(() => {
|
||||
plugin(mockChineseFillInTheBlankBlankAnswerMismatchAST, file);
|
||||
}).toThrow(`Number of BLANKs doesn't match the number of answers.`);
|
||||
});
|
||||
|
||||
it('should throw error when inputType is pinyin-to-hanzi but answer is not in hanzi-pinyin format', () => {
|
||||
file.data.lang = 'zh-CN';
|
||||
file.data.inputType = 'pinyin-to-hanzi';
|
||||
|
||||
expect(() => {
|
||||
plugin(mockChineseFillInTheBlankWrongAnswerFormatAST, file);
|
||||
}).toThrow(
|
||||
"When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format."
|
||||
);
|
||||
});
|
||||
|
||||
it('should separate BLANK and adjacent Latin text in Chinese sentences', () => {
|
||||
file.data.lang = 'zh-CN';
|
||||
plugin(mockChineseFillInTheBlankLatinAST, file);
|
||||
const testObject = file.data.fillInTheBlank;
|
||||
|
||||
expect(testObject.sentence).toBe(
|
||||
'<p><ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby> BLANK UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby> 。</p>'
|
||||
);
|
||||
expect(testObject.blanks.length).toBe(1);
|
||||
|
||||
expect(testObject.blanks[0].answer).toEqual('是 (shì)');
|
||||
expect(testObject.blanks[0].feedback).toBe('<p>Feedback text.</p>');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -172,7 +172,7 @@ describe('add-text', () => {
|
||||
'<section id="instructions">\n<p>Instructions containing <ruby>汉字<rp>(</rp><rt>hàn zì</rt><rp>)</rp></ruby>.</p>\n</section>'
|
||||
);
|
||||
expect(zhFile.data.explanation).toBe(
|
||||
'<section id="explanation">\n<p>Wang Hua uses <ruby>请问<rp>(</rp><rt>qǐng wèn</rt><rp>)</rp></ruby> to politely start her question.</p>\n</section>'
|
||||
'<section id="explanation">\n<p><ruby>我是<rp>(</rp><rt>wǒ shì</rt><rp>)</rp></ruby> Web <ruby>开发者<rp>(</rp><rt>kāi fā zhě</rt><rp>)</rp></ruby>。 – I am a web developer.</p>\n<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>,<ruby>我是王华<rp>(</rp><rt>wǒ shì Wang Hua</rt><rp>)</rp></ruby>,<ruby>请问你叫什么名字<rp>(</rp><rt>qǐng wèn nǐ jiào shén me míng zi</rt><rp>)</rp></ruby>? – Hello, I am Wang Hua, may I ask what your name is?</p>\n</section>'
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,61 +1,94 @@
|
||||
const mdastToHTML = require('./mdast-to-html');
|
||||
|
||||
/**
|
||||
* Parses Chinese text in format: hanzi (pinyin)
|
||||
* @param {string} text - Text in format: hanzi (pinyin)
|
||||
* @returns {{ hanzi: string, pinyin: string } | null} Parsed hanzi and pinyin, or null if not matching
|
||||
*/
|
||||
function parseChinesePattern(text) {
|
||||
const match = text.match(/^(.+?)\s*\((.+?)\)$/);
|
||||
// Captures hanzi (pinyin) pairs (hanzi, optional whitespace, then pinyin parentheses)
|
||||
const HANZI_PINYIN_PAIR = '([\u4e00-\u9fff]+)\\s*\\(([^)]+)\\)';
|
||||
|
||||
if (!match) {
|
||||
return null;
|
||||
// Matches the BLANK placeholder
|
||||
const BLANK_TOKEN = 'BLANK';
|
||||
|
||||
// Matches Chinese and English punctuation
|
||||
const PUNCTUATION = '[,。?!!?,;:;:、]+';
|
||||
|
||||
// Matches Latin text with spaces
|
||||
const OTHER_TEXT = '([a-zA-Z\\s]+)';
|
||||
|
||||
const HANZI_PINYIN_REGEX = new RegExp(
|
||||
`${HANZI_PINYIN_PAIR}|${BLANK_TOKEN}|${PUNCTUATION}|${OTHER_TEXT}`,
|
||||
'g'
|
||||
);
|
||||
|
||||
/**
 * Parses all hanzi-pinyin pairs from text
 *
 * Scans the text with HANZI_PINYIN_REGEX and keeps only the matches whose
 * first two capture groups (hanzi and pinyin) are both set; BLANK tokens,
 * punctuation and Latin text matched by the other alternatives are ignored.
 *
 * @param {string} text - Text potentially containing multiple hanzi (pinyin) patterns
 * @returns {Array<{hanzi: string, pinyin: string}>} Array of parsed pairs,
 *   empty when the text contains no pair
 */
function parseHanziPinyinPairs(text) {
  // Work on a copy so the shared global regex never has its `lastIndex`
  // advanced by this function.
  const regex = new RegExp(HANZI_PINYIN_REGEX);
  const pairs = [];

  for (const match of text.matchAll(regex)) {
    const [, hanzi, pinyin] = match;

    if (hanzi && pinyin) {
      pairs.push({
        hanzi: hanzi.trim(),
        pinyin: pinyin.trim()
      });
    }
  }

  return pairs;
}
|
||||
|
||||
/**
|
||||
* Custom handler for Chinese inline code to render as ruby elements
|
||||
* Matches hanzi-pinyin pairs, BLANK, and punctuation as separate elements
|
||||
* @param {object} state - The state object from mdast-util-to-hast
|
||||
* @param {object} node - The inlineCode node
|
||||
* @returns {object} Hast element node
|
||||
* @returns {object|Array<object>} Hast element node or array of nodes
|
||||
*/
|
||||
function chineseInlineCodeHandler(state, node) {
|
||||
const parsed = parseChinesePattern(node.value);
|
||||
const rubyPairs = parseHanziPinyinPairs(node.value);
|
||||
|
||||
if (parsed) {
|
||||
return {
|
||||
type: 'element',
|
||||
tagName: 'ruby',
|
||||
properties: {},
|
||||
children: [
|
||||
{ type: 'text', value: parsed.hanzi },
|
||||
{
|
||||
if (rubyPairs.length > 0) {
|
||||
const matches = [...node.value.matchAll(HANZI_PINYIN_REGEX)];
|
||||
const nodes = matches.map(fullMatch => {
|
||||
if (fullMatch[1] && fullMatch[2]) {
|
||||
return {
|
||||
type: 'element',
|
||||
tagName: 'rp',
|
||||
tagName: 'ruby',
|
||||
properties: {},
|
||||
children: [{ type: 'text', value: '(' }]
|
||||
},
|
||||
{
|
||||
type: 'element',
|
||||
tagName: 'rt',
|
||||
properties: {},
|
||||
children: [{ type: 'text', value: parsed.pinyin }]
|
||||
},
|
||||
{
|
||||
type: 'element',
|
||||
tagName: 'rp',
|
||||
properties: {},
|
||||
children: [{ type: 'text', value: ')' }]
|
||||
}
|
||||
]
|
||||
};
|
||||
children: [
|
||||
{ type: 'text', value: fullMatch[1].trim() },
|
||||
{
|
||||
type: 'element',
|
||||
tagName: 'rp',
|
||||
properties: {},
|
||||
children: [{ type: 'text', value: '(' }]
|
||||
},
|
||||
{
|
||||
type: 'element',
|
||||
tagName: 'rt',
|
||||
properties: {},
|
||||
children: [{ type: 'text', value: fullMatch[2].trim() }]
|
||||
},
|
||||
{
|
||||
type: 'element',
|
||||
tagName: 'rp',
|
||||
properties: {},
|
||||
children: [{ type: 'text', value: ')' }]
|
||||
}
|
||||
]
|
||||
};
|
||||
}
|
||||
|
||||
// Other captures (BLANK, punctuation, other text including spaces) should preserve exactly
|
||||
return { type: 'text', value: fullMatch[0] };
|
||||
});
|
||||
|
||||
return nodes.length === 1 ? nodes[0] : nodes;
|
||||
}
|
||||
|
||||
// If static text, return code
|
||||
return {
|
||||
type: 'element',
|
||||
// TODO: change this to span
|
||||
@@ -75,4 +108,7 @@ const rubyOptions = {
|
||||
/**
 * Picks the mdast-to-HTML serializer for a challenge language.
 *
 * @param {string} [lang] - Language code of the challenge.
 * @returns {Function} For 'zh-CN', a function that calls `mdastToHTML` with
 *   `rubyOptions`; for any other value, `mdastToHTML` itself.
 */
const createMdastToHtml = lang =>
  // Strict comparison: only the exact string selects the ruby renderer, so
  // values that merely coerce to 'zh-CN' do not.
  lang === 'zh-CN' ? x => mdastToHTML(x, rubyOptions) : mdastToHTML;
|
||||
|
||||
module.exports = { parseChinesePattern, createMdastToHtml };
|
||||
module.exports = {
|
||||
parseHanziPinyinPairs,
|
||||
createMdastToHtml
|
||||
};
|
||||
|
||||
@@ -1,44 +1,56 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { createMdastToHtml, parseChinesePattern } from './i18n-stringify';
|
||||
import { createMdastToHtml, parseHanziPinyinPairs } from './i18n-stringify';
|
||||
|
||||
describe('parseChinesePattern', () => {
|
||||
it('should parse Chinese text with hanzi and pinyin', () => {
|
||||
const result = parseChinesePattern('你好 (nǐ hǎo)');
|
||||
expect(result).toEqual({
|
||||
describe('parseHanziPinyinPairs', () => {
|
||||
it('should parse single hanzi-pinyin pair', () => {
|
||||
const withSpaceSeparator = parseHanziPinyinPairs('你好 (nǐ hǎo)');
|
||||
|
||||
expect(withSpaceSeparator).toHaveLength(1);
|
||||
expect(withSpaceSeparator[0]).toMatchObject({
|
||||
hanzi: '你好',
|
||||
pinyin: 'nǐ hǎo'
|
||||
});
|
||||
|
||||
const withoutSpaceSeparator = parseHanziPinyinPairs('你好(nǐ hǎo)');
|
||||
|
||||
expect(withoutSpaceSeparator).toHaveLength(1);
|
||||
expect(withoutSpaceSeparator[0]).toMatchObject({
|
||||
hanzi: '你好',
|
||||
pinyin: 'nǐ hǎo'
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle text without spaces before parentheses', () => {
|
||||
const result = parseChinesePattern('你好(nǐ hǎo)');
|
||||
expect(result).toEqual({
|
||||
it('should parse multiple hanzi-pinyin pairs', () => {
|
||||
const withSpaceSeparator = parseHanziPinyinPairs(
|
||||
'你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua)'
|
||||
);
|
||||
expect(withSpaceSeparator).toHaveLength(2);
|
||||
expect(withSpaceSeparator[0]).toMatchObject({
|
||||
hanzi: '你好',
|
||||
pinyin: 'nǐ hǎo'
|
||||
});
|
||||
});
|
||||
expect(withSpaceSeparator[1]).toMatchObject({
|
||||
hanzi: '我是王华',
|
||||
pinyin: 'wǒ shì Wang Hua'
|
||||
});
|
||||
|
||||
it('should handle text with multiple spaces', () => {
|
||||
const result = parseChinesePattern('你好 (nǐ hǎo)');
|
||||
expect(result).toEqual({
|
||||
const withoutSpaceSeparator = parseHanziPinyinPairs(
|
||||
'你好(nǐ hǎo),我是王华(wǒ shì Wang Hua)'
|
||||
);
|
||||
expect(withoutSpaceSeparator).toHaveLength(2);
|
||||
expect(withoutSpaceSeparator[0]).toMatchObject({
|
||||
hanzi: '你好',
|
||||
pinyin: 'nǐ hǎo'
|
||||
});
|
||||
expect(withoutSpaceSeparator[1]).toMatchObject({
|
||||
hanzi: '我是王华',
|
||||
pinyin: 'wǒ shì Wang Hua'
|
||||
});
|
||||
});
|
||||
|
||||
it('should return null for text without parentheses', () => {
|
||||
const result = parseChinesePattern('你好');
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it('should return null for text with only opening parenthesis', () => {
|
||||
const result = parseChinesePattern('你好 (nǐ hǎo');
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it('should return null for empty string', () => {
|
||||
const result = parseChinesePattern('');
|
||||
expect(result).toBeNull();
|
||||
it('should return empty array for text without pairs', () => {
|
||||
const result = parseHanziPinyinPairs('你好');
|
||||
expect(result).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -99,6 +111,93 @@ describe('createMdastToHtml', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('should render BLANK tokens and punctuation marks as plain text', () => {
|
||||
const toHtml = createMdastToHtml('zh-CN');
|
||||
const withoutSpacesAroundBlanks = [
|
||||
{
|
||||
type: 'paragraph',
|
||||
children: [
|
||||
{
|
||||
type: 'inlineCode',
|
||||
value:
|
||||
'你好 (nǐ hǎo),BLANK是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ)BLANK什么名字 (shén me míng zi)?'
|
||||
}
|
||||
]
|
||||
}
|
||||
];
|
||||
expect(toHtml(withoutSpacesAroundBlanks)).toBe(
|
||||
'<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>,BLANK<ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby>,<ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby>BLANK<ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby>?</p>'
|
||||
);
|
||||
|
||||
const withSpacesAroundBlanks = [
|
||||
{
|
||||
type: 'paragraph',
|
||||
children: [
|
||||
{
|
||||
type: 'inlineCode',
|
||||
value:
|
||||
'你好 (nǐ hǎo), BLANK 是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)?'
|
||||
}
|
||||
]
|
||||
}
|
||||
];
|
||||
expect(toHtml(withSpacesAroundBlanks)).toBe(
|
||||
'<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>, BLANK <ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby>,<ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby> BLANK <ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby>?</p>'
|
||||
);
|
||||
});
|
||||
|
||||
it('should render Latin words as plain text while applying ruby to hanzi-pinyin pairs', () => {
|
||||
const toHtml = createMdastToHtml('zh-CN');
|
||||
const nodes = [
|
||||
{
|
||||
type: 'paragraph',
|
||||
children: [
|
||||
{
|
||||
type: 'inlineCode',
|
||||
value: '我是 (wǒ shì) UI 设计师 (shè jì shī)'
|
||||
}
|
||||
]
|
||||
}
|
||||
];
|
||||
const actual = toHtml(nodes);
|
||||
expect(actual).toBe(
|
||||
'<p><ruby>我是<rp>(</rp><rt>wǒ shì</rt><rp>)</rp></ruby> UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby></p>'
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle BLANK token and Latin word mix', () => {
|
||||
const toHtml = createMdastToHtml('zh-CN');
|
||||
const nodes = [
|
||||
{
|
||||
type: 'paragraph',
|
||||
children: [
|
||||
{
|
||||
type: 'inlineCode',
|
||||
value: '我 (wǒ) BLANK UI 设计师 (shè jì shī)'
|
||||
}
|
||||
]
|
||||
}
|
||||
];
|
||||
const actual = toHtml(nodes);
|
||||
expect(actual).toBe(
|
||||
'<p><ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby> BLANK UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby></p>'
|
||||
);
|
||||
});
|
||||
|
||||
it('should render multiple adjacent BLANK tokens in Chinese sentence', () => {
|
||||
const toHtml = createMdastToHtml('zh-CN');
|
||||
const nodes = [
|
||||
{
|
||||
type: 'paragraph',
|
||||
children: [{ type: 'inlineCode', value: 'BLANK BLANK,你好 (nǐ hǎo)' }]
|
||||
}
|
||||
];
|
||||
const actual = toHtml(nodes);
|
||||
expect(actual).toBe(
|
||||
'<p>BLANK BLANK,<ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby></p>'
|
||||
);
|
||||
});
|
||||
|
||||
it('should fallback to code element if pattern does not match', () => {
|
||||
const toHtml = createMdastToHtml('zh-CN');
|
||||
const nodes = [
|
||||
@@ -126,4 +225,16 @@ describe('createMdastToHtml', () => {
|
||||
const actual = toHtml(nodes);
|
||||
expect(actual).toBe('<p><code>请问 (qǐng wèn)</code></p>');
|
||||
});
|
||||
|
||||
it('should render as regular code when lang is not defined', () => {
|
||||
const toHtml = createMdastToHtml();
|
||||
const nodes = [
|
||||
{
|
||||
type: 'paragraph',
|
||||
children: [{ type: 'inlineCode', value: '请问 (qǐng wèn)' }]
|
||||
}
|
||||
];
|
||||
const actual = toHtml(nodes);
|
||||
expect(actual).toBe('<p><code>请问 (qǐng wèn)</code></p>');
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user