feat(client,challenge-parser): update fill-in-the-blank to support Chinese (#63741)

2026-05-28 18:26:54 +00:00 · 2025-11-25 11:02:22 -08:00
parent b6fff6e2b7
commit 33325b9002
24 changed files with 964 additions and 176 deletions
@@ -0,0 +1,9 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK BLANK`
+
+## --blanks--
+
+`你 (nǐ)`
@@ -0,0 +1,17 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`我 (wǒ) BLANK UI 设计师 (shè jì shī) 。`
+
+## --blanks--
+
+`是 (shì)`
+
+### --feedback--
+
+Feedback text.
+
+# --explanation--
+
+Explanation text.
@@ -0,0 +1,9 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`你好 (nǐ hǎo)`
+
+## --blanks--
+
+`你`
@@ -0,0 +1,9 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK hǎo`
+
+## --blanks--
+
+`nǐ`
@@ -0,0 +1,9 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK好`
+
+## --blanks--
+
+`你 (nǐ)`
@@ -0,0 +1,13 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK 好 (hǎo) BLANK`
+
+## --blanks--
+
+`你`
+
+---
+
+`nǐ`
@@ -0,0 +1,46 @@
+---
+lang: zh-CN
+inputType: pinyin-to-hanzi
+---
+
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK BLANK，BLANK 是王华 (shì Wang Hua)，请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)？`
+
+## --blanks--
+
+`你 (nǐ)`
+
+### --feedback--
+
+Feedback text containing `汉字 (hàn zì)`.
+
+---
+
+`好 (hǎo)`
+
+### --feedback--
+
+This means "good" or "well".
+
+---
+
+`我 (wǒ)`
+
+### --feedback--
+
+This means "I".
+
+---
+
+`叫 (jiào)`
+
+### --feedback--
+
+This means "to be called".
+
+# --explanation--
+
+Explanation text containing `汉字 (hàn zì)`.
@@ -49,4 +49,6 @@ Feedback text.

 # --explanation--

-Wang Hua uses `请问 (qǐng wèn)` to politely start her question.
+`我是 (wǒ shì) Web 开发者 (kāi fā zhě)。` – I am a web developer.
+
+`你好 (nǐ hǎo)，我是王华 (wǒ shì Wang Hua)，请问你叫什么名字 (qǐng wèn nǐ jiào shén me míng zi)？` – Hello, I am Wang Hua, may I ask what your name is?
@@ -3,8 +3,10 @@ const find = require('unist-util-find');
 const visit = require('unist-util-visit');
 const { getSection } = require('./utils/get-section');
 const getAllBefore = require('./utils/before-heading');
-const mdastToHtml = require('./utils/mdast-to-html');
-
+const {
+  createMdastToHtml,
+  parseHanziPinyinPairs
+} = require('./utils/i18n-stringify');
 const { splitOnThematicBreak } = require('./utils/split-on-thematic-break');

 const NOT_IN_PARAGRAPHS = `Each inline code block in the fillInTheBlank sentence section must in its own paragraph
@@ -40,19 +42,102 @@ function plugin() {
    if (fillInTheBlankNodes.length > 0) {
      const fillInTheBlankTree = root(fillInTheBlankNodes);

-      validateBlanksCount(fillInTheBlankTree);
+      validateBlanksSectionCount(fillInTheBlankTree);

      const sentenceNodes = getSection(fillInTheBlankTree, '--sentence--');
      const blanksNodes = getSection(fillInTheBlankTree, '--blanks--');

-      const fillInTheBlank = getfillInTheBlank(sentenceNodes, blanksNodes);
+      const lang = file.data.lang;
+      const inputType = file.data.inputType;
+      const toHtml = createMdastToHtml(lang);

-      file.data.fillInTheBlank = fillInTheBlank;
+      file.data.fillInTheBlank = getFillInTheBlank(sentenceNodes, blanksNodes);
+
+      function getFillInTheBlank(sentenceNodes, blanksNodes) {
+        const sentenceWithoutCodeBlocks = sentenceNodes.map(node => {
+          node.children.forEach(child => {
+            if (child.type === 'text' && child.value.trim() === '')
+              throw Error(NOT_IN_PARAGRAPHS);
+            if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK);
+          });
+
+          // For Chinese hanzi-pinyin, keep as inlineCode so handler generates ruby elements
+          if (lang === 'zh-CN') {
+            const hasChinesePairs = node.children.some(
+              child =>
+                child.type === 'inlineCode' &&
+                parseHanziPinyinPairs(child.value).length > 0
+            );
+
+            if (hasChinesePairs) {
+              return node;
+            }
+          }
+
+          // Convert inlineCode to text for non-Chinese content
+          const children = node.children.map(child => ({
+            ...child,
+            type: 'text'
+          }));
+          return { ...node, children };
+        });
+
+        const sentence = toHtml(sentenceWithoutCodeBlocks);
+        const blanks = getBlanks(blanksNodes);
+
+        if (!sentence)
+          throw Error('sentence is missing from fill in the blank');
+        if (!blanks) throw Error('blanks are missing from fill in the blank');
+        if (sentence.match(/BLANK/g).length !== blanks.length)
+          throw Error(`Number of BLANKs doesn't match the number of answers.`);
+
+        // For 'pinyin-to-hanzi' inputType, all answers must be of type 'hanzi-pinyin'.
+        // This validation ensures compatibility with the pinyin input in the UI,
+        // where users type pinyin and the system automatically converts it to hanzi
+        // if the input value matches the expected pinyin from the answer.
+        if (inputType === 'pinyin-to-hanzi') {
+          const allAnswersAreHanziPinyin = blanks.every(
+            blank => parseHanziPinyinPairs(blank.answer).length === 1
+          );
+
+          if (!allAnswersAreHanziPinyin) {
+            throw Error(
+              `When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format.`
+            );
+          }
+        }
+
+        return { sentence, blanks, ...(inputType && { inputType }) };
+      }
+
+      function getBlanks(blanksNodes) {
+        const blanksGroups = splitOnThematicBreak(blanksNodes);
+
+        return blanksGroups.map(blanksGroup => {
+          const blanksTree = root(blanksGroup);
+          const feedback = find(blanksTree, { value: '--feedback--' });
+
+          if (feedback) {
+            const blanksNodes = getAllBefore(blanksTree, '--feedback--');
+            const feedbackNodes = getSection(blanksTree, '--feedback--');
+
+            return {
+              answer: blanksNodes[0].children[0].value,
+              feedback: toHtml(feedbackNodes)
+            };
+          }
+
+          return {
+            answer: blanksGroup[0].children[0].value,
+            feedback: null
+          };
+        });
+      }
    }
  }
 }

-function validateBlanksCount(fillInTheBlankTree) {
+function validateBlanksSectionCount(fillInTheBlankTree) {
  let blanksCount = 0;
  visit(fillInTheBlankTree, { value: '--blanks--' }, () => {
    blanksCount++;
@@ -64,49 +149,4 @@ function validateBlanksCount(fillInTheBlankTree) {
    );
 }

-function getfillInTheBlank(sentenceNodes, blanksNodes) {
-  const sentenceWithoutCodeBlocks = sentenceNodes.map(node => {
-    node.children.forEach(child => {
-      if (child.type === 'text' && child.value.trim() === '')
-        throw Error(NOT_IN_PARAGRAPHS);
-      if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK);
-    });
-
-    const children = node.children.map(child => ({ ...child, type: 'text' }));
-    return { ...node, children };
-  });
-  const sentence = mdastToHtml(sentenceWithoutCodeBlocks);
-  const blanks = getBlanks(blanksNodes);
-
-  if (!sentence) throw Error('sentence is missing from fill in the blank');
-  if (!blanks) throw Error('blanks are missing from fill in the blank');
-  if (sentence.match(/BLANK/g).length !== blanks.length)
-    throw Error(
-      `Number of underscores in sentence doesn't match the number of blanks`
-    );
-
-  return { sentence, blanks };
-}
-
-function getBlanks(blanksNodes) {
-  const blanksGroups = splitOnThematicBreak(blanksNodes);
-
-  return blanksGroups.map(blanksGroup => {
-    const blanksTree = root(blanksGroup);
-    const feedback = find(blanksTree, { value: '--feedback--' });
-
-    if (feedback) {
-      const blanksNodes = getAllBefore(blanksTree, '--feedback--');
-      const feedbackNodes = getSection(blanksTree, '--feedback--');
-
-      return {
-        answer: blanksNodes[0].children[0].value,
-        feedback: mdastToHtml(feedbackNodes)
-      };
-    }
-
-    return { answer: blanksGroup[0].children[0].value, feedback: null };
-  });
-}
-
 module.exports = plugin;
@@ -8,7 +8,13 @@ describe('fill-in-the-blanks plugin', () => {
    mockFillInTheBlankTwoSentencesAST,
    mockFillInTheBlankBadSentence,
    mockFillInTheBlankBadParagraph,
-    mockFillInTheBlankMultipleBlanks;
+    mockFillInTheBlankMultipleBlanks,
+    mockChineseFillInTheBlankAST,
+    mockChineseFillInTheBlankNoPinyinAST,
+    mockChineseFillInTheBlankNoHanziAST,
+    mockChineseFillInTheBlankWrongAnswerFormatAST,
+    mockChineseFillInTheBlankBlankAnswerMismatchAST,
+    mockChineseFillInTheBlankLatinAST;
  const plugin = addFillInTheBlankQuestion();
  let file = { data: {} };

@@ -29,6 +35,24 @@ describe('fill-in-the-blanks plugin', () => {
    mockFillInTheBlankMultipleBlanks = await parseFixture(
      'with-fill-in-the-blank-many-blanks.md'
    );
+    mockChineseFillInTheBlankAST = await parseFixture(
+      'with-chinese-fill-in-the-blank.md'
+    );
+    mockChineseFillInTheBlankNoPinyinAST = await parseFixture(
+      'with-chinese-fill-in-the-blank-no-pinyin.md'
+    );
+    mockChineseFillInTheBlankNoHanziAST = await parseFixture(
+      'with-chinese-fill-in-the-blank-no-hanzi.md'
+    );
+    mockChineseFillInTheBlankWrongAnswerFormatAST = await parseFixture(
+      'with-chinese-fill-in-the-blank-wrong-answer-format.md'
+    );
+    mockChineseFillInTheBlankBlankAnswerMismatchAST = await parseFixture(
+      'with-chinese-fill-in-the-blank-blank-answer-mismatch.md'
+    );
+    mockChineseFillInTheBlankLatinAST = await parseFixture(
+      'with-chinese-fill-in-the-blank-latin.md'
+    );
  });

  beforeEach(() => {
@@ -55,15 +79,15 @@ describe('fill-in-the-blanks plugin', () => {
    expect(Array.isArray(testObject.blanks)).toBe(true);
    expect(testObject.blanks.length).toBe(3);
    expect(testObject.blanks[0]).toHaveProperty('answer');
-    expect(typeof testObject.blanks[0].answer).toBe('string');
+    expect(testObject.blanks[0].answer).toEqual('are');
    expect(testObject.blanks[0]).toHaveProperty('feedback');
    expect(typeof testObject.blanks[0].feedback).toBe('string');
    expect(testObject.blanks[1]).toHaveProperty('answer');
-    expect(typeof testObject.blanks[1].answer).toBe('string');
+    expect(testObject.blanks[1].answer).toEqual('right');
    expect(testObject.blanks[1]).toHaveProperty('feedback');
    expect(typeof testObject.blanks[1].feedback).toBe('string');
    expect(testObject.blanks[2]).toHaveProperty('answer');
-    expect(typeof testObject.blanks[2].answer).toBe('string');
+    expect(testObject.blanks[2].answer).toEqual('Nice');
    expect(testObject.blanks[2]).toHaveProperty('feedback');
    expect(testObject.blanks[2].feedback).toBeNull();
  });
@@ -167,4 +191,86 @@ Example of good formatting:
        '<p>The verb <code>to be</code> is an irregular verb. When conjugated with the pronoun <code>you</code>, <code>be</code> becomes <code>are</code>. For example: <code>You are an English learner.</code></p>'
    });
  });
+
+  it('should parse Chinese fill-in-the-blank sentence and answer correctly if they are in `hanzi (pinyin)` format', () => {
+    file.data.lang = 'zh-CN';
+    file.data.inputType = 'pinyin-to-hanzi';
+    plugin(mockChineseFillInTheBlankAST, file);
+    const testObject = file.data.fillInTheBlank;
+
+    expect(testObject.inputType).toBe('pinyin-to-hanzi');
+
+    expect(testObject.sentence).toBe(
+      '<p>BLANK BLANK，BLANK <ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby>，<ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby> BLANK <ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby>？</p>'
+    );
+    expect(testObject.blanks.length).toBe(4);
+
+    expect(testObject.blanks[0].answer).toEqual('你 (nǐ)');
+    expect(testObject.blanks[0].feedback).toBe(
+      '<p>Feedback text containing <ruby>汉字<rp>(</rp><rt>hàn zì</rt><rp>)</rp></ruby>.</p>'
+    );
+
+    expect(testObject.blanks[1].answer).toEqual('好 (hǎo)');
+    expect(testObject.blanks[1].feedback).toBe(
+      '<p>This means "good" or "well".</p>'
+    );
+
+    expect(testObject.blanks[2].answer).toEqual('我 (wǒ)');
+    expect(testObject.blanks[2].feedback).toBe('<p>This means "I".</p>');
+
+    expect(testObject.blanks[3].answer).toEqual('叫 (jiào)');
+    expect(testObject.blanks[3].feedback).toBe(
+      '<p>This means "to be called".</p>'
+    );
+  });
+
+  it('should return sentence as plain text when sentence does not contain pinyin', () => {
+    file.data.lang = 'zh-CN';
+    plugin(mockChineseFillInTheBlankNoPinyinAST, file);
+    const testObject = file.data.fillInTheBlank;
+
+    expect(testObject.sentence).toBe('<p>BLANK好</p>');
+    expect(testObject.blanks[0].answer).toEqual('你 (nǐ)');
+  });
+
+  it('should return sentence as plain text when sentence does not contain hanzi', () => {
+    file.data.lang = 'zh-CN';
+    plugin(mockChineseFillInTheBlankNoHanziAST, file);
+    const testObject = file.data.fillInTheBlank;
+
+    expect(testObject.sentence).toBe('<p>BLANK hǎo</p>');
+    expect(testObject.blanks[0].answer).toEqual('nǐ');
+  });
+
+  it("should throw if the number of blanks in the sentence doesn't match the number of answers", () => {
+    file.data.lang = 'zh-CN';
+    expect(() => {
+      plugin(mockChineseFillInTheBlankBlankAnswerMismatchAST, file);
+    }).toThrow(`Number of BLANKs doesn't match the number of answers.`);
+  });
+
+  it('should throw error when inputType is pinyin-to-hanzi but answer is not in hanzi-pinyin format', () => {
+    file.data.lang = 'zh-CN';
+    file.data.inputType = 'pinyin-to-hanzi';
+
+    expect(() => {
+      plugin(mockChineseFillInTheBlankWrongAnswerFormatAST, file);
+    }).toThrow(
+      "When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format."
+    );
+  });
+
+  it('should separate BLANK and adjacent Latin text in Chinese sentences', () => {
+    file.data.lang = 'zh-CN';
+    plugin(mockChineseFillInTheBlankLatinAST, file);
+    const testObject = file.data.fillInTheBlank;
+
+    expect(testObject.sentence).toBe(
+      '<p><ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby> BLANK UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby> 。</p>'
+    );
+    expect(testObject.blanks.length).toBe(1);
+
+    expect(testObject.blanks[0].answer).toEqual('是 (shì)');
+    expect(testObject.blanks[0].feedback).toBe('<p>Feedback text.</p>');
+  });
 });
@@ -172,7 +172,7 @@ describe('add-text', () => {
      '<section id="instructions">\n<p>Instructions containing <ruby>汉字<rp>(</rp><rt>hàn zì</rt><rp>)</rp></ruby>.</p>\n</section>'
    );
    expect(zhFile.data.explanation).toBe(
-      '<section id="explanation">\n<p>Wang Hua uses <ruby>请问<rp>(</rp><rt>qǐng wèn</rt><rp>)</rp></ruby> to politely start her question.</p>\n</section>'
+      '<section id="explanation">\n<p><ruby>我是<rp>(</rp><rt>wǒ shì</rt><rp>)</rp></ruby> Web <ruby>开发者<rp>(</rp><rt>kāi fā zhě</rt><rp>)</rp></ruby>。 – I am a web developer.</p>\n<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>，<ruby>我是王华<rp>(</rp><rt>wǒ shì Wang Hua</rt><rp>)</rp></ruby>，<ruby>请问你叫什么名字<rp>(</rp><rt>qǐng wèn nǐ jiào shén me míng zi</rt><rp>)</rp></ruby>？ – Hello, I am Wang Hua, may I ask what your name is?</p>\n</section>'
    );
  });
 });
@@ -1,61 +1,94 @@
 const mdastToHTML = require('./mdast-to-html');

-/**
- * Parses Chinese text in format: hanzi (pinyin)
- * @param {string} text - Text in format: hanzi (pinyin)
- * @returns {{ hanzi: string, pinyin: string } | null} Parsed hanzi and pinyin, or null if not matching
- */
-function parseChinesePattern(text) {
-  const match = text.match(/^(.+?)\s*\((.+?)\)$/);
+// Captures hanzi (pinyin) pairs (hanzi, optional whitespace, then pinyin parentheses)
+const HANZI_PINYIN_PAIR = '([\u4e00-\u9fff]+)\\s*\\(([^)]+)\\)';

-  if (!match) {
-    return null;
+// Matches the BLANK placeholder
+const BLANK_TOKEN = 'BLANK';
+
+// Matches Chinese and English punctuation
+const PUNCTUATION = '[，。？！!?,;:；：、]+';
+
+// Matches Latin text with spaces
+const OTHER_TEXT = '([a-zA-Z\\s]+)';
+
+const HANZI_PINYIN_REGEX = new RegExp(
+  `${HANZI_PINYIN_PAIR}|${BLANK_TOKEN}|${PUNCTUATION}|${OTHER_TEXT}`,
+  'g'
+);
+
+/**
+ * Parses all hanzi-pinyin pairs from text
+ * @param {string} text - Text potentially containing multiple hanzi (pinyin) patterns
+ * @returns {Array<{hanzi: string, pinyin: string}>} Array of parsed pairs
+ */
+function parseHanziPinyinPairs(text) {
+  const pairs = [];
+  // HANZI_PINYIN_REGEX is global, so exec() advances lastIndex on the regex
+  // object; iterate on a copy to keep that state local to this call.
+  const regex = new RegExp(HANZI_PINYIN_REGEX);
+  let match;
+
+  while ((match = regex.exec(text)) !== null) {
+    // Capture groups 1 (hanzi) and 2 (pinyin) belong to the hanzi (pinyin)
+    // alternative; matches from the other alternatives are skipped.
+    if (match[1] && match[2]) {
+      pairs.push({
+        hanzi: match[1].trim(),
+        pinyin: match[2].trim()
+      });
+    }
  }

-  return {
-    hanzi: match[1].trim(),
-    pinyin: match[2].trim()
-  };
+  return pairs;
 }

 /**
 * Custom handler for Chinese inline code to render as ruby elements
+ * Matches hanzi-pinyin pairs, BLANK, and punctuation as separate elements
 * @param {object} state - The state object from mdast-util-to-hast
 * @param {object} node - The inlineCode node
- * @returns {object} Hast element node
+ * @returns {object|Array<object>} Hast element node or array of nodes
 */
 function chineseInlineCodeHandler(state, node) {
-  const parsed = parseChinesePattern(node.value);
+  const rubyPairs = parseHanziPinyinPairs(node.value);

-  if (parsed) {
-    return {
-      type: 'element',
-      tagName: 'ruby',
-      properties: {},
-      children: [
-        { type: 'text', value: parsed.hanzi },
-        {
+  if (rubyPairs.length > 0) {
+    const matches = [...node.value.matchAll(HANZI_PINYIN_REGEX)];
+    const nodes = matches.map(fullMatch => {
+      if (fullMatch[1] && fullMatch[2]) {
+        return {
          type: 'element',
-          tagName: 'rp',
+          tagName: 'ruby',
          properties: {},
-          children: [{ type: 'text', value: '(' }]
-        },
-        {
-          type: 'element',
-          tagName: 'rt',
-          properties: {},
-          children: [{ type: 'text', value: parsed.pinyin }]
-        },
-        {
-          type: 'element',
-          tagName: 'rp',
-          properties: {},
-          children: [{ type: 'text', value: ')' }]
-        }
-      ]
-    };
+          children: [
+            { type: 'text', value: fullMatch[1].trim() },
+            {
+              type: 'element',
+              tagName: 'rp',
+              properties: {},
+              children: [{ type: 'text', value: '(' }]
+            },
+            {
+              type: 'element',
+              tagName: 'rt',
+              properties: {},
+              children: [{ type: 'text', value: fullMatch[2].trim() }]
+            },
+            {
+              type: 'element',
+              tagName: 'rp',
+              properties: {},
+              children: [{ type: 'text', value: ')' }]
+            }
+          ]
+        };
+      }
+
+      // Other captures (BLANK, punctuation, other text including spaces) should preserve exactly
+      return { type: 'text', value: fullMatch[0] };
+    });
+
+    return nodes.length === 1 ? nodes[0] : nodes;
  }

+  // If static text, return code
  return {
    type: 'element',
    // TODO: change this to span
@@ -75,4 +108,7 @@ const rubyOptions = {
 const createMdastToHtml = lang =>
  lang == 'zh-CN' ? x => mdastToHTML(x, rubyOptions) : mdastToHTML;

-module.exports = { parseChinesePattern, createMdastToHtml };
+module.exports = {
+  parseHanziPinyinPairs,
+  createMdastToHtml
+};
@@ -1,44 +1,56 @@
 import { describe, it, expect } from 'vitest';
-import { createMdastToHtml, parseChinesePattern } from './i18n-stringify';
+import { createMdastToHtml, parseHanziPinyinPairs } from './i18n-stringify';

-describe('parseChinesePattern', () => {
-  it('should parse Chinese text with hanzi and pinyin', () => {
-    const result = parseChinesePattern('你好 (nǐ hǎo)');
-    expect(result).toEqual({
+describe('parseHanziPinyinPairs', () => {
+  it('should parse single hanzi-pinyin pair', () => {
+    const withSpaceSeparator = parseHanziPinyinPairs('你好 (nǐ hǎo)');
+
+    expect(withSpaceSeparator).toHaveLength(1);
+    expect(withSpaceSeparator[0]).toMatchObject({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+
+    const withoutSpaceSeparator = parseHanziPinyinPairs('你好(nǐ hǎo)');
+
+    expect(withoutSpaceSeparator).toHaveLength(1);
+    expect(withoutSpaceSeparator[0]).toMatchObject({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
  });

-  it('should handle text without spaces before parentheses', () => {
-    const result = parseChinesePattern('你好(nǐ hǎo)');
-    expect(result).toEqual({
+  it('should parse multiple hanzi-pinyin pairs', () => {
+    const withSpaceSeparator = parseHanziPinyinPairs(
+      '你好 (nǐ hǎo)，我是王华 (wǒ shì Wang Hua)'
+    );
+    expect(withSpaceSeparator).toHaveLength(2);
+    expect(withSpaceSeparator[0]).toMatchObject({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
-  });
+    expect(withSpaceSeparator[1]).toMatchObject({
+      hanzi: '我是王华',
+      pinyin: 'wǒ shì Wang Hua'
+    });

-  it('should handle text with multiple spaces', () => {
-    const result = parseChinesePattern('你好   (nǐ hǎo)');
-    expect(result).toEqual({
+    const withoutSpaceSeparator = parseHanziPinyinPairs(
+      '你好(nǐ hǎo)，我是王华(wǒ shì Wang Hua)'
+    );
+    expect(withoutSpaceSeparator).toHaveLength(2);
+    expect(withoutSpaceSeparator[0]).toMatchObject({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
+    expect(withoutSpaceSeparator[1]).toMatchObject({
+      hanzi: '我是王华',
+      pinyin: 'wǒ shì Wang Hua'
+    });
  });

-  it('should return null for text without parentheses', () => {
-    const result = parseChinesePattern('你好');
-    expect(result).toBeNull();
-  });
-
-  it('should return null for text with only opening parenthesis', () => {
-    const result = parseChinesePattern('你好 (nǐ hǎo');
-    expect(result).toBeNull();
-  });
-
-  it('should return null for empty string', () => {
-    const result = parseChinesePattern('');
-    expect(result).toBeNull();
+  it('should return empty array for text without pairs', () => {
+    const result = parseHanziPinyinPairs('你好');
+    expect(result).toHaveLength(0);
  });
 });

@@ -99,6 +111,93 @@ describe('createMdastToHtml', () => {
    );
  });

+  it('should render BLANK tokens and punctuation marks as plain text', () => {
+    const toHtml = createMdastToHtml('zh-CN');
+    const withoutSpacesAroundBlanks = [
+      {
+        type: 'paragraph',
+        children: [
+          {
+            type: 'inlineCode',
+            value:
+              '你好 (nǐ hǎo)，BLANK是王华 (shì Wang Hua)，请问你 (qǐng wèn nǐ)BLANK什么名字 (shén me míng zi)？'
+          }
+        ]
+      }
+    ];
+    expect(toHtml(withoutSpacesAroundBlanks)).toBe(
+      '<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>，BLANK<ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby>，<ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby>BLANK<ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby>？</p>'
+    );
+
+    const withSpacesAroundBlanks = [
+      {
+        type: 'paragraph',
+        children: [
+          {
+            type: 'inlineCode',
+            value:
+              '你好 (nǐ hǎo)， BLANK 是王华 (shì Wang Hua)，请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)？'
+          }
+        ]
+      }
+    ];
+    expect(toHtml(withSpacesAroundBlanks)).toBe(
+      '<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>， BLANK <ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby>，<ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby> BLANK <ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby>？</p>'
+    );
+  });
+
+  it('should render Latin words as plain text while applying ruby to hanzi-pinyin pairs', () => {
+    const toHtml = createMdastToHtml('zh-CN');
+    const nodes = [
+      {
+        type: 'paragraph',
+        children: [
+          {
+            type: 'inlineCode',
+            value: '我是 (wǒ shì) UI 设计师 (shè jì shī)'
+          }
+        ]
+      }
+    ];
+    const actual = toHtml(nodes);
+    expect(actual).toBe(
+      '<p><ruby>我是<rp>(</rp><rt>wǒ shì</rt><rp>)</rp></ruby> UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby></p>'
+    );
+  });
+
+  it('should handle BLANK token and Latin word mix', () => {
+    const toHtml = createMdastToHtml('zh-CN');
+    const nodes = [
+      {
+        type: 'paragraph',
+        children: [
+          {
+            type: 'inlineCode',
+            value: '我 (wǒ) BLANK UI 设计师 (shè jì shī)'
+          }
+        ]
+      }
+    ];
+    const actual = toHtml(nodes);
+    expect(actual).toBe(
+      '<p><ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby> BLANK UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby></p>'
+    );
+  });
+
+  it('should render multiple adjacent BLANK tokens in Chinese sentence', () => {
+    const toHtml = createMdastToHtml('zh-CN');
+    const nodes = [
+      {
+        type: 'paragraph',
+        children: [{ type: 'inlineCode', value: 'BLANK BLANK，你好 (nǐ hǎo)' }]
+      }
+    ];
+    const actual = toHtml(nodes);
+    expect(actual).toBe(
+      '<p>BLANK BLANK，<ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby></p>'
+    );
+  });
+
  it('should fallback to code element if pattern does not match', () => {
    const toHtml = createMdastToHtml('zh-CN');
    const nodes = [
@@ -126,4 +225,16 @@ describe('createMdastToHtml', () => {
    const actual = toHtml(nodes);
    expect(actual).toBe('<p><code>请问 (qǐng wèn)</code></p>');
  });
+
+  it('should render as regular code when lang is not defined', () => {
+    const toHtml = createMdastToHtml();
+    const nodes = [
+      {
+        type: 'paragraph',
+        children: [{ type: 'inlineCode', value: '请问 (qǐng wèn)' }]
+      }
+    ];
+    const actual = toHtml(nodes);
+    expect(actual).toBe('<p><code>请问 (qǐng wèn)</code></p>');
+  });
 });