feat(client,challenge-parser): update fill-in-the-blank to support Chinese (#63741)

2026-05-28 18:26:54 +00:00 · 2025-11-25 11:02:22 -08:00
parent b6fff6e2b7
commit 33325b9002
24 changed files with 964 additions and 176 deletions
@@ -410,6 +410,7 @@ exports.createSchemaCustomization = ({ actions }) => {
    type FillInTheBlank {
      sentence: String
      blanks: [Blank]
+      inputType: String
    }
    type Blank {
      answer: String
@@ -49,6 +49,7 @@ export type Question = {
 export type FillInTheBlank = {
  sentence: string;
  blanks: MultipleChoiceAnswer[];
+  // Optional input mode for the blanks.
+  // NOTE(review): nothing in the visible hunks reads this field — confirm its
+  // consumer and the intended meaning of each value.
+  inputType?: 'pinyin-tone' | 'pinyin-to-hanzi';
 };

 export type Fields = {
@@ -2,7 +2,7 @@ import React from 'react';
 import { useTranslation } from 'react-i18next';
 import { Spacer } from '@freecodecamp/ui';

-import { parseBlanks } from '../fill-in-the-blank/parse-blanks';
+import { parseBlanks, parseAnswer } from '../fill-in-the-blank/parse-blanks';
 import PrismFormatted from '../components/prism-formatted';
 import { FillInTheBlank } from '../../../redux/prop-types';
 import ChallengeHeading from './challenge-heading';
@@ -16,6 +16,23 @@ type FillInTheBlankProps = {
  handleInputChange: (inputIndex: number, value: string) => void;
 };

+/**
+ * Renders the answer of a blank that has been filled in correctly.
+ *
+ * `parseAnswer` yields either the answer string unchanged or a single
+ * `{ hanzi, pinyin }` pair. A string is rendered in a `<span>`; a pair is
+ * rendered as a `<ruby>` annotation with the hanzi as base text and the
+ * pinyin inside `<rt>`. The `<rp>` elements supply fallback parentheses for
+ * renderers without ruby support. Both variants carry the
+ * `correct-blank-answer` class.
+ */
+const AnswerText = ({ answer }: { answer: string }) => {
+  const parsedAnswer = parseAnswer(answer);
+
+  if (typeof parsedAnswer === 'string') {
+    return <span className='correct-blank-answer'>{parsedAnswer}</span>;
+  }
+
+  return (
+    <ruby className='correct-blank-answer'>
+      {parsedAnswer.hanzi}
+      <rp>(</rp>
+      <rt>{parsedAnswer.pinyin}</rt>
+      <rp>)</rp>
+    </ruby>
+  );
+};
+
 function FillInTheBlanks({
  fillInTheBlank: { sentence, blanks },
  answersCorrect,
@@ -36,6 +53,17 @@ function FillInTheBlanks({
    return cls;
  };

+  // Length used to size a blank's <input> (its `size`, and `maxLength` with
+  // some slack): the length of a plain answer, or of the pinyin part when the
+  // answer parses as a single hanzi/pinyin pair.
+  const getAnswerLength = (answer: string): number => {
+    const parsedAnswer = parseAnswer(answer);
+
+    if (typeof parsedAnswer === 'string') {
+      return parsedAnswer.length;
+    }
+
+    // TODO: This is a simplification. Revisit later to account for tones and spaces.
+    return parsedAnswer.pinyin.length;
+  };
+
  const paragraphs = parseBlanks(sentence);
  const blankAnswers = blanks.map(b => b.answer);

@@ -55,25 +83,35 @@ function FillInTheBlanks({
                  return value;
                }

-                // If a blank is answered correctly, render the answer as part of the sentence.
-                if (type === 'blank' && answersCorrect[value] === true) {
+                if (type === 'hanzi-pinyin') {
+                  const { hanzi, pinyin } = value;
                  return (
-                    <span key={j} className='correct-blank-answer'>
-                      {blankAnswers[value]}
-                    </span>
+                    <ruby key={j}>
+                      {hanzi}
+                      <rp>(</rp>
+                      <rt>{pinyin}</rt>
+                      <rp>)</rp>
+                    </ruby>
                  );
                }

+                // If a blank is answered correctly, render the answer as part of the sentence.
+                if (type === 'blank' && answersCorrect[value] === true) {
+                  return <AnswerText key={j} answer={blankAnswers[value]} />;
+                }
+
+                const answerLength = getAnswerLength(blankAnswers[value]);
+
                return (
                  <input
                    key={j}
                    type='text'
-                    maxLength={blankAnswers[value].length + 3}
+                    maxLength={answerLength + 3}
                    className={getInputClass(value)}
                    onChange={e =>
                      handleInputChange(node.value, e.target.value)
                    }
-                    size={blankAnswers[value].length}
+                    size={answerLength}
                    autoComplete='off'
                    aria-label={t('learn.fill-in-the-blank.blank')}
                    {...(answersCorrect[value] === false
@@ -1,5 +1,9 @@
 import { describe, it, expect } from 'vitest';
-import { parseBlanks } from './parse-blanks';
+import {
+  parseBlanks,
+  parseHanziPinyinPairs,
+  parseAnswer
+} from './parse-blanks';

 describe('parseBlanks', () => {
  it('handles strings without blanks', () => {
@@ -129,4 +133,221 @@ describe('parseBlanks', () => {
    expect(() => parseBlanks('<p>hello BLANK!</p>hello BLANK!')).toThrow();
    expect(() => parseBlanks('hello BLANK!<p>hello</p>')).toThrow();
  });
+
+  it('handles Chinese with single BLANK', () => {
+    expect(
+      parseBlanks('<p>BLANK<ruby>好<rp>(</rp><rt>hǎo</rt><rp>)</rp></ruby></p>')
+    ).toEqual([
+      [
+        { type: 'blank', value: 0 },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '好', pinyin: 'hǎo' }
+        }
+      ]
+    ]);
+  });
+
+  it('handles Chinese without pinyin', () => {
+    expect(parseBlanks('<p>你BLANK好</p>')).toEqual([
+      [
+        { type: 'text', value: '你' },
+        { type: 'blank', value: 0 },
+        { type: 'text', value: '好' }
+      ]
+    ]);
+  });
+
+  it('handles Chinese with multiple BLANKs', () => {
+    expect(
+      parseBlanks(
+        '<p>BLANK<ruby>好<rp>(</rp><rt>hǎo</rt><rp>)</rp></ruby>，BLANK<ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby></p>'
+      )
+    ).toEqual([
+      [
+        { type: 'blank', value: 0 },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '好', pinyin: 'hǎo' }
+        },
+        { type: 'text', value: '，' },
+        { type: 'blank', value: 1 },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '是王华', pinyin: 'shì Wang Hua' }
+        }
+      ]
+    ]);
+  });
+
+  it('handles Chinese with multiple adjacent BLANKs', () => {
+    expect(
+      parseBlanks(
+        '<p>BLANK BLANK<ruby>好<rp>(</rp><rt>hǎo</rt><rp>)</rp></ruby></p>'
+      )
+    ).toEqual([
+      [
+        { type: 'blank', value: 0 },
+        { type: 'text', value: ' ' },
+        { type: 'blank', value: 1 },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '好', pinyin: 'hǎo' }
+        }
+      ]
+    ]);
+  });
+
+  it('handles Chinese with BLANK at the end', () => {
+    expect(
+      parseBlanks(
+        '<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>BLANK</p>'
+      )
+    ).toEqual([
+      [
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '你好', pinyin: 'nǐ hǎo' }
+        },
+        { type: 'blank', value: 0 }
+      ]
+    ]);
+  });
+
+  it('handles Chinese with spaces around BLANK', () => {
+    expect(
+      parseBlanks(
+        '<p><ruby>你<rp>(</rp><rt>nǐ</rt><rp>)</rp></ruby> BLANK <ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby></p>'
+      )
+    ).toEqual([
+      [
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '你', pinyin: 'nǐ' }
+        },
+        { type: 'text', value: ' ' },
+        { type: 'blank', value: 0 },
+        { type: 'text', value: ' ' },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '我', pinyin: 'wǒ' }
+        }
+      ]
+    ]);
+  });
+
+  it('handles Latin text adjacent to BLANK', () => {
+    expect(
+      parseBlanks(
+        '<p><ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby> BLANK UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby> 。</p>'
+      )
+    ).toEqual([
+      [
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '我', pinyin: 'wǒ' }
+        },
+        { type: 'text', value: ' ' },
+        { type: 'blank', value: 0 },
+        { type: 'text', value: ' UI ' },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '设计师', pinyin: 'shè jì shī' }
+        },
+        { type: 'text', value: ' 。' }
+      ]
+    ]);
+  });
+
+  it('handles Chinese with multiple separate groups', () => {
+    expect(
+      parseBlanks(
+        '<p>BLANK<ruby>好<rp>(</rp><rt>hǎo</rt><rp>)</rp></ruby>，<ruby>我是王华<rp>(</rp><rt>wǒ shì Wang Hua</rt><rp>)</rp></ruby>，<ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby>BLANK<ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby>？</p>'
+      )
+    ).toEqual([
+      [
+        { type: 'blank', value: 0 },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '好', pinyin: 'hǎo' }
+        },
+        { type: 'text', value: '，' },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '我是王华', pinyin: 'wǒ shì Wang Hua' }
+        },
+        { type: 'text', value: '，' },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '请问你', pinyin: 'qǐng wèn nǐ' }
+        },
+        { type: 'blank', value: 1 },
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '什么名字', pinyin: 'shén me míng zi' }
+        },
+        { type: 'text', value: '？' }
+      ]
+    ]);
+  });
+
+  it('handles Chinese ruby with trailing punctuation', () => {
+    expect(
+      parseBlanks(
+        '<p><ruby>你是刘明吗<rp>(</rp><rt>nǐ shì Liu Ming ma</rt><rp>)</rp></ruby>？</p>'
+      )
+    ).toEqual([
+      [
+        {
+          type: 'hanzi-pinyin',
+          value: { hanzi: '你是刘明吗', pinyin: 'nǐ shì Liu Ming ma' }
+        },
+        { type: 'text', value: '？' }
+      ]
+    ]);
+  });
+});
+
+describe('parseHanziPinyinPairs', () => {
+  it('parseHanziPinyinPairs returns array with one pair for well-formed input', () => {
+    const result = parseHanziPinyinPairs('你好 (nǐ hǎo)');
+    expect(result).toHaveLength(1);
+    expect(result[0]).toEqual({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+  });
+
+  it('parseHanziPinyinPairs handles parentheses without a space', () => {
+    const result = parseHanziPinyinPairs('你好(nǐ hǎo)');
+    expect(result).toHaveLength(1);
+    expect(result[0]).toEqual({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+  });
+
+  it('parseHanziPinyinPairs returns empty array for non-matching input', () => {
+    expect(parseHanziPinyinPairs('hello')).toEqual([]);
+  });
+
+  // NOTE(review): this case exercises parseAnswer, not parseHanziPinyinPairs;
+  // it covers the "space before the parenthesis" form of a single pair.
+  it('parseAnswer returns parsed object when pattern matches', () => {
+    expect(parseAnswer('你好 (nǐ hǎo)')).toEqual({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+  });
+});
+
+describe('parseAnswer', () => {
+  // NOTE(review): despite the title, the expected value is a
+  // { hanzi, pinyin } object, not a string.
+  it('parseAnswer returns hanzi-pinyin string when pattern matches', () => {
+    expect(parseAnswer('你好(nǐ hǎo)')).toEqual({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+  });
+
+  // Input without a parenthesised group comes back as the same string.
+  it('parseAnswer returns original string when pattern does not match', () => {
+    expect(parseAnswer('just some text')).toBe('just some text');
+  });
 });
@@ -1,6 +1,48 @@
-type TextNode = { type: 'text'; value: string };
+// Literal text found between blanks and ruby annotations, kept verbatim.
+type PlainTextNode = {
+  type: 'text';
+  value: string;
+};
+
+// Hanzi/pinyin node representing an inline pronunciation pair: `hanzi` is the
+// base text and `pinyin` its annotation, as read from a <ruby> element.
+type HanziPinyinNode = {
+  type: 'hanzi-pinyin';
+  value: { hanzi: string; pinyin: string };
+};
+
 type BlankNode = { type: 'blank'; value: number };
-type ParagraphElement = TextNode | BlankNode;
+
+// Any node that can appear in a parsed paragraph; discriminated by `type`.
+type ParagraphElement = PlainTextNode | BlankNode | HanziPinyinNode;
+
+/**
+ * Parses all hanzi-pinyin pairs from text.
+ *
+ * A pair is a run of characters other than parentheses, followed by optional
+ * whitespace and a non-empty parenthesised group, e.g. `你好 (nǐ hǎo)` or
+ * `你好(nǐ hǎo)`. Both captured parts are trimmed.
+ *
+ * NOTE(review): the pattern does not check that the text is Chinese, so an
+ * input such as `foo (bar)` also yields a pair.
+ *
+ * @param text - Text potentially containing hanzi (pinyin) patterns
+ * @returns Array of parsed hanzi and pinyin pairs in order of appearance;
+ * empty when nothing matches
+ */
+export function parseHanziPinyinPairs(
+  text: string
+): Array<{ hanzi: string; pinyin: string }> {
+  const pairs: Array<{ hanzi: string; pinyin: string }> = [];
+  const regex = /([^()]+?)\s*\(([^)]+)\)/g;
+  let match;
+
+  // With the `g` flag each exec() call resumes after the previous match and
+  // returns null once the text is exhausted.
+  while ((match = regex.exec(text)) !== null) {
+    pairs.push({
+      hanzi: match[1].trim(),
+      pinyin: match[2].trim()
+    });
+  }
+
+  return pairs;
+}
+
+/**
+ * Interprets the answer of a blank.
+ *
+ * @param text - Raw answer text
+ * @returns The `{ hanzi, pinyin }` pair when `text` contains exactly one such
+ * pair; otherwise `text` unchanged (no pair found, or more than one)
+ */
+export function parseAnswer(
+  text: string
+): { hanzi: string; pinyin: string } | string {
+  const pairs = parseHanziPinyinPairs(text);
+  const hanziPinyin = pairs.length === 1 ? pairs[0] : null;
+
+  return hanziPinyin || text;
+}

 export const parseBlanks = (text: string) => {
  const trimmed = text.trim();
@@ -19,27 +61,14 @@ to be wrapped in <p> tags`);

  const { paragraphs } = rawParagraphs.reduce(
    (acc, p) => {
-      const splitByBlank = p.split('BLANK');
+      const containsRuby = /<ruby>/.test(p);
+      const { elements, blankCount } = containsRuby
+        ? parseChineseParagraph(p, acc.count)
+        : parsePlainParagraph(p, acc.count);

-      const parsedParagraph = splitByBlank
-        .map<ParagraphElement[]>((text, i) => [
-          { type: 'text', value: text },
-          { type: 'blank', value: acc.count + i }
-        ])
-        .flat();
-      parsedParagraph.pop(); // remove last blank
-
-      const paragraph = parsedParagraph.filter(p => {
-        // remove empty strings
-        if (p.type === 'text') {
-          return p.value;
-        } else {
-          return true;
-        }
-      });
      return {
-        count: acc.count + splitByBlank.length - 1,
-        paragraphs: [...acc.paragraphs, paragraph]
+        count: acc.count + blankCount,
+        paragraphs: [...acc.paragraphs, elements]
      };
    },
    { count: 0, paragraphs: [] } as {
@@ -50,3 +79,84 @@ to be wrapped in <p> tags`);

  return paragraphs;
 };
+
+/**
+ * Parses a paragraph that contains ruby HTML elements (Chinese hanzi-pinyin)
+ * Handles multiple ruby elements separated by text and BLANK tokens
+ *
+ * The paragraph is split on BLANK before ruby elements are looked for, so a
+ * BLANK token placed inside a `<ruby>` element would break that element apart.
+ *
+ * NOTE(review): a part that starts with `<ruby>` but does not match the exact
+ * `<ruby>hanzi<rp>(</rp><rt>pinyin</rt><rp>)</rp></ruby>` shape produces no
+ * element at all, i.e. its content is dropped silently.
+ *
+ * @param paragraph - Inner content of one paragraph
+ * @param startingBlankIndex - Index assigned to the first blank found here
+ * @returns The parsed elements in document order and the number of blanks
+ * found in this paragraph
+ */
+function parseChineseParagraph(
+  paragraph: string,
+  startingBlankIndex: number
+): { elements: ParagraphElement[]; blankCount: number } {
+  const elements: ParagraphElement[] = [];
+  let blankIndex = startingBlankIndex;
+
+  // First, split the paragraph on BLANK tokens so we can add blanks between segments
+  const segments = paragraph.split('BLANK');
+
+  for (let s = 0; s < segments.length; s++) {
+    const segment = segments[s];
+
+    // Split the segment into text and ruby parts. Capturing group keeps the ruby tags.
+    // filter(Boolean) removes the empty strings split() produces around matches.
+    const parts = segment.split(/(<ruby>.*?<\/ruby>)/g).filter(Boolean);
+
+    for (const part of parts) {
+      if (part.startsWith('<ruby>')) {
+        // Group 1 is the base text (hanzi), group 2 the <rt> content (pinyin).
+        const rubyMatch = part.match(
+          /^<ruby>([^<]+)<rp>\(<\/rp><rt>([^<]+)<\/rt><rp>\)<\/rp><\/ruby>$/
+        );
+        if (rubyMatch) {
+          elements.push({
+            type: 'hanzi-pinyin',
+            value: { hanzi: rubyMatch[1], pinyin: rubyMatch[2] }
+          });
+        }
+      } else if (part) {
+        elements.push({ type: 'text', value: part });
+      }
+    }
+
+    // After each segment except the last, insert a blank node.
+    if (s < segments.length - 1) {
+      elements.push({ type: 'blank', value: blankIndex });
+      blankIndex++;
+    }
+  }
+
+  return {
+    elements,
+    blankCount: blankIndex - startingBlankIndex
+  };
+}
+
+/**
+ * Parses a plain (non-Chinese) paragraph
+ *
+ * The paragraph is split on BLANK tokens; each token becomes a blank node and
+ * the non-empty text between tokens becomes text nodes.
+ *
+ * @param paragraph - Inner content of one paragraph
+ * @param startingBlankIndex - Index assigned to the first blank found here
+ * @returns The parsed elements in document order and the number of blanks
+ * found in this paragraph
+ */
+function parsePlainParagraph(
+  paragraph: string,
+  startingBlankIndex: number
+): { elements: ParagraphElement[]; blankCount: number } {
+  const splitByBlank = paragraph.split('BLANK');
+
+  // Follow every text segment with a blank; indices continue from
+  // startingBlankIndex.
+  const parsedParagraph = splitByBlank
+    .map<ParagraphElement[]>((text, i) => [
+      { type: 'text', value: text },
+      { type: 'blank', value: startingBlankIndex + i }
+    ])
+    .flat();
+
+  // remove last blank inserted by the mapping
+  parsedParagraph.pop();
+
+  // Drop empty text nodes, which appear when BLANK starts or ends the
+  // paragraph or when two BLANK tokens are adjacent.
+  const elements = parsedParagraph.filter(p => {
+    if (p.type === 'text') {
+      return p.value;
+    }
+    return true;
+  });
+
+  return {
+    elements,
+    blankCount: splitByBlank.length - 1
+  };
+}
@@ -35,6 +35,7 @@ import { SceneSubject } from '../components/scene/scene-subject';
 import { getChallengePaths } from '../utils/challenge-paths';
 import { isChallengeCompletedSelector } from '../redux/selectors';
 import { replaceAppleQuotes } from '../../../utils/replace-apple-quotes';
+import { parseHanziPinyinPairs } from './parse-blanks';

 import './show.css';

@@ -135,12 +136,27 @@ const ShowFillInTheBlank = ({
  const handleSubmit = () => {
    const blankAnswers = fillInTheBlank.blanks.map(b => b.answer);

-    const newAnswersCorrect = userAnswers.map(
-      (userAnswer, i) =>
-        !!userAnswer &&
-        replaceAppleQuotes(userAnswer.trim()).toLowerCase() ===
-          blankAnswers[i].toLowerCase()
-    );
+    const newAnswersCorrect = userAnswers.map((userAnswer, i) => {
+      if (!userAnswer) return false;
+
+      const answer = blankAnswers[i];
+      const normalizedUserAnswer = replaceAppleQuotes(
+        userAnswer.trim()
+      ).toLowerCase();
+
+      const pairs = parseHanziPinyinPairs(answer);
+      const hanziPinyin = pairs.length === 1 ? pairs[0] : null;
+
+      if (hanziPinyin) {
+        const { hanzi } = hanziPinyin;
+        // TODO: Implement full hanzi-pinyin validation logic
+        // https://github.com/freeCodeCamp/language-curricula/issues/18
+        return normalizedUserAnswer === hanzi;
+      }
+
+      return normalizedUserAnswer === answer.toLowerCase();
+    });
+
    setAnswersCorrect(newAnswersCorrect);
    const hasWrongAnswer = newAnswersCorrect.some(a => a === false);
    if (!hasWrongAnswer) {
@@ -294,6 +310,7 @@ export const query = graphql`
            answer
            feedback
          }
+          inputType
        }
        tests {
          text