From 33325b9002533d6bc3abb94e876dfce8818dfc73 Mon Sep 17 00:00:00 2001
From: Huyen Nguyen <25715018+huyenltnguyen@users.noreply.github.com>
Date: Tue, 25 Nov 2025 11:02:22 -0800
Subject: [PATCH] feat(client,challenge-parser): update fill-in-the-blank to
support Chinese (#63741)
---
client/gatsby-node.js | 1 +
client/src/redux/prop-types.ts | 1 +
.../components/fill-in-the-blanks.tsx | 54 ++++-
.../fill-in-the-blank/parse-blanks.test.ts | 223 +++++++++++++++++-
.../fill-in-the-blank/parse-blanks.ts | 152 ++++++++++--
.../Challenges/fill-in-the-blank/show.tsx | 29 ++-
.../6904b3933a383d68ec0e5f0d.md | 2 +-
.../6904bfa12f761c705b37b377.md | 10 +-
.../6904c3aaa0b7a0757ffc4d2b.md | 10 +-
.../6904c51ee5d1fb78335b71bf.md | 2 +-
curriculum/schema/challenge-schema.js | 3 +-
...fill-in-the-blank-blank-answer-mismatch.md | 9 +
.../with-chinese-fill-in-the-blank-latin.md | 17 ++
...ith-chinese-fill-in-the-blank-no-blanks.md | 9 +
...with-chinese-fill-in-the-blank-no-hanzi.md | 9 +
...ith-chinese-fill-in-the-blank-no-pinyin.md | 9 +
...e-fill-in-the-blank-wrong-answer-format.md | 13 +
.../with-chinese-fill-in-the-blank.md | 46 ++++
.../parser/__fixtures__/with-chinese-mcq.md | 4 +-
.../parser/plugins/add-fill-in-the-blank.js | 142 +++++++----
.../plugins/add-fill-in-the-blank.test.js | 114 ++++++++-
.../parser/plugins/add-text.test.js | 2 +-
.../parser/plugins/utils/i18n-stringify.js | 118 +++++----
.../plugins/utils/i18n-stringify.test.js | 161 +++++++++++--
24 files changed, 964 insertions(+), 176 deletions(-)
create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-blank-answer-mismatch.md
create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-latin.md
create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-blanks.md
create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-hanzi.md
create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-pinyin.md
create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-wrong-answer-format.md
create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank.md
diff --git a/client/gatsby-node.js b/client/gatsby-node.js
index 71d03b712c5..9742b286812 100644
--- a/client/gatsby-node.js
+++ b/client/gatsby-node.js
@@ -410,6 +410,7 @@ exports.createSchemaCustomization = ({ actions }) => {
type FillInTheBlank {
sentence: String
blanks: [Blank]
+ inputType: String
}
type Blank {
answer: String
diff --git a/client/src/redux/prop-types.ts b/client/src/redux/prop-types.ts
index c403ac1d218..4a953cce415 100644
--- a/client/src/redux/prop-types.ts
+++ b/client/src/redux/prop-types.ts
@@ -49,6 +49,7 @@ export type Question = {
export type FillInTheBlank = {
sentence: string;
blanks: MultipleChoiceAnswer[];
+ inputType?: 'pinyin-tone' | 'pinyin-to-hanzi';
};
export type Fields = {
diff --git a/client/src/templates/Challenges/components/fill-in-the-blanks.tsx b/client/src/templates/Challenges/components/fill-in-the-blanks.tsx
index a8ce4ae2073..41a840e6c7d 100644
--- a/client/src/templates/Challenges/components/fill-in-the-blanks.tsx
+++ b/client/src/templates/Challenges/components/fill-in-the-blanks.tsx
@@ -2,7 +2,7 @@ import React from 'react';
import { useTranslation } from 'react-i18next';
import { Spacer } from '@freecodecamp/ui';
-import { parseBlanks } from '../fill-in-the-blank/parse-blanks';
+import { parseBlanks, parseAnswer } from '../fill-in-the-blank/parse-blanks';
import PrismFormatted from '../components/prism-formatted';
import { FillInTheBlank } from '../../../redux/prop-types';
import ChallengeHeading from './challenge-heading';
@@ -16,6 +16,23 @@ type FillInTheBlankProps = {
handleInputChange: (inputIndex: number, value: string) => void;
};
+const AnswerText = ({ answer }: { answer: string }) => {
+ const parsedAnswer = parseAnswer(answer);
+ // parseAnswer yields the raw string unless the answer is exactly one hanzi (pinyin) pair.
+ if (typeof parsedAnswer === 'string') {
+ return {parsedAnswer} ;
+ }
+ // Pair form: render the hanzi followed by its pinyin in parentheses.
+ return (
+
+ {parsedAnswer.hanzi}
+ (
+ {parsedAnswer.pinyin}
+ )
+
+ );
+};
+
function FillInTheBlanks({
fillInTheBlank: { sentence, blanks },
answersCorrect,
@@ -36,6 +53,17 @@ function FillInTheBlanks({
return cls;
};
+ const getAnswerLength = (answer: string): number => {
+ const parsedAnswer = parseAnswer(answer);
+ // Plain answers are measured directly; hanzi (pinyin) answers are measured by their pinyin.
+ if (typeof parsedAnswer === 'string') {
+ return parsedAnswer.length;
+ }
+
+ // TODO: This is a simplification. Revisit later to account for tones and spaces.
+ return parsedAnswer.pinyin.length;
+ };
+
const paragraphs = parseBlanks(sentence);
const blankAnswers = blanks.map(b => b.answer);
@@ -55,25 +83,35 @@ function FillInTheBlanks({
return value;
}
- // If a blank is answered correctly, render the answer as part of the sentence.
- if (type === 'blank' && answersCorrect[value] === true) {
+ if (type === 'hanzi-pinyin') {
+ const { hanzi, pinyin } = value;
return (
-
- {blankAnswers[value]}
-
+
+ {hanzi}
+ (
+ {pinyin}
+ )
+
);
}
+ // If a blank is answered correctly, render the answer as part of the sentence.
+ if (type === 'blank' && answersCorrect[value] === true) {
+ return ;
+ }
+
+ const answerLength = getAnswerLength(blankAnswers[value]);
+
return (
handleInputChange(node.value, e.target.value)
}
- size={blankAnswers[value].length}
+ size={answerLength}
autoComplete='off'
aria-label={t('learn.fill-in-the-blank.blank')}
{...(answersCorrect[value] === false
diff --git a/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.test.ts b/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.test.ts
index 7b5196c0aa1..4acf583d6f2 100644
--- a/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.test.ts
+++ b/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.test.ts
@@ -1,5 +1,9 @@
import { describe, it, expect } from 'vitest';
-import { parseBlanks } from './parse-blanks';
+import {
+ parseBlanks,
+ parseHanziPinyinPairs,
+ parseAnswer
+} from './parse-blanks';
describe('parseBlanks', () => {
it('handles strings without blanks', () => {
@@ -129,4 +133,221 @@ describe('parseBlanks', () => {
expect(() => parseBlanks('
hello BLANK!
hello BLANK!')).toThrow();
expect(() => parseBlanks('hello BLANK!hello
')).toThrow();
});
+
+ it('handles Chinese with single BLANK', () => {
+ expect(
+ parseBlanks('BLANK好( hǎo )
')
+ ).toEqual([
+ [
+ { type: 'blank', value: 0 },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '好', pinyin: 'hǎo' }
+ }
+ ]
+ ]);
+ });
+
+ it('handles Chinese without pinyin', () => {
+ expect(parseBlanks('你BLANK好
')).toEqual([
+ [
+ { type: 'text', value: '你' },
+ { type: 'blank', value: 0 },
+ { type: 'text', value: '好' }
+ ]
+ ]);
+ });
+
+ it('handles Chinese with multiple BLANKs', () => {
+ expect(
+ parseBlanks(
+ 'BLANK好( hǎo ) ,BLANK是王华( shì Wang Hua )
'
+ )
+ ).toEqual([
+ [
+ { type: 'blank', value: 0 },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '好', pinyin: 'hǎo' }
+ },
+ { type: 'text', value: ',' },
+ { type: 'blank', value: 1 },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '是王华', pinyin: 'shì Wang Hua' }
+ }
+ ]
+ ]);
+ });
+
+ it('handles Chinese with multiple adjacent BLANKs', () => {
+ expect(
+ parseBlanks(
+ 'BLANK BLANK好( hǎo )
'
+ )
+ ).toEqual([
+ [
+ { type: 'blank', value: 0 },
+ { type: 'text', value: ' ' },
+ { type: 'blank', value: 1 },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '好', pinyin: 'hǎo' }
+ }
+ ]
+ ]);
+ });
+
+ it('handles Chinese with BLANK at the end', () => {
+ expect(
+ parseBlanks(
+ '你好( nǐ hǎo ) BLANK
'
+ )
+ ).toEqual([
+ [
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '你好', pinyin: 'nǐ hǎo' }
+ },
+ { type: 'blank', value: 0 }
+ ]
+ ]);
+ });
+
+ it('handles Chinese with spaces around BLANK', () => {
+ expect(
+ parseBlanks(
+ '你( nǐ ) BLANK 我( wǒ )
'
+ )
+ ).toEqual([
+ [
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '你', pinyin: 'nǐ' }
+ },
+ { type: 'text', value: ' ' },
+ { type: 'blank', value: 0 },
+ { type: 'text', value: ' ' },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '我', pinyin: 'wǒ' }
+ }
+ ]
+ ]);
+ });
+
+ it('handles Latin text adjacent to BLANK', () => {
+ expect(
+ parseBlanks(
+ '我( wǒ ) BLANK UI 设计师( shè jì shī ) 。
'
+ )
+ ).toEqual([
+ [
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '我', pinyin: 'wǒ' }
+ },
+ { type: 'text', value: ' ' },
+ { type: 'blank', value: 0 },
+ { type: 'text', value: ' UI ' },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '设计师', pinyin: 'shè jì shī' }
+ },
+ { type: 'text', value: ' 。' }
+ ]
+ ]);
+ });
+
+ it('handles Chinese with multiple separate groups', () => {
+ expect(
+ parseBlanks(
+ 'BLANK好( hǎo ) ,我是王华( wǒ shì Wang Hua ) ,请问你( qǐng wèn nǐ ) BLANK什么名字( shén me míng zi ) ?
'
+ )
+ ).toEqual([
+ [
+ { type: 'blank', value: 0 },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '好', pinyin: 'hǎo' }
+ },
+ { type: 'text', value: ',' },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '我是王华', pinyin: 'wǒ shì Wang Hua' }
+ },
+ { type: 'text', value: ',' },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '请问你', pinyin: 'qǐng wèn nǐ' }
+ },
+ { type: 'blank', value: 1 },
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '什么名字', pinyin: 'shén me míng zi' }
+ },
+ { type: 'text', value: '?' }
+ ]
+ ]);
+ });
+
+ it('handles Chinese ruby with trailing punctuation', () => {
+ expect(
+ parseBlanks(
+ '你是刘明吗( nǐ shì Liu Ming ma ) ?
'
+ )
+ ).toEqual([
+ [
+ {
+ type: 'hanzi-pinyin',
+ value: { hanzi: '你是刘明吗', pinyin: 'nǐ shì Liu Ming ma' }
+ },
+ { type: 'text', value: '?' }
+ ]
+ ]);
+ });
+});
+
+describe('parseHanziPinyinPairs', () => {
+  it('returns an array with one pair for well-formed input', () => {
+    const result = parseHanziPinyinPairs('你好 (nǐ hǎo)');
+    expect(result).toHaveLength(1);
+    expect(result[0]).toEqual({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+  });
+
+  it('handles parentheses without a space', () => {
+    const result = parseHanziPinyinPairs('你好(nǐ hǎo)');
+    expect(result).toHaveLength(1);
+    expect(result[0]).toEqual({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+  });
+
+  it('returns an empty array for non-matching input', () => {
+    expect(parseHanziPinyinPairs('hello')).toEqual([]);
+  });
+});
+
+describe('parseAnswer', () => {
+  it('parses an answer with a space before the parentheses', () => {
+    expect(parseAnswer('你好 (nǐ hǎo)')).toEqual({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+  });
+
+  it('parses an answer without a space before the parentheses', () => {
+    expect(parseAnswer('你好(nǐ hǎo)')).toEqual({
+      hanzi: '你好',
+      pinyin: 'nǐ hǎo'
+    });
+  });
+
+  it('returns the original string when the pattern does not match', () => {
+    expect(parseAnswer('just some text')).toBe('just some text');
+  });
});
diff --git a/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.ts b/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.ts
index ddd6cb678d1..54358a8a737 100644
--- a/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.ts
+++ b/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.ts
@@ -1,6 +1,48 @@
-type TextNode = { type: 'text'; value: string };
+type PlainTextNode = {
+ type: 'text';
+ value: string;
+};
+
+// Hanzi/pinyin node representing an inline pronunciation pair
+type HanziPinyinNode = {
+ type: 'hanzi-pinyin';
+ value: { hanzi: string; pinyin: string };
+};
+
type BlankNode = { type: 'blank'; value: number };
-type ParagraphElement = TextNode | BlankNode;
+
+type ParagraphElement = PlainTextNode | BlankNode | HanziPinyinNode;
+
+/**
+ * Extracts every `hanzi (pinyin)` pair from text; "hanzi" is any non-parenthesis text before the opening parenthesis
+ * @param text - Text that may contain `hanzi (pinyin)` or `hanzi(pinyin)` patterns
+ * @returns The pairs in order of appearance (both parts trimmed), or an empty array when nothing matches
+ */
+export function parseHanziPinyinPairs(
+ text: string
+): Array<{ hanzi: string; pinyin: string }> {
+ const pairs: Array<{ hanzi: string; pinyin: string }> = [];
+ const regex = /([^()]+?)\s*\(([^)]+)\)/g;
+ let match;
+ // The global flag makes exec() resume after the previous match, so the loop visits every pair.
+ while ((match = regex.exec(text)) !== null) {
+ pairs.push({
+ hanzi: match[1].trim(),
+ pinyin: match[2].trim()
+ });
+ }
+
+ return pairs;
+}
+
+export function parseAnswer(
+ text: string
+): { hanzi: string; pinyin: string } | string {
+ const pairs = parseHanziPinyinPairs(text);
+ const hanziPinyin = pairs.length === 1 ? pairs[0] : null;
+ // With zero or several pairs the text is not a single hanzi (pinyin) answer, so it is returned unchanged.
+ return hanziPinyin || text;
+}
export const parseBlanks = (text: string) => {
const trimmed = text.trim();
@@ -19,27 +61,14 @@ to be wrapped in tags`);
const { paragraphs } = rawParagraphs.reduce(
(acc, p) => {
- const splitByBlank = p.split('BLANK');
+      // Paragraphs with <ruby> markup (hanzi annotated with pinyin) need the ruby-aware parser.
+      const { elements, blankCount } = /<ruby>/.test(p)
+        ? parseChineseParagraph(p, acc.count)
+        : parsePlainParagraph(p, acc.count);
- const parsedParagraph = splitByBlank
- .map((text, i) => [
- { type: 'text', value: text },
- { type: 'blank', value: acc.count + i }
- ])
- .flat();
- parsedParagraph.pop(); // remove last blank
-
- const paragraph = parsedParagraph.filter(p => {
- // remove empty strings
- if (p.type === 'text') {
- return p.value;
- } else {
- return true;
- }
- });
return {
- count: acc.count + splitByBlank.length - 1,
- paragraphs: [...acc.paragraphs, paragraph]
+ count: acc.count + blankCount,
+ paragraphs: [...acc.paragraphs, elements]
};
},
{ count: 0, paragraphs: [] } as {
@@ -50,3 +79,84 @@ to be wrapped in tags`);
return paragraphs;
};
+
+/**
+ * Parses a paragraph containing <ruby> elements (hanzi annotated with pinyin) into text, hanzi-pinyin and blank nodes.
+ * Blanks are numbered from startingBlankIndex; ruby elements without the expected <rp>/<rt> structure are skipped.
+ */
+function parseChineseParagraph(
+  paragraph: string,
+  startingBlankIndex: number
+): { elements: ParagraphElement[]; blankCount: number } {
+  const elements: ParagraphElement[] = [];
+  let blankIndex = startingBlankIndex;
+
+  // First, split the paragraph on BLANK tokens so we can add blanks between segments
+  const segments = paragraph.split('BLANK');
+
+  for (let s = 0; s < segments.length; s++) {
+    const segment = segments[s];
+
+    // Split the segment into text and ruby parts. Capturing group keeps the ruby tags.
+    const parts = segment.split(/(<ruby>.*?<\/ruby>)/g).filter(Boolean);
+
+    for (const part of parts) {
+      if (part.startsWith('<ruby>')) {
+        const rubyMatch = part.match(
+          /^<ruby>([^<]+)<rp>\(<\/rp><rt>([^<]+)<\/rt><rp>\)<\/rp><\/ruby>$/
+        );
+        if (rubyMatch) {
+          elements.push({
+            type: 'hanzi-pinyin',
+            value: { hanzi: rubyMatch[1], pinyin: rubyMatch[2] }
+          });
+        }
+      } else if (part) {
+        elements.push({ type: 'text', value: part });
+      }
+    }
+
+    // After each segment except the last, insert a blank node.
+    if (s < segments.length - 1) {
+      elements.push({ type: 'blank', value: blankIndex });
+      blankIndex++;
+    }
+  }
+
+  return {
+    elements,
+    blankCount: blankIndex - startingBlankIndex
+  };
+}
+
+/**
+ * Parses a paragraph without ruby markup: text around BLANK tokens becomes text nodes (empty ones dropped), each BLANK a numbered blank node
+ */
+function parsePlainParagraph(
+ paragraph: string,
+ startingBlankIndex: number
+): { elements: ParagraphElement[]; blankCount: number } {
+ const splitByBlank = paragraph.split('BLANK');
+
+ const parsedParagraph = splitByBlank
+ .map((text, i) => [
+ { type: 'text', value: text },
+ { type: 'blank', value: startingBlankIndex + i }
+ ])
+ .flat();
+
+ // The mapping appends a blank after every piece, including the last one; drop that trailing blank
+ parsedParagraph.pop();
+
+ const elements = parsedParagraph.filter(p => {
+ if (p.type === 'text') {
+ return p.value;
+ }
+ return true;
+ });
+
+ return {
+ elements,
+ blankCount: splitByBlank.length - 1
+ };
+}
diff --git a/client/src/templates/Challenges/fill-in-the-blank/show.tsx b/client/src/templates/Challenges/fill-in-the-blank/show.tsx
index e8325a14c99..4c95ab32630 100644
--- a/client/src/templates/Challenges/fill-in-the-blank/show.tsx
+++ b/client/src/templates/Challenges/fill-in-the-blank/show.tsx
@@ -35,6 +35,7 @@ import { SceneSubject } from '../components/scene/scene-subject';
import { getChallengePaths } from '../utils/challenge-paths';
import { isChallengeCompletedSelector } from '../redux/selectors';
import { replaceAppleQuotes } from '../../../utils/replace-apple-quotes';
+import { parseHanziPinyinPairs } from './parse-blanks';
import './show.css';
@@ -135,12 +136,27 @@ const ShowFillInTheBlank = ({
const handleSubmit = () => {
const blankAnswers = fillInTheBlank.blanks.map(b => b.answer);
- const newAnswersCorrect = userAnswers.map(
- (userAnswer, i) =>
- !!userAnswer &&
- replaceAppleQuotes(userAnswer.trim()).toLowerCase() ===
- blankAnswers[i].toLowerCase()
- );
+    const newAnswersCorrect = userAnswers.map((userAnswer, i) => {
+      if (!userAnswer) return false;
+
+      const answer = blankAnswers[i];
+      const normalizedUserAnswer = replaceAppleQuotes(
+        userAnswer.trim()
+      ).toLowerCase();
+
+      const pairs = parseHanziPinyinPairs(answer);
+      const hanziPinyin = pairs.length === 1 ? pairs[0] : null;
+
+      if (hanziPinyin) {
+        // The user's answer is lowercased above, so lowercase the hanzi too (it may contain Latin text).
+        // TODO: Implement full hanzi-pinyin validation logic
+        // https://github.com/freeCodeCamp/language-curricula/issues/18
+        return normalizedUserAnswer === hanziPinyin.hanzi.toLowerCase();
+      }
+
+      return normalizedUserAnswer === answer.toLowerCase();
+    });
+
setAnswersCorrect(newAnswersCorrect);
const hasWrongAnswer = newAnswersCorrect.some(a => a === false);
if (!hasWrongAnswer) {
@@ -294,6 +310,7 @@ export const query = graphql`
answer
feedback
}
+ inputType
}
tests {
text
diff --git a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904b3933a383d68ec0e5f0d.md b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904b3933a383d68ec0e5f0d.md
index 14dc7a8748d..1df94dbe2a5 100644
--- a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904b3933a383d68ec0e5f0d.md
+++ b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904b3933a383d68ec0e5f0d.md
@@ -54,4 +54,4 @@ That is part of the question, but not how she politely begins it.
`请问 (qǐng wèn)` means "excuse me". It's often used at the start of a question to sound polite. For example:
-`请问你是刘明吗?(qǐng wèn nǐ shì Liu Ming ma)` – Excuse me, are you Liu Ming?
+`请问你是刘明吗 (qǐng wèn nǐ shì Liu Ming ma)?` – Excuse me, are you Liu Ming?
diff --git a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904bfa12f761c705b37b377.md b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904bfa12f761c705b37b377.md
index faa5f99786d..8922c28d1ee 100644
--- a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904bfa12f761c705b37b377.md
+++ b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904bfa12f761c705b37b377.md
@@ -21,24 +21,20 @@ Listen to the audio and complete the sentence below.
## --sentence--
-`你好,我是王华,请问BLANK叫什么名字?(nǐ hǎo wǒ shì Wang Hua qǐng wèn BLANK jiào shén me míng zi)`
+`你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua),请问 (qǐng wèn) BLANK 叫什么名字 (jiào shén me míng zi)?`
## --blanks--
-`你`
+`你 (nǐ)`
### --feedback--
This word means "you" and refers to someone you are speaking to.
----
-
-`nǐ`
-
# --explanation--
`你 (nǐ)` means "you". It's used to talk directly to another person. For example:
-`你是刘明吗?(nǐ shì Liu Ming ma)` – Are you Liu Ming?
+`你是刘明吗 (nǐ shì Liu Ming ma)?` – Are you Liu Ming?
You've learned how to use `我 (wǒ)` to refer to yourself. Both `我 (wǒ)` and `你 (nǐ)` are **personal pronouns**, which means they are used to refer to people.
diff --git a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c3aaa0b7a0757ffc4d2b.md b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c3aaa0b7a0757ffc4d2b.md
index ef67435256e..b0d8e56a384 100644
--- a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c3aaa0b7a0757ffc4d2b.md
+++ b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c3aaa0b7a0757ffc4d2b.md
@@ -21,22 +21,18 @@ Listen to the audio and complete the sentence below.
## --sentence--
-`你好,我是王华,请问你BLANK什么名字? (nǐ hǎo wǒ shì Wang Hua qǐng wèn nǐ BLANK shén me míng zi)`
+`你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)?`
## --blanks--
-`叫`
+`叫 (jiào)`
### --feedback--
This character means "to be called" or "to be named".
----
-
-`jiào`
-
# --explanation--
`叫 (jiào)` means "to be called". It's often used after a subject to introduce a name. For example:
-`我叫王华。(wǒ jiào Wang Hua)` – I am called Wang Hua.
+`我叫王华 (wǒ jiào Wang Hua)。` – I am called Wang Hua.
diff --git a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c51ee5d1fb78335b71bf.md b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c51ee5d1fb78335b71bf.md
index 880c466d548..75277146904 100644
--- a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c51ee5d1fb78335b71bf.md
+++ b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c51ee5d1fb78335b71bf.md
@@ -52,4 +52,4 @@ She isn't asking where the person is from.
# --explanation--
-`什么名字 (shén me míng zi)` means "what name". `你叫什么名字?(nǐ jiào shén me míng zi)` means "what is your name?". Wang Hua is asking for the other person's name.
+`什么名字 (shén me míng zi)` means "what name". `你叫什么名字 (nǐ jiào shén me míng zi)?` means "what is your name?". Wang Hua is asking for the other person's name.
diff --git a/curriculum/schema/challenge-schema.js b/curriculum/schema/challenge-schema.js
index ed447a34750..b4b34250ac6 100644
--- a/curriculum/schema/challenge-schema.js
+++ b/curriculum/schema/challenge-schema.js
@@ -228,7 +228,8 @@ const schema = Joi.object().keys({
feedback: Joi.string().allow(null)
})
)
- .required()
+ .required(),
+ inputType: Joi.string().valid('pinyin-tone', 'pinyin-to-hanzi').optional()
}),
forumTopicId: Joi.number(),
id: Joi.objectId().required(),
diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-blank-answer-mismatch.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-blank-answer-mismatch.md
new file mode 100644
index 00000000000..8872947d889
--- /dev/null
+++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-blank-answer-mismatch.md
@@ -0,0 +1,9 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK BLANK`
+
+## --blanks--
+
+`你 (nǐ)`
\ No newline at end of file
diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-latin.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-latin.md
new file mode 100644
index 00000000000..ada003169f1
--- /dev/null
+++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-latin.md
@@ -0,0 +1,17 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`我 (wǒ) BLANK UI 设计师 (shè jì shī) 。`
+
+## --blanks--
+
+`是 (shì)`
+
+### --feedback--
+
+Feedback text.
+
+# --explanation--
+
+Explanation text.
\ No newline at end of file
diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-blanks.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-blanks.md
new file mode 100644
index 00000000000..b084fde5b4a
--- /dev/null
+++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-blanks.md
@@ -0,0 +1,9 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`你好 (nǐ hǎo)`
+
+## --blanks--
+
+`你`
\ No newline at end of file
diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-hanzi.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-hanzi.md
new file mode 100644
index 00000000000..ce334bee91c
--- /dev/null
+++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-hanzi.md
@@ -0,0 +1,9 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK hǎo`
+
+## --blanks--
+
+`nǐ`
\ No newline at end of file
diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-pinyin.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-pinyin.md
new file mode 100644
index 00000000000..386c7c272d7
--- /dev/null
+++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-pinyin.md
@@ -0,0 +1,9 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK好`
+
+## --blanks--
+
+`你 (nǐ)`
\ No newline at end of file
diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-wrong-answer-format.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-wrong-answer-format.md
new file mode 100644
index 00000000000..ca01d3dc71b
--- /dev/null
+++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-wrong-answer-format.md
@@ -0,0 +1,13 @@
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK 好 (hǎo) BLANK`
+
+## --blanks--
+
+`你`
+
+---
+
+`nǐ`
\ No newline at end of file
diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank.md
new file mode 100644
index 00000000000..eb7d1a58e5b
--- /dev/null
+++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank.md
@@ -0,0 +1,46 @@
+---
+lang: zh-CN
+inputType: pinyin-to-hanzi
+---
+
+# --fillInTheBlank--
+
+## --sentence--
+
+`BLANK BLANK,BLANK 是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)?`
+
+## --blanks--
+
+`你 (nǐ)`
+
+### --feedback--
+
+Feedback text containing `汉字 (hàn zì)`.
+
+---
+
+`好 (hǎo)`
+
+### --feedback--
+
+This means "good" or "well".
+
+---
+
+`我 (wǒ)`
+
+### --feedback--
+
+This means "I".
+
+---
+
+`叫 (jiào)`
+
+### --feedback--
+
+This means "to be called".
+
+# --explanation--
+
+Explanation text containing `汉字 (hàn zì)`.
diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-mcq.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-mcq.md
index beb0c4df2f1..fceeae221d3 100644
--- a/tools/challenge-parser/parser/__fixtures__/with-chinese-mcq.md
+++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-mcq.md
@@ -49,4 +49,6 @@ Feedback text.
# --explanation--
-Wang Hua uses `请问 (qǐng wèn)` to politely start her question.
\ No newline at end of file
+`我是 (wǒ shì) Web 开发者 (kāi fā zhě)。` – I am a web developer.
+
+`你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua),请问你叫什么名字 (qǐng wèn nǐ jiào shén me míng zi)?` – Hello, I am Wang Hua, may I ask what your name is?
\ No newline at end of file
diff --git a/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.js b/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.js
index 5647c26a6b7..ce35d6204db 100644
--- a/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.js
+++ b/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.js
@@ -3,8 +3,10 @@ const find = require('unist-util-find');
const visit = require('unist-util-visit');
const { getSection } = require('./utils/get-section');
const getAllBefore = require('./utils/before-heading');
-const mdastToHtml = require('./utils/mdast-to-html');
-
+const {
+ createMdastToHtml,
+ parseHanziPinyinPairs
+} = require('./utils/i18n-stringify');
const { splitOnThematicBreak } = require('./utils/split-on-thematic-break');
const NOT_IN_PARAGRAPHS = `Each inline code block in the fillInTheBlank sentence section must in its own paragraph
@@ -40,19 +42,102 @@ function plugin() {
if (fillInTheBlankNodes.length > 0) {
const fillInTheBlankTree = root(fillInTheBlankNodes);
- validateBlanksCount(fillInTheBlankTree);
+ validateBlanksSectionCount(fillInTheBlankTree);
const sentenceNodes = getSection(fillInTheBlankTree, '--sentence--');
const blanksNodes = getSection(fillInTheBlankTree, '--blanks--');
- const fillInTheBlank = getfillInTheBlank(sentenceNodes, blanksNodes);
+ const lang = file.data.lang;
+ const inputType = file.data.inputType;
+ const toHtml = createMdastToHtml(lang);
- file.data.fillInTheBlank = fillInTheBlank;
+ file.data.fillInTheBlank = getFillInTheBlank(sentenceNodes, blanksNodes);
+
+ function getFillInTheBlank(sentenceNodes, blanksNodes) {
+ const sentenceWithoutCodeBlocks = sentenceNodes.map(node => {
+ node.children.forEach(child => {
+ if (child.type === 'text' && child.value.trim() === '')
+ throw Error(NOT_IN_PARAGRAPHS);
+ if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK);
+ });
+
+ // For Chinese hanzi-pinyin, keep as inlineCode so handler generates ruby elements
+ if (lang === 'zh-CN') {
+ const hasChinesePairs = node.children.some(
+ child =>
+ child.type === 'inlineCode' &&
+ parseHanziPinyinPairs(child.value).length > 0
+ );
+
+ if (hasChinesePairs) {
+ return node;
+ }
+ }
+
+ // Convert inlineCode to text for non-Chinese content
+ const children = node.children.map(child => ({
+ ...child,
+ type: 'text'
+ }));
+ return { ...node, children };
+ });
+
+ const sentence = toHtml(sentenceWithoutCodeBlocks);
+ const blanks = getBlanks(blanksNodes);
+
+ if (!sentence)
+ throw Error('sentence is missing from fill in the blank');
+ if (!blanks) throw Error('blanks are missing from fill in the blank');
+ if (sentence.match(/BLANK/g).length !== blanks.length)
+ throw Error(`Number of BLANKs doesn't match the number of answers.`);
+
+ // For 'pinyin-to-hanzi' inputType, all answers must be of type 'hanzi-pinyin'.
+ // This validation ensures compatibility with the pinyin input in the UI,
+ // where users type pinyin and the system automatically converts it to hanzi
+ // if the input value matches the expected pinyin from the answer.
+ if (inputType === 'pinyin-to-hanzi') {
+ const allAnswersAreHanziPinyin = blanks.every(
+ blank => parseHanziPinyinPairs(blank.answer).length === 1
+ );
+
+ if (!allAnswersAreHanziPinyin) {
+ throw Error(
+ `When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format.`
+ );
+ }
+ }
+
+ return { sentence, blanks, ...(inputType && { inputType }) };
+ }
+
+ function getBlanks(blanksNodes) {
+ const blanksGroups = splitOnThematicBreak(blanksNodes);
+ // Each group returned by splitOnThematicBreak becomes one { answer, feedback } entry.
+ return blanksGroups.map(blanksGroup => {
+ const blanksTree = root(blanksGroup);
+ const feedback = find(blanksTree, { value: '--feedback--' });
+
+ if (feedback) {
+ const blanksNodes = getAllBefore(blanksTree, '--feedback--');
+ const feedbackNodes = getSection(blanksTree, '--feedback--');
+
+ return {
+ answer: blanksNodes[0].children[0].value,
+ feedback: toHtml(feedbackNodes)
+ };
+ }
+ // No --feedback-- node was found in this group, so the blank carries no feedback.
+ return {
+ answer: blanksGroup[0].children[0].value,
+ feedback: null
+ };
+ });
+ }
}
}
}
-function validateBlanksCount(fillInTheBlankTree) {
+function validateBlanksSectionCount(fillInTheBlankTree) {
let blanksCount = 0;
visit(fillInTheBlankTree, { value: '--blanks--' }, () => {
blanksCount++;
@@ -64,49 +149,4 @@ function validateBlanksCount(fillInTheBlankTree) {
);
}
-function getfillInTheBlank(sentenceNodes, blanksNodes) {
- const sentenceWithoutCodeBlocks = sentenceNodes.map(node => {
- node.children.forEach(child => {
- if (child.type === 'text' && child.value.trim() === '')
- throw Error(NOT_IN_PARAGRAPHS);
- if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK);
- });
-
- const children = node.children.map(child => ({ ...child, type: 'text' }));
- return { ...node, children };
- });
- const sentence = mdastToHtml(sentenceWithoutCodeBlocks);
- const blanks = getBlanks(blanksNodes);
-
- if (!sentence) throw Error('sentence is missing from fill in the blank');
- if (!blanks) throw Error('blanks are missing from fill in the blank');
- if (sentence.match(/BLANK/g).length !== blanks.length)
- throw Error(
- `Number of underscores in sentence doesn't match the number of blanks`
- );
-
- return { sentence, blanks };
-}
-
-function getBlanks(blanksNodes) {
- const blanksGroups = splitOnThematicBreak(blanksNodes);
-
- return blanksGroups.map(blanksGroup => {
- const blanksTree = root(blanksGroup);
- const feedback = find(blanksTree, { value: '--feedback--' });
-
- if (feedback) {
- const blanksNodes = getAllBefore(blanksTree, '--feedback--');
- const feedbackNodes = getSection(blanksTree, '--feedback--');
-
- return {
- answer: blanksNodes[0].children[0].value,
- feedback: mdastToHtml(feedbackNodes)
- };
- }
-
- return { answer: blanksGroup[0].children[0].value, feedback: null };
- });
-}
-
module.exports = plugin;
diff --git a/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.test.js b/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.test.js
index 9b1cc8b921d..f3332f10caf 100644
--- a/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.test.js
+++ b/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.test.js
@@ -8,7 +8,13 @@ describe('fill-in-the-blanks plugin', () => {
mockFillInTheBlankTwoSentencesAST,
mockFillInTheBlankBadSentence,
mockFillInTheBlankBadParagraph,
- mockFillInTheBlankMultipleBlanks;
+ mockFillInTheBlankMultipleBlanks,
+ mockChineseFillInTheBlankAST,
+ mockChineseFillInTheBlankNoPinyinAST,
+ mockChineseFillInTheBlankNoHanziAST,
+ mockChineseFillInTheBlankWrongAnswerFormatAST,
+ mockChineseFillInTheBlankBlankAnswerMismatchAST,
+ mockChineseFillInTheBlankLatinAST;
const plugin = addFillInTheBlankQuestion();
let file = { data: {} };
@@ -29,6 +35,24 @@ describe('fill-in-the-blanks plugin', () => {
mockFillInTheBlankMultipleBlanks = await parseFixture(
'with-fill-in-the-blank-many-blanks.md'
);
+ mockChineseFillInTheBlankAST = await parseFixture(
+ 'with-chinese-fill-in-the-blank.md'
+ );
+ mockChineseFillInTheBlankNoPinyinAST = await parseFixture(
+ 'with-chinese-fill-in-the-blank-no-pinyin.md'
+ );
+ mockChineseFillInTheBlankNoHanziAST = await parseFixture(
+ 'with-chinese-fill-in-the-blank-no-hanzi.md'
+ );
+ mockChineseFillInTheBlankWrongAnswerFormatAST = await parseFixture(
+ 'with-chinese-fill-in-the-blank-wrong-answer-format.md'
+ );
+ mockChineseFillInTheBlankBlankAnswerMismatchAST = await parseFixture(
+ 'with-chinese-fill-in-the-blank-blank-answer-mismatch.md'
+ );
+ mockChineseFillInTheBlankLatinAST = await parseFixture(
+ 'with-chinese-fill-in-the-blank-latin.md'
+ );
});
beforeEach(() => {
@@ -55,15 +79,15 @@ describe('fill-in-the-blanks plugin', () => {
expect(Array.isArray(testObject.blanks)).toBe(true);
expect(testObject.blanks.length).toBe(3);
expect(testObject.blanks[0]).toHaveProperty('answer');
- expect(typeof testObject.blanks[0].answer).toBe('string');
+ expect(testObject.blanks[0].answer).toEqual('are');
expect(testObject.blanks[0]).toHaveProperty('feedback');
expect(typeof testObject.blanks[0].feedback).toBe('string');
expect(testObject.blanks[1]).toHaveProperty('answer');
- expect(typeof testObject.blanks[1].answer).toBe('string');
+ expect(testObject.blanks[1].answer).toEqual('right');
expect(testObject.blanks[1]).toHaveProperty('feedback');
expect(typeof testObject.blanks[1].feedback).toBe('string');
expect(testObject.blanks[2]).toHaveProperty('answer');
- expect(typeof testObject.blanks[2].answer).toBe('string');
+ expect(testObject.blanks[2].answer).toEqual('Nice');
expect(testObject.blanks[2]).toHaveProperty('feedback');
expect(testObject.blanks[2].feedback).toBeNull();
});
@@ -167,4 +191,86 @@ Example of good formatting:
'The verb to be is an irregular verb. When conjugated with the pronoun you, be becomes are. For example: You are an English learner.
'
});
});
+
+ it('should parse Chinese fill-in-the-blank sentence and answer correctly if they are in `hanzi (pinyin)` format', () => {
+ // Run the plugin on the Chinese fixture with the pinyin-to-hanzi input type selected.
+ file.data.lang = 'zh-CN';
+ file.data.inputType = 'pinyin-to-hanzi';
+ plugin(mockChineseFillInTheBlankAST, file);
+ const testObject = file.data.fillInTheBlank;
+
+ // The input type set on the file is expected to be carried onto the parsed result.
+ expect(testObject.inputType).toBe('pinyin-to-hanzi');
+
+ expect(testObject.sentence).toBe(
+ 'BLANK BLANK,BLANK 是王华( shì Wang Hua ) ,请问你( qǐng wèn nǐ ) BLANK 什么名字( shén me míng zi ) ?
'
+ );
+ expect(testObject.blanks.length).toBe(4);
+
+ // Each blank keeps its answer as raw `hanzi (pinyin)` text and has its own feedback.
+ expect(testObject.blanks[0].answer).toEqual('你 (nǐ)');
+ expect(testObject.blanks[0].feedback).toBe(
+ 'Feedback text containing 汉字( hàn zì ) .
'
+ );
+
+ expect(testObject.blanks[1].answer).toEqual('好 (hǎo)');
+ expect(testObject.blanks[1].feedback).toBe(
+ 'This means "good" or "well".
'
+ );
+
+ expect(testObject.blanks[2].answer).toEqual('我 (wǒ)');
+ expect(testObject.blanks[2].feedback).toBe('This means "I".
');
+
+ expect(testObject.blanks[3].answer).toEqual('叫 (jiào)');
+ expect(testObject.blanks[3].feedback).toBe(
+ 'This means "to be called".
'
+ );
+ });
+
+ it('should return sentence as plain text when sentence does not contain pinyin', () => {
+ // Only lang is assigned in this test; inputType is not assigned here.
+ file.data.lang = 'zh-CN';
+ plugin(mockChineseFillInTheBlankNoPinyinAST, file);
+ const testObject = file.data.fillInTheBlank;
+
+ // The sentence has hanzi but no pinyin, while the answer still carries both parts.
+ expect(testObject.sentence).toBe('BLANK好
');
+ expect(testObject.blanks[0].answer).toEqual('你 (nǐ)');
+ });
+
+ it('should return sentence as plain text when sentence does not contain hanzi', () => {
+ // Only lang is assigned in this test; inputType is not assigned here.
+ file.data.lang = 'zh-CN';
+ plugin(mockChineseFillInTheBlankNoHanziAST, file);
+ const testObject = file.data.fillInTheBlank;
+
+ // Both the sentence and the answer are pinyin-only text in this fixture.
+ expect(testObject.sentence).toBe('BLANK hǎo
');
+ expect(testObject.blanks[0].answer).toEqual('nǐ');
+ });
+
+  it("should throw if the number of blanks in the sentence doesn't match the number of answers", () => {
+    // The mismatch fixture is processed as Chinese content and must be rejected by the plugin.
+    file.data.lang = 'zh-CN';
+    const runPlugin = () => {
+      plugin(mockChineseFillInTheBlankBlankAnswerMismatchAST, file);
+    };
+
+    expect(runPlugin).toThrow(
+      `Number of BLANKs doesn't match the number of answers.`
+    );
+  });
+
+  it('should throw error when inputType is pinyin-to-hanzi but answer is not in hanzi-pinyin format', () => {
+    // With the pinyin-to-hanzi input type selected, the wrong-answer-format fixture must be rejected.
+    file.data.lang = 'zh-CN';
+    file.data.inputType = 'pinyin-to-hanzi';
+    const runPlugin = () => {
+      plugin(mockChineseFillInTheBlankWrongAnswerFormatAST, file);
+    };
+
+    expect(runPlugin).toThrow(
+      "When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format."
+    );
+  });
+
+ it('should separate BLANK and adjacent Latin text in Chinese sentences', () => {
+ file.data.lang = 'zh-CN';
+ plugin(mockChineseFillInTheBlankLatinAST, file);
+ const testObject = file.data.fillInTheBlank;
+
+ // The expected sentence keeps BLANK and the Latin word UI as distinct, space-separated tokens.
+ expect(testObject.sentence).toBe(
+ '我( wǒ ) BLANK UI 设计师( shè jì shī ) 。
'
+ );
+ expect(testObject.blanks.length).toBe(1);
+
+ expect(testObject.blanks[0].answer).toEqual('是 (shì)');
+ expect(testObject.blanks[0].feedback).toBe('Feedback text.
');
+ });
});
diff --git a/tools/challenge-parser/parser/plugins/add-text.test.js b/tools/challenge-parser/parser/plugins/add-text.test.js
index 2a76a53c9cd..9bb839d7351 100644
--- a/tools/challenge-parser/parser/plugins/add-text.test.js
+++ b/tools/challenge-parser/parser/plugins/add-text.test.js
@@ -172,7 +172,7 @@ describe('add-text', () => {
'\nInstructions containing 汉字( hàn zì ) .
\n '
);
expect(zhFile.data.explanation).toBe(
- '\nWang Hua uses 请问( qǐng wèn ) to politely start her question.
\n '
+ '\n我是( wǒ shì ) Web 开发者( kāi fā zhě ) 。 – I am a web developer.
\n你好( nǐ hǎo ) ,我是王华( wǒ shì Wang Hua ) ,请问你叫什么名字( qǐng wèn nǐ jiào shén me míng zi ) ? – Hello, I am Wang Hua, may I ask what your name is?
\n '
);
});
});
diff --git a/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js b/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js
index 0e10a036ff4..8f4ab2bcfc5 100644
--- a/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js
+++ b/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js
@@ -1,61 +1,94 @@
const mdastToHTML = require('./mdast-to-html');
-/**
- * Parses Chinese text in format: hanzi (pinyin)
- * @param {string} text - Text in format: hanzi (pinyin)
- * @returns {{ hanzi: string, pinyin: string } | null} Parsed hanzi and pinyin, or null if not matching
- */
-function parseChinesePattern(text) {
- const match = text.match(/^(.+?)\s*\((.+?)\)$/);
+// Captures hanzi (pinyin) pairs (hanzi, optional whitespace, then pinyin parentheses)
+const HANZI_PINYIN_PAIR = '([\u4e00-\u9fff]+)\\s*\\(([^)]+)\\)';
- if (!match) {
- return null;
+// Matches the BLANK placeholder
+const BLANK_TOKEN = 'BLANK';
+
+// Matches runs of Chinese (full-width) and English (ASCII) punctuation.
+// The full-width marks are written as \u escapes so they cannot be mistaken
+// for their ASCII look-alikes:
+//   U+FF0C comma, U+3002 ideographic full stop, U+FF1F question mark,
+//   U+FF01 exclamation mark, U+FF1B semicolon, U+FF1A colon,
+//   U+3001 ideographic comma.
+// The ASCII set is: ! ? , ; :
+const PUNCTUATION = '[\uFF0C\u3002\uFF1F\uFF01\uFF1B\uFF1A\u3001!?,;:]+';
+
+// Matches runs of ASCII letters (a-z, A-Z) and whitespace
+const OTHER_TEXT = '([a-zA-Z\\s]+)';
+
+// Single global pattern used to split a value into tokens. The alternatives
+// are tried left to right: a hanzi (pinyin) pair (capture groups 1 and 2),
+// the BLANK placeholder, a run of punctuation, or a run of Latin
+// letters/whitespace (capture group 3). Only a pair match fills groups 1 and 2.
+const HANZI_PINYIN_REGEX = new RegExp(
+ `${HANZI_PINYIN_PAIR}|${BLANK_TOKEN}|${PUNCTUATION}|${OTHER_TEXT}`,
+ 'g'
+);
+
+/**
+ * Collects every `hanzi (pinyin)` pair that occurs in a piece of text.
+ *
+ * Tokens that are not pairs (BLANK, punctuation, Latin text) are scanned
+ * over but contribute nothing to the result.
+ * @param {string} text - Text that may contain any number of hanzi (pinyin) patterns
+ * @returns {Array<{hanzi: string, pinyin: string}>} The pairs in order of appearance; empty when there are none
+ */
+function parseHanziPinyinPairs(text) {
+  const found = [];
+  // Scan with a private copy so the shared global regex's lastIndex is never touched.
+  const scanner = new RegExp(HANZI_PINYIN_REGEX);
+
+  for (
+    let token = scanner.exec(text);
+    token !== null;
+    token = scanner.exec(text)
+  ) {
+    const [, hanzi, pinyin] = token;
+    // Groups 1 and 2 are only filled by the hanzi (pinyin) alternative.
+    if (!hanzi || !pinyin) continue;
+    found.push({ hanzi: hanzi.trim(), pinyin: pinyin.trim() });
}
- return {
- hanzi: match[1].trim(),
- pinyin: match[2].trim()
- };
+  return found;
}
/**
* Custom handler for Chinese inline code to render as ruby elements
+ * Matches hanzi-pinyin pairs, BLANK, and punctuation as separate elements
* @param {object} state - The state object from mdast-util-to-hast
* @param {object} node - The inlineCode node
- * @returns {object} Hast element node
+ * @returns {object|Array} Hast element node or array of nodes
*/
function chineseInlineCodeHandler(state, node) {
- const parsed = parseChinesePattern(node.value);
+ const rubyPairs = parseHanziPinyinPairs(node.value);
- if (parsed) {
- return {
- type: 'element',
- tagName: 'ruby',
- properties: {},
- children: [
- { type: 'text', value: parsed.hanzi },
- {
+ if (rubyPairs.length > 0) {
+ const matches = [...node.value.matchAll(HANZI_PINYIN_REGEX)];
+ const nodes = matches.map(fullMatch => {
+ if (fullMatch[1] && fullMatch[2]) {
+ return {
type: 'element',
- tagName: 'rp',
+ tagName: 'ruby',
properties: {},
- children: [{ type: 'text', value: '(' }]
- },
- {
- type: 'element',
- tagName: 'rt',
- properties: {},
- children: [{ type: 'text', value: parsed.pinyin }]
- },
- {
- type: 'element',
- tagName: 'rp',
- properties: {},
- children: [{ type: 'text', value: ')' }]
- }
- ]
- };
+ children: [
+ { type: 'text', value: fullMatch[1].trim() },
+ {
+ type: 'element',
+ tagName: 'rp',
+ properties: {},
+ children: [{ type: 'text', value: '(' }]
+ },
+ {
+ type: 'element',
+ tagName: 'rt',
+ properties: {},
+ children: [{ type: 'text', value: fullMatch[2].trim() }]
+ },
+ {
+ type: 'element',
+ tagName: 'rp',
+ properties: {},
+ children: [{ type: 'text', value: ')' }]
+ }
+ ]
+ };
+ }
+
+ // Every other match (BLANK, punctuation, Latin text including spaces) is emitted verbatim as a text node
+ return { type: 'text', value: fullMatch[0] };
+ });
+
+ return nodes.length === 1 ? nodes[0] : nodes;
}
+ // No hanzi (pinyin) pair was found: fall back to a plain code element
return {
type: 'element',
// TODO: change this to span
@@ -75,4 +108,7 @@ const rubyOptions = {
const createMdastToHtml = lang =>
lang == 'zh-CN' ? x => mdastToHTML(x, rubyOptions) : mdastToHTML;
-module.exports = { parseChinesePattern, createMdastToHtml };
+module.exports = {
+ parseHanziPinyinPairs,
+ createMdastToHtml
+};
diff --git a/tools/challenge-parser/parser/plugins/utils/i18n-stringify.test.js b/tools/challenge-parser/parser/plugins/utils/i18n-stringify.test.js
index 64631528c58..aff2e3b9110 100644
--- a/tools/challenge-parser/parser/plugins/utils/i18n-stringify.test.js
+++ b/tools/challenge-parser/parser/plugins/utils/i18n-stringify.test.js
@@ -1,44 +1,56 @@
import { describe, it, expect } from 'vitest';
-import { createMdastToHtml, parseChinesePattern } from './i18n-stringify';
+import { createMdastToHtml, parseHanziPinyinPairs } from './i18n-stringify';
-describe('parseChinesePattern', () => {
- it('should parse Chinese text with hanzi and pinyin', () => {
- const result = parseChinesePattern('你好 (nǐ hǎo)');
- expect(result).toEqual({
+describe('parseHanziPinyinPairs', () => {
+ it('should parse single hanzi-pinyin pair', () => {
+ // The space between the hanzi and the opening parenthesis is optional:
+ // both spellings must produce the same single pair.
+ const withSpaceSeparator = parseHanziPinyinPairs('你好 (nǐ hǎo)');
+
+ expect(withSpaceSeparator).toHaveLength(1);
+ expect(withSpaceSeparator[0]).toMatchObject({
+ hanzi: '你好',
+ pinyin: 'nǐ hǎo'
+ });
+
+ const withoutSpaceSeparator = parseHanziPinyinPairs('你好(nǐ hǎo)');
+
+ expect(withoutSpaceSeparator).toHaveLength(1);
+ expect(withoutSpaceSeparator[0]).toMatchObject({
hanzi: '你好',
pinyin: 'nǐ hǎo'
});
});
- it('should handle text without spaces before parentheses', () => {
- const result = parseChinesePattern('你好(nǐ hǎo)');
- expect(result).toEqual({
+ it('should parse multiple hanzi-pinyin pairs', () => {
+ // Two pairs separated by a full-width comma; the second pinyin contains
+ // plain Latin words (Wang Hua). Checked with and without the space before "(".
+ const withSpaceSeparator = parseHanziPinyinPairs(
+ '你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua)'
+ );
+ expect(withSpaceSeparator).toHaveLength(2);
+ expect(withSpaceSeparator[0]).toMatchObject({
hanzi: '你好',
pinyin: 'nǐ hǎo'
});
- });
+ expect(withSpaceSeparator[1]).toMatchObject({
+ hanzi: '我是王华',
+ pinyin: 'wǒ shì Wang Hua'
+ });
- it('should handle text with multiple spaces', () => {
- const result = parseChinesePattern('你好 (nǐ hǎo)');
- expect(result).toEqual({
+ const withoutSpaceSeparator = parseHanziPinyinPairs(
+ '你好(nǐ hǎo),我是王华(wǒ shì Wang Hua)'
+ );
+ expect(withoutSpaceSeparator).toHaveLength(2);
+ expect(withoutSpaceSeparator[0]).toMatchObject({
hanzi: '你好',
pinyin: 'nǐ hǎo'
});
+ expect(withoutSpaceSeparator[1]).toMatchObject({
+ hanzi: '我是王华',
+ pinyin: 'wǒ shì Wang Hua'
+ });
});
- it('should return null for text without parentheses', () => {
- const result = parseChinesePattern('你好');
- expect(result).toBeNull();
- });
-
- it('should return null for text with only opening parenthesis', () => {
- const result = parseChinesePattern('你好 (nǐ hǎo');
- expect(result).toBeNull();
- });
-
- it('should return null for empty string', () => {
- const result = parseChinesePattern('');
- expect(result).toBeNull();
+  it('should return empty array for text without pairs', () => {
+    // Hanzi with no parenthesised pinyin after it does not form a pair.
+    expect(parseHanziPinyinPairs('你好')).toHaveLength(0);
});
});
@@ -99,6 +111,93 @@ describe('createMdastToHtml', () => {
);
});
+ it('should render BLANK tokens and punctuation marks as plain text', () => {
+ // The same sentence is rendered twice: once with BLANK glued to its
+ // neighbours and once with spaces around it. The expectations differ only
+ // in that whitespace.
+ const toHtml = createMdastToHtml('zh-CN');
+ const withoutSpacesAroundBlanks = [
+ {
+ type: 'paragraph',
+ children: [
+ {
+ type: 'inlineCode',
+ value:
+ '你好 (nǐ hǎo),BLANK是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ)BLANK什么名字 (shén me míng zi)?'
+ }
+ ]
+ }
+ ];
+ expect(toHtml(withoutSpacesAroundBlanks)).toBe(
+ '你好( nǐ hǎo ) ,BLANK是王华( shì Wang Hua ) ,请问你( qǐng wèn nǐ ) BLANK什么名字( shén me míng zi ) ?
'
+ );
+
+ const withSpacesAroundBlanks = [
+ {
+ type: 'paragraph',
+ children: [
+ {
+ type: 'inlineCode',
+ value:
+ '你好 (nǐ hǎo), BLANK 是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)?'
+ }
+ ]
+ }
+ ];
+ expect(toHtml(withSpacesAroundBlanks)).toBe(
+ '你好( nǐ hǎo ) , BLANK 是王华( shì Wang Hua ) ,请问你( qǐng wèn nǐ ) BLANK 什么名字( shén me míng zi ) ?
'
+ );
+ });
+
+ it('should render Latin words as plain text while applying ruby to hanzi-pinyin pairs', () => {
+ // `UI` sits between two hanzi (pinyin) pairs and has no pinyin of its own.
+ const toHtml = createMdastToHtml('zh-CN');
+ const nodes = [
+ {
+ type: 'paragraph',
+ children: [
+ {
+ type: 'inlineCode',
+ value: '我是 (wǒ shì) UI 设计师 (shè jì shī)'
+ }
+ ]
+ }
+ ];
+ const actual = toHtml(nodes);
+ expect(actual).toBe(
+ '我是( wǒ shì ) UI 设计师( shè jì shī )
'
+ );
+ });
+
+ it('should handle BLANK token and Latin word mix', () => {
+ // BLANK is directly followed by the Latin word `UI`; both sit between two
+ // hanzi (pinyin) pairs.
+ const toHtml = createMdastToHtml('zh-CN');
+ const nodes = [
+ {
+ type: 'paragraph',
+ children: [
+ {
+ type: 'inlineCode',
+ value: '我 (wǒ) BLANK UI 设计师 (shè jì shī)'
+ }
+ ]
+ }
+ ];
+ const actual = toHtml(nodes);
+ expect(actual).toBe(
+ '我( wǒ ) BLANK UI 设计师( shè jì shī )
'
+ );
+ });
+
+ it('should render multiple adjacent BLANK tokens in Chinese sentence', () => {
+ // Two BLANK tokens separated by one space, then a full-width comma and a
+ // single hanzi (pinyin) pair.
+ const toHtml = createMdastToHtml('zh-CN');
+ const nodes = [
+ {
+ type: 'paragraph',
+ children: [{ type: 'inlineCode', value: 'BLANK BLANK,你好 (nǐ hǎo)' }]
+ }
+ ];
+ const actual = toHtml(nodes);
+ expect(actual).toBe(
+ 'BLANK BLANK,你好( nǐ hǎo )
'
+ );
+ });
+
it('should fallback to code element if pattern does not match', () => {
const toHtml = createMdastToHtml('zh-CN');
const nodes = [
@@ -126,4 +225,16 @@ describe('createMdastToHtml', () => {
const actual = toHtml(nodes);
expect(actual).toBe('请问 (qǐng wèn)
');
});
+
+ it('should render as regular code when lang is not defined', () => {
+ // Called with no argument, so lang is undefined and the `lang == 'zh-CN'`
+ // check in createMdastToHtml selects the default mdastToHTML.
+ const toHtml = createMdastToHtml();
+ const nodes = [
+ {
+ type: 'paragraph',
+ children: [{ type: 'inlineCode', value: '请问 (qǐng wèn)' }]
+ }
+ ];
+ const actual = toHtml(nodes);
+ expect(actual).toBe('请问 (qǐng wèn)
');
+ });
});