From 33325b9002533d6bc3abb94e876dfce8818dfc73 Mon Sep 17 00:00:00 2001 From: Huyen Nguyen <25715018+huyenltnguyen@users.noreply.github.com> Date: Tue, 25 Nov 2025 11:02:22 -0800 Subject: [PATCH] feat(client,challenge-parser): update fill-in-the-blank to support Chinese (#63741) --- client/gatsby-node.js | 1 + client/src/redux/prop-types.ts | 1 + .../components/fill-in-the-blanks.tsx | 54 ++++- .../fill-in-the-blank/parse-blanks.test.ts | 223 +++++++++++++++++- .../fill-in-the-blank/parse-blanks.ts | 152 ++++++++++-- .../Challenges/fill-in-the-blank/show.tsx | 29 ++- .../6904b3933a383d68ec0e5f0d.md | 2 +- .../6904bfa12f761c705b37b377.md | 10 +- .../6904c3aaa0b7a0757ffc4d2b.md | 10 +- .../6904c51ee5d1fb78335b71bf.md | 2 +- curriculum/schema/challenge-schema.js | 3 +- ...fill-in-the-blank-blank-answer-mismatch.md | 9 + .../with-chinese-fill-in-the-blank-latin.md | 17 ++ ...ith-chinese-fill-in-the-blank-no-blanks.md | 9 + ...with-chinese-fill-in-the-blank-no-hanzi.md | 9 + ...ith-chinese-fill-in-the-blank-no-pinyin.md | 9 + ...e-fill-in-the-blank-wrong-answer-format.md | 13 + .../with-chinese-fill-in-the-blank.md | 46 ++++ .../parser/__fixtures__/with-chinese-mcq.md | 4 +- .../parser/plugins/add-fill-in-the-blank.js | 142 +++++++---- .../plugins/add-fill-in-the-blank.test.js | 114 ++++++++- .../parser/plugins/add-text.test.js | 2 +- .../parser/plugins/utils/i18n-stringify.js | 118 +++++---- .../plugins/utils/i18n-stringify.test.js | 161 +++++++++++-- 24 files changed, 964 insertions(+), 176 deletions(-) create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-blank-answer-mismatch.md create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-latin.md create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-blanks.md create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-hanzi.md create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-pinyin.md create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-wrong-answer-format.md create mode 100644 tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank.md diff --git a/client/gatsby-node.js b/client/gatsby-node.js index 71d03b712c5..9742b286812 100644 --- a/client/gatsby-node.js +++ b/client/gatsby-node.js @@ -410,6 +410,7 @@ exports.createSchemaCustomization = ({ actions }) => { type FillInTheBlank { sentence: String blanks: [Blank] + inputType: String } type Blank { answer: String diff --git a/client/src/redux/prop-types.ts b/client/src/redux/prop-types.ts index c403ac1d218..4a953cce415 100644 --- a/client/src/redux/prop-types.ts +++ b/client/src/redux/prop-types.ts @@ -49,6 +49,7 @@ export type Question = { export type FillInTheBlank = { sentence: string; blanks: MultipleChoiceAnswer[]; + inputType?: 'pinyin-tone' | 'pinyin-to-hanzi'; }; export type Fields = { diff --git a/client/src/templates/Challenges/components/fill-in-the-blanks.tsx b/client/src/templates/Challenges/components/fill-in-the-blanks.tsx index a8ce4ae2073..41a840e6c7d 100644 --- a/client/src/templates/Challenges/components/fill-in-the-blanks.tsx +++ b/client/src/templates/Challenges/components/fill-in-the-blanks.tsx @@ -2,7 +2,7 @@ import React from 'react'; import { useTranslation } from 'react-i18next'; import { Spacer } from '@freecodecamp/ui'; -import { parseBlanks } from '../fill-in-the-blank/parse-blanks'; +import { parseBlanks, parseAnswer } from '../fill-in-the-blank/parse-blanks'; import PrismFormatted from '../components/prism-formatted'; import { FillInTheBlank } from '../../../redux/prop-types'; import ChallengeHeading from './challenge-heading'; @@ -16,6 +16,23 @@ type FillInTheBlankProps = { handleInputChange: (inputIndex: number, value: string) => void; }; +const AnswerText = ({ answer }: { answer: string }) => { + const parsedAnswer = parseAnswer(answer); + + if (typeof parsedAnswer === 'string') { + return {parsedAnswer}; + } + + return ( + + {parsedAnswer.hanzi} + ( + {parsedAnswer.pinyin} + ) + + ); +}; + function FillInTheBlanks({ fillInTheBlank: { sentence, blanks }, answersCorrect, @@ -36,6 +53,17 @@ function FillInTheBlanks({ return cls; }; + const getAnswerLength = (answer: string): number => { + const parsedAnswer = parseAnswer(answer); + + if (typeof parsedAnswer === 'string') { + return parsedAnswer.length; + } + + // TODO: This is a simplification. Revisit later to account for tones and spaces. + return parsedAnswer.pinyin.length; + }; + const paragraphs = parseBlanks(sentence); const blankAnswers = blanks.map(b => b.answer); @@ -55,25 +83,35 @@ function FillInTheBlanks({ return value; } - // If a blank is answered correctly, render the answer as part of the sentence. - if (type === 'blank' && answersCorrect[value] === true) { + if (type === 'hanzi-pinyin') { + const { hanzi, pinyin } = value; return ( - - {blankAnswers[value]} - + + {hanzi} + ( + {pinyin} + ) + ); } + // If a blank is answered correctly, render the answer as part of the sentence. + if (type === 'blank' && answersCorrect[value] === true) { + return ; + } + + const answerLength = getAnswerLength(blankAnswers[value]); + return ( handleInputChange(node.value, e.target.value) } - size={blankAnswers[value].length} + size={answerLength} autoComplete='off' aria-label={t('learn.fill-in-the-blank.blank')} {...(answersCorrect[value] === false diff --git a/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.test.ts b/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.test.ts index 7b5196c0aa1..4acf583d6f2 100644 --- a/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.test.ts +++ b/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.test.ts @@ -1,5 +1,9 @@ import { describe, it, expect } from 'vitest'; -import { parseBlanks } from './parse-blanks'; +import { + parseBlanks, + parseHanziPinyinPairs, + parseAnswer +} from './parse-blanks'; describe('parseBlanks', () => { it('handles strings without blanks', () => { @@ -129,4 +133,221 @@ describe('parseBlanks', () => { expect(() => parseBlanks('

hello BLANK!

hello BLANK!')).toThrow(); expect(() => parseBlanks('hello BLANK!

hello

')).toThrow(); }); + + it('handles Chinese with single BLANK', () => { + expect( + parseBlanks('

BLANK(hǎo)

') + ).toEqual([ + [ + { type: 'blank', value: 0 }, + { + type: 'hanzi-pinyin', + value: { hanzi: '好', pinyin: 'hǎo' } + } + ] + ]); + }); + + it('handles Chinese without pinyin', () => { + expect(parseBlanks('

你BLANK好

')).toEqual([ + [ + { type: 'text', value: '你' }, + { type: 'blank', value: 0 }, + { type: 'text', value: '好' } + ] + ]); + }); + + it('handles Chinese with multiple BLANKs', () => { + expect( + parseBlanks( + '

BLANK(hǎo),BLANK是王华(shì Wang Hua)

' + ) + ).toEqual([ + [ + { type: 'blank', value: 0 }, + { + type: 'hanzi-pinyin', + value: { hanzi: '好', pinyin: 'hǎo' } + }, + { type: 'text', value: ',' }, + { type: 'blank', value: 1 }, + { + type: 'hanzi-pinyin', + value: { hanzi: '是王华', pinyin: 'shì Wang Hua' } + } + ] + ]); + }); + + it('handles Chinese with multiple adjacent BLANKs', () => { + expect( + parseBlanks( + '

BLANK BLANK(hǎo)

' + ) + ).toEqual([ + [ + { type: 'blank', value: 0 }, + { type: 'text', value: ' ' }, + { type: 'blank', value: 1 }, + { + type: 'hanzi-pinyin', + value: { hanzi: '好', pinyin: 'hǎo' } + } + ] + ]); + }); + + it('handles Chinese with BLANK at the end', () => { + expect( + parseBlanks( + '

你好(nǐ hǎo)BLANK

' + ) + ).toEqual([ + [ + { + type: 'hanzi-pinyin', + value: { hanzi: '你好', pinyin: 'nǐ hǎo' } + }, + { type: 'blank', value: 0 } + ] + ]); + }); + + it('handles Chinese with spaces around BLANK', () => { + expect( + parseBlanks( + '

() BLANK ()

' + ) + ).toEqual([ + [ + { + type: 'hanzi-pinyin', + value: { hanzi: '你', pinyin: 'nǐ' } + }, + { type: 'text', value: ' ' }, + { type: 'blank', value: 0 }, + { type: 'text', value: ' ' }, + { + type: 'hanzi-pinyin', + value: { hanzi: '我', pinyin: 'wǒ' } + } + ] + ]); + }); + + it('handles Latin text adjacent to BLANK', () => { + expect( + parseBlanks( + '

() BLANK UI 设计师(shè jì shī)

' + ) + ).toEqual([ + [ + { + type: 'hanzi-pinyin', + value: { hanzi: '我', pinyin: 'wǒ' } + }, + { type: 'text', value: ' ' }, + { type: 'blank', value: 0 }, + { type: 'text', value: ' UI ' }, + { + type: 'hanzi-pinyin', + value: { hanzi: '设计师', pinyin: 'shè jì shī' } + }, + { type: 'text', value: ' 。' } + ] + ]); + }); + + it('handles Chinese with multiple separate groups', () => { + expect( + parseBlanks( + '

BLANK(hǎo)我是王华(wǒ shì Wang Hua)请问你(qǐng wèn nǐ)BLANK什么名字(shén me míng zi)

' + ) + ).toEqual([ + [ + { type: 'blank', value: 0 }, + { + type: 'hanzi-pinyin', + value: { hanzi: '好', pinyin: 'hǎo' } + }, + { type: 'text', value: ',' }, + { + type: 'hanzi-pinyin', + value: { hanzi: '我是王华', pinyin: 'wǒ shì Wang Hua' } + }, + { type: 'text', value: ',' }, + { + type: 'hanzi-pinyin', + value: { hanzi: '请问你', pinyin: 'qǐng wèn nǐ' } + }, + { type: 'blank', value: 1 }, + { + type: 'hanzi-pinyin', + value: { hanzi: '什么名字', pinyin: 'shén me míng zi' } + }, + { type: 'text', value: '?' } + ] + ]); + }); + + it('handles Chinese ruby with trailing punctuation', () => { + expect( + parseBlanks( + '

你是刘明吗(nǐ shì Liu Ming ma)

' + ) + ).toEqual([ + [ + { + type: 'hanzi-pinyin', + value: { hanzi: '你是刘明吗', pinyin: 'nǐ shì Liu Ming ma' } + }, + { type: 'text', value: '?' } + ] + ]); + }); +}); + +describe('parseHanziPinyinPairs', () => { + it('parseHanziPinyinPairs returns array with one pair for well-formed input', () => { + const result = parseHanziPinyinPairs('你好 (nǐ hǎo)'); + expect(result).toHaveLength(1); + expect(result[0]).toEqual({ + hanzi: '你好', + pinyin: 'nǐ hǎo' + }); + }); + + it('parseHanziPinyinPairs handles parentheses without a space', () => { + const result = parseHanziPinyinPairs('你好(nǐ hǎo)'); + expect(result).toHaveLength(1); + expect(result[0]).toEqual({ + hanzi: '你好', + pinyin: 'nǐ hǎo' + }); + }); + + it('parseHanziPinyinPairs returns empty array for non-matching input', () => { + expect(parseHanziPinyinPairs('hello')).toEqual([]); + }); + + it('parseAnswer returns parsed object when pattern matches', () => { + expect(parseAnswer('你好 (nǐ hǎo)')).toEqual({ + hanzi: '你好', + pinyin: 'nǐ hǎo' + }); + }); +}); + +describe('parseAnswer', () => { + it('parseAnswer returns hanzi-pinyin string when pattern matches', () => { + expect(parseAnswer('你好(nǐ hǎo)')).toEqual({ + hanzi: '你好', + pinyin: 'nǐ hǎo' + }); + }); + + it('parseAnswer returns original string when pattern does not match', () => { + expect(parseAnswer('just some text')).toBe('just some text'); + }); }); diff --git a/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.ts b/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.ts index ddd6cb678d1..54358a8a737 100644 --- a/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.ts +++ b/client/src/templates/Challenges/fill-in-the-blank/parse-blanks.ts @@ -1,6 +1,48 @@ -type TextNode = { type: 'text'; value: string }; +type PlainTextNode = { + type: 'text'; + value: string; +}; + +// Hanzi/pinyin node representing an inline pronunciation pair +type HanziPinyinNode = { + type: 'hanzi-pinyin'; + value: { hanzi: string; pinyin: string }; +}; + type BlankNode = { type: 'blank'; value: number }; -type ParagraphElement = TextNode | BlankNode; + +type ParagraphElement = PlainTextNode | BlankNode | HanziPinyinNode; + +/** + * Parses all hanzi-pinyin pairs from text + * @param text - Text potentially containing hanzi (pinyin) patterns + * @returns Array of parsed hanzi and pinyin pairs + */ +export function parseHanziPinyinPairs( + text: string +): Array<{ hanzi: string; pinyin: string }> { + const pairs: Array<{ hanzi: string; pinyin: string }> = []; + const regex = /([^()]+?)\s*\(([^)]+)\)/g; + let match; + + while ((match = regex.exec(text)) !== null) { + pairs.push({ + hanzi: match[1].trim(), + pinyin: match[2].trim() + }); + } + + return pairs; +} + +export function parseAnswer( + text: string +): { hanzi: string; pinyin: string } | string { + const pairs = parseHanziPinyinPairs(text); + const hanziPinyin = pairs.length === 1 ? pairs[0] : null; + + return hanziPinyin || text; +} export const parseBlanks = (text: string) => { const trimmed = text.trim(); @@ -19,27 +61,14 @@ to be wrapped in

tags`); const { paragraphs } = rawParagraphs.reduce( (acc, p) => { - const splitByBlank = p.split('BLANK'); + const containsRuby = //.test(p); + const { elements, blankCount } = containsRuby + ? parseChineseParagraph(p, acc.count) + : parsePlainParagraph(p, acc.count); - const parsedParagraph = splitByBlank - .map((text, i) => [ - { type: 'text', value: text }, - { type: 'blank', value: acc.count + i } - ]) - .flat(); - parsedParagraph.pop(); // remove last blank - - const paragraph = parsedParagraph.filter(p => { - // remove empty strings - if (p.type === 'text') { - return p.value; - } else { - return true; - } - }); return { - count: acc.count + splitByBlank.length - 1, - paragraphs: [...acc.paragraphs, paragraph] + count: acc.count + blankCount, + paragraphs: [...acc.paragraphs, elements] }; }, { count: 0, paragraphs: [] } as { @@ -50,3 +79,84 @@ to be wrapped in

tags`); return paragraphs; }; + +/** + * Parses a paragraph that contains ruby HTML elements (Chinese hanzi-pinyin) + * Handles multiple ruby elements separated by text and BLANK tokens + */ +function parseChineseParagraph( + paragraph: string, + startingBlankIndex: number +): { elements: ParagraphElement[]; blankCount: number } { + const elements: ParagraphElement[] = []; + let blankIndex = startingBlankIndex; + + // First, split the paragraph on BLANK tokens so we can add blanks between segments + const segments = paragraph.split('BLANK'); + + for (let s = 0; s < segments.length; s++) { + const segment = segments[s]; + + // Split the segment into text and ruby parts. Capturing group keeps the ruby tags. + const parts = segment.split(/(.*?<\/ruby>)/g).filter(Boolean); + + for (const part of parts) { + if (part.startsWith('')) { + const rubyMatch = part.match( + /^([^<]+)\(<\/rp>([^<]+)<\/rt>\)<\/rp><\/ruby>$/ + ); + if (rubyMatch) { + elements.push({ + type: 'hanzi-pinyin', + value: { hanzi: rubyMatch[1], pinyin: rubyMatch[2] } + }); + } + } else if (part) { + elements.push({ type: 'text', value: part }); + } + } + + // After each segment except the last, insert a blank node. + if (s < segments.length - 1) { + elements.push({ type: 'blank', value: blankIndex }); + blankIndex++; + } + } + + return { + elements, + blankCount: blankIndex - startingBlankIndex + }; +} + +/** + * Parses a plain (non-Chinese) paragraph + */ +function parsePlainParagraph( + paragraph: string, + startingBlankIndex: number +): { elements: ParagraphElement[]; blankCount: number } { + const splitByBlank = paragraph.split('BLANK'); + + const parsedParagraph = splitByBlank + .map((text, i) => [ + { type: 'text', value: text }, + { type: 'blank', value: startingBlankIndex + i } + ]) + .flat(); + + // remove last blank inserted by the mapping + parsedParagraph.pop(); + + const elements = parsedParagraph.filter(p => { + if (p.type === 'text') { + return p.value; + } + return true; + }); + + return { + elements, + blankCount: splitByBlank.length - 1 + }; +} diff --git a/client/src/templates/Challenges/fill-in-the-blank/show.tsx b/client/src/templates/Challenges/fill-in-the-blank/show.tsx index e8325a14c99..4c95ab32630 100644 --- a/client/src/templates/Challenges/fill-in-the-blank/show.tsx +++ b/client/src/templates/Challenges/fill-in-the-blank/show.tsx @@ -35,6 +35,7 @@ import { SceneSubject } from '../components/scene/scene-subject'; import { getChallengePaths } from '../utils/challenge-paths'; import { isChallengeCompletedSelector } from '../redux/selectors'; import { replaceAppleQuotes } from '../../../utils/replace-apple-quotes'; +import { parseHanziPinyinPairs } from './parse-blanks'; import './show.css'; @@ -135,12 +136,27 @@ const ShowFillInTheBlank = ({ const handleSubmit = () => { const blankAnswers = fillInTheBlank.blanks.map(b => b.answer); - const newAnswersCorrect = userAnswers.map( - (userAnswer, i) => - !!userAnswer && - replaceAppleQuotes(userAnswer.trim()).toLowerCase() === - blankAnswers[i].toLowerCase() - ); + const newAnswersCorrect = userAnswers.map((userAnswer, i) => { + if (!userAnswer) return false; + + const answer = blankAnswers[i]; + const normalizedUserAnswer = replaceAppleQuotes( + userAnswer.trim() + ).toLowerCase(); + + const pairs = parseHanziPinyinPairs(answer); + const hanziPinyin = pairs.length === 1 ? pairs[0] : null; + + if (hanziPinyin) { + const { hanzi } = hanziPinyin; + // TODO: Implement full hanzi-pinyin validation logic + // https://github.com/freeCodeCamp/language-curricula/issues/18 + return normalizedUserAnswer === hanzi; + } + + return normalizedUserAnswer === answer.toLowerCase(); + }); + setAnswersCorrect(newAnswersCorrect); const hasWrongAnswer = newAnswersCorrect.some(a => a === false); if (!hasWrongAnswer) { @@ -294,6 +310,7 @@ export const query = graphql` answer feedback } + inputType } tests { text diff --git a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904b3933a383d68ec0e5f0d.md b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904b3933a383d68ec0e5f0d.md index 14dc7a8748d..1df94dbe2a5 100644 --- a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904b3933a383d68ec0e5f0d.md +++ b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904b3933a383d68ec0e5f0d.md @@ -54,4 +54,4 @@ That is part of the question, but not how she politely begins it. `请问 (qǐng wèn)` means "excuse me". It's often used at the start of a question to sound polite. For example: -`请问你是刘明吗?(qǐng wèn nǐ shì Liu Ming ma)` – Excuse me, are you Liu Ming? +`请问你是刘明吗 (qǐng wèn nǐ shì Liu Ming ma)?` – Excuse me, are you Liu Ming? diff --git a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904bfa12f761c705b37b377.md b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904bfa12f761c705b37b377.md index faa5f99786d..8922c28d1ee 100644 --- a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904bfa12f761c705b37b377.md +++ b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904bfa12f761c705b37b377.md @@ -21,24 +21,20 @@ Listen to the audio and complete the sentence below. ## --sentence-- -`你好,我是王华,请问BLANK叫什么名字?(nǐ hǎo wǒ shì Wang Hua qǐng wèn BLANK jiào shén me míng zi)` +`你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua),请问 (qǐng wèn) BLANK 叫什么名字 (jiào shén me míng zi)?` ## --blanks-- -`你` +`你 (nǐ)` ### --feedback-- This word means "you" and refers to someone you are speaking to. ---- - -`nǐ` - # --explanation-- `你 (nǐ)` means "you". It's used to talk directly to another person. For example: -`你是刘明吗?(nǐ shì Liu Ming ma)` – Are you Liu Ming? +`你是刘明吗 (nǐ shì Liu Ming ma)?` – Are you Liu Ming? You've learned how to use `我 (wǒ)` to refer to yourself. Both `我 (wǒ)` and `你 (nǐ)` are **personal pronouns**, which means they are used to refer to people. diff --git a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c3aaa0b7a0757ffc4d2b.md b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c3aaa0b7a0757ffc4d2b.md index ef67435256e..b0d8e56a384 100644 --- a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c3aaa0b7a0757ffc4d2b.md +++ b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c3aaa0b7a0757ffc4d2b.md @@ -21,22 +21,18 @@ Listen to the audio and complete the sentence below. ## --sentence-- -`你好,我是王华,请问你BLANK什么名字? (nǐ hǎo wǒ shì Wang Hua qǐng wèn nǐ BLANK shén me míng zi)` +`你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)?` ## --blanks-- -`叫` +`叫 (jiào)` ### --feedback-- This character means "to be called" or "to be named". ---- - -`jiào` - # --explanation-- `叫 (jiào)` means "to be called". It's often used after a subject to introduce a name. For example: -`我叫王华。(wǒ jiào Wang Hua)` – I am called Wang Hua. +`我叫王华 (wǒ jiào Wang Hua)。` – I am called Wang Hua. diff --git a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c51ee5d1fb78335b71bf.md b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c51ee5d1fb78335b71bf.md index 880c466d548..75277146904 100644 --- a/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c51ee5d1fb78335b71bf.md +++ b/curriculum/challenges/english/blocks/zh-a1-learn-understanding-the-questions-and-answers/6904c51ee5d1fb78335b71bf.md @@ -52,4 +52,4 @@ She isn't asking where the person is from. # --explanation-- -`什么名字 (shén me míng zi)` means "what name". `你叫什么名字?(nǐ jiào shén me míng zi)` means "what is your name?". Wang Hua is asking for the other person's name. +`什么名字 (shén me míng zi)` means "what name". `你叫什么名字 (nǐ jiào shén me míng zi)?` means "what is your name?". Wang Hua is asking for the other person's name. diff --git a/curriculum/schema/challenge-schema.js b/curriculum/schema/challenge-schema.js index ed447a34750..b4b34250ac6 100644 --- a/curriculum/schema/challenge-schema.js +++ b/curriculum/schema/challenge-schema.js @@ -228,7 +228,8 @@ const schema = Joi.object().keys({ feedback: Joi.string().allow(null) }) ) - .required() + .required(), + inputType: Joi.string().valid('pinyin-tone', 'pinyin-to-hanzi').optional() }), forumTopicId: Joi.number(), id: Joi.objectId().required(), diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-blank-answer-mismatch.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-blank-answer-mismatch.md new file mode 100644 index 00000000000..8872947d889 --- /dev/null +++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-blank-answer-mismatch.md @@ -0,0 +1,9 @@ +# --fillInTheBlank-- + +## --sentence-- + +`BLANK BLANK` + +## --blanks-- + +`你 (nǐ)` \ No newline at end of file diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-latin.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-latin.md new file mode 100644 index 00000000000..ada003169f1 --- /dev/null +++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-latin.md @@ -0,0 +1,17 @@ +# --fillInTheBlank-- + +## --sentence-- + +`我 (wǒ) BLANK UI 设计师 (shè jì shī) 。` + +## --blanks-- + +`是 (shì)` + +### --feedback-- + +Feedback text. + +# --explanation-- + +Explanation text. \ No newline at end of file diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-blanks.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-blanks.md new file mode 100644 index 00000000000..b084fde5b4a --- /dev/null +++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-blanks.md @@ -0,0 +1,9 @@ +# --fillInTheBlank-- + +## --sentence-- + +`你好 (nǐ hǎo)` + +## --blanks-- + +`你` \ No newline at end of file diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-hanzi.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-hanzi.md new file mode 100644 index 00000000000..ce334bee91c --- /dev/null +++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-hanzi.md @@ -0,0 +1,9 @@ +# --fillInTheBlank-- + +## --sentence-- + +`BLANK hǎo` + +## --blanks-- + +`nǐ` \ No newline at end of file diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-pinyin.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-pinyin.md new file mode 100644 index 00000000000..386c7c272d7 --- /dev/null +++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-no-pinyin.md @@ -0,0 +1,9 @@ +# --fillInTheBlank-- + +## --sentence-- + +`BLANK好` + +## --blanks-- + +`你 (nǐ)` \ No newline at end of file diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-wrong-answer-format.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-wrong-answer-format.md new file mode 100644 index 00000000000..ca01d3dc71b --- /dev/null +++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank-wrong-answer-format.md @@ -0,0 +1,13 @@ +# --fillInTheBlank-- + +## --sentence-- + +`BLANK 好 (hǎo) BLANK` + +## --blanks-- + +`你` + +--- + +`nǐ` \ No newline at end of file diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank.md new file mode 100644 index 00000000000..eb7d1a58e5b --- /dev/null +++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-fill-in-the-blank.md @@ -0,0 +1,46 @@ +--- +lang: zh-CN +inputType: pinyin-to-hanzi +--- + +# --fillInTheBlank-- + +## --sentence-- + +`BLANK BLANK,BLANK 是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)?` + +## --blanks-- + +`你 (nǐ)` + +### --feedback-- + +Feedback text containing `汉字 (hàn zì)`. + +--- + +`好 (hǎo)` + +### --feedback-- + +This means "good" or "well". + +--- + +`我 (wǒ)` + +### --feedback-- + +This means "I". + +--- + +`叫 (jiào)` + +### --feedback-- + +This means "to be called". + +# --explanation-- + +Explanation text containing `汉字 (hàn zì)`. diff --git a/tools/challenge-parser/parser/__fixtures__/with-chinese-mcq.md b/tools/challenge-parser/parser/__fixtures__/with-chinese-mcq.md index beb0c4df2f1..fceeae221d3 100644 --- a/tools/challenge-parser/parser/__fixtures__/with-chinese-mcq.md +++ b/tools/challenge-parser/parser/__fixtures__/with-chinese-mcq.md @@ -49,4 +49,6 @@ Feedback text. # --explanation-- -Wang Hua uses `请问 (qǐng wèn)` to politely start her question. \ No newline at end of file +`我是 (wǒ shì) Web 开发者 (kāi fā zhě)。` – I am a web developer. + +`你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua),请问你叫什么名字 (qǐng wèn nǐ jiào shén me míng zi)?` – Hello, I am Wang Hua, may I ask what your name is? \ No newline at end of file diff --git a/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.js b/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.js index 5647c26a6b7..ce35d6204db 100644 --- a/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.js +++ b/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.js @@ -3,8 +3,10 @@ const find = require('unist-util-find'); const visit = require('unist-util-visit'); const { getSection } = require('./utils/get-section'); const getAllBefore = require('./utils/before-heading'); -const mdastToHtml = require('./utils/mdast-to-html'); - +const { + createMdastToHtml, + parseHanziPinyinPairs +} = require('./utils/i18n-stringify'); const { splitOnThematicBreak } = require('./utils/split-on-thematic-break'); const NOT_IN_PARAGRAPHS = `Each inline code block in the fillInTheBlank sentence section must in its own paragraph @@ -40,19 +42,102 @@ function plugin() { if (fillInTheBlankNodes.length > 0) { const fillInTheBlankTree = root(fillInTheBlankNodes); - validateBlanksCount(fillInTheBlankTree); + validateBlanksSectionCount(fillInTheBlankTree); const sentenceNodes = getSection(fillInTheBlankTree, '--sentence--'); const blanksNodes = getSection(fillInTheBlankTree, '--blanks--'); - const fillInTheBlank = getfillInTheBlank(sentenceNodes, blanksNodes); + const lang = file.data.lang; + const inputType = file.data.inputType; + const toHtml = createMdastToHtml(lang); - file.data.fillInTheBlank = fillInTheBlank; + file.data.fillInTheBlank = getFillInTheBlank(sentenceNodes, blanksNodes); + + function getFillInTheBlank(sentenceNodes, blanksNodes) { + const sentenceWithoutCodeBlocks = sentenceNodes.map(node => { + node.children.forEach(child => { + if (child.type === 'text' && child.value.trim() === '') + throw Error(NOT_IN_PARAGRAPHS); + if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK); + }); + + // For Chinese hanzi-pinyin, keep as inlineCode so handler generates ruby elements + if (lang === 'zh-CN') { + const hasChinesePairs = node.children.some( + child => + child.type === 'inlineCode' && + parseHanziPinyinPairs(child.value).length > 0 + ); + + if (hasChinesePairs) { + return node; + } + } + + // Convert inlineCode to text for non-Chinese content + const children = node.children.map(child => ({ + ...child, + type: 'text' + })); + return { ...node, children }; + }); + + const sentence = toHtml(sentenceWithoutCodeBlocks); + const blanks = getBlanks(blanksNodes); + + if (!sentence) + throw Error('sentence is missing from fill in the blank'); + if (!blanks) throw Error('blanks are missing from fill in the blank'); + if (sentence.match(/BLANK/g).length !== blanks.length) + throw Error(`Number of BLANKs doesn't match the number of answers.`); + + // For 'pinyin-to-hanzi' inputType, all answers must be of type 'hanzi-pinyin'. + // This validation ensures compatibility with the pinyin input in the UI, + // where users type pinyin and the system automatically converts it to hanzi + // if the input value matches the expected pinyin from the answer. + if (inputType === 'pinyin-to-hanzi') { + const allAnswersAreHanziPinyin = blanks.every( + blank => parseHanziPinyinPairs(blank.answer).length === 1 + ); + + if (!allAnswersAreHanziPinyin) { + throw Error( + `When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format.` + ); + } + } + + return { sentence, blanks, ...(inputType && { inputType }) }; + } + + function getBlanks(blanksNodes) { + const blanksGroups = splitOnThematicBreak(blanksNodes); + + return blanksGroups.map(blanksGroup => { + const blanksTree = root(blanksGroup); + const feedback = find(blanksTree, { value: '--feedback--' }); + + if (feedback) { + const blanksNodes = getAllBefore(blanksTree, '--feedback--'); + const feedbackNodes = getSection(blanksTree, '--feedback--'); + + return { + answer: blanksNodes[0].children[0].value, + feedback: toHtml(feedbackNodes) + }; + } + + return { + answer: blanksGroup[0].children[0].value, + feedback: null + }; + }); + } } } } -function validateBlanksCount(fillInTheBlankTree) { +function validateBlanksSectionCount(fillInTheBlankTree) { let blanksCount = 0; visit(fillInTheBlankTree, { value: '--blanks--' }, () => { blanksCount++; @@ -64,49 +149,4 @@ function validateBlanksCount(fillInTheBlankTree) { ); } -function getfillInTheBlank(sentenceNodes, blanksNodes) { - const sentenceWithoutCodeBlocks = sentenceNodes.map(node => { - node.children.forEach(child => { - if (child.type === 'text' && child.value.trim() === '') - throw Error(NOT_IN_PARAGRAPHS); - if (child.type !== 'inlineCode') throw Error(NOT_IN_CODE_BLOCK); - }); - - const children = node.children.map(child => ({ ...child, type: 'text' })); - return { ...node, children }; - }); - const sentence = mdastToHtml(sentenceWithoutCodeBlocks); - const blanks = getBlanks(blanksNodes); - - if (!sentence) throw Error('sentence is missing from fill in the blank'); - if (!blanks) throw Error('blanks are missing from fill in the blank'); - if (sentence.match(/BLANK/g).length !== blanks.length) - throw Error( - `Number of underscores in sentence doesn't match the number of blanks` - ); - - return { sentence, blanks }; -} - -function getBlanks(blanksNodes) { - const blanksGroups = splitOnThematicBreak(blanksNodes); - - return blanksGroups.map(blanksGroup => { - const blanksTree = root(blanksGroup); - const feedback = find(blanksTree, { value: '--feedback--' }); - - if (feedback) { - const blanksNodes = getAllBefore(blanksTree, '--feedback--'); - const feedbackNodes = getSection(blanksTree, '--feedback--'); - - return { - answer: blanksNodes[0].children[0].value, - feedback: mdastToHtml(feedbackNodes) - }; - } - - return { answer: blanksGroup[0].children[0].value, feedback: null }; - }); -} - module.exports = plugin; diff --git a/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.test.js b/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.test.js index 9b1cc8b921d..f3332f10caf 100644 --- a/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.test.js +++ b/tools/challenge-parser/parser/plugins/add-fill-in-the-blank.test.js @@ -8,7 +8,13 @@ describe('fill-in-the-blanks plugin', () => { mockFillInTheBlankTwoSentencesAST, mockFillInTheBlankBadSentence, mockFillInTheBlankBadParagraph, - mockFillInTheBlankMultipleBlanks; + mockFillInTheBlankMultipleBlanks, + mockChineseFillInTheBlankAST, + mockChineseFillInTheBlankNoPinyinAST, + mockChineseFillInTheBlankNoHanziAST, + mockChineseFillInTheBlankWrongAnswerFormatAST, + mockChineseFillInTheBlankBlankAnswerMismatchAST, + mockChineseFillInTheBlankLatinAST; const plugin = addFillInTheBlankQuestion(); let file = { data: {} }; @@ -29,6 +35,24 @@ describe('fill-in-the-blanks plugin', () => { mockFillInTheBlankMultipleBlanks = await parseFixture( 'with-fill-in-the-blank-many-blanks.md' ); + mockChineseFillInTheBlankAST = await parseFixture( + 'with-chinese-fill-in-the-blank.md' + ); + mockChineseFillInTheBlankNoPinyinAST = await parseFixture( + 'with-chinese-fill-in-the-blank-no-pinyin.md' + ); + mockChineseFillInTheBlankNoHanziAST = await parseFixture( + 'with-chinese-fill-in-the-blank-no-hanzi.md' + ); + mockChineseFillInTheBlankWrongAnswerFormatAST = await parseFixture( + 'with-chinese-fill-in-the-blank-wrong-answer-format.md' + ); + mockChineseFillInTheBlankBlankAnswerMismatchAST = await parseFixture( + 'with-chinese-fill-in-the-blank-blank-answer-mismatch.md' + ); + mockChineseFillInTheBlankLatinAST = await parseFixture( + 'with-chinese-fill-in-the-blank-latin.md' + ); }); beforeEach(() => { @@ -55,15 +79,15 @@ describe('fill-in-the-blanks plugin', () => { expect(Array.isArray(testObject.blanks)).toBe(true); expect(testObject.blanks.length).toBe(3); expect(testObject.blanks[0]).toHaveProperty('answer'); - expect(typeof testObject.blanks[0].answer).toBe('string'); + expect(testObject.blanks[0].answer).toEqual('are'); expect(testObject.blanks[0]).toHaveProperty('feedback'); expect(typeof testObject.blanks[0].feedback).toBe('string'); expect(testObject.blanks[1]).toHaveProperty('answer'); - expect(typeof testObject.blanks[1].answer).toBe('string'); + expect(testObject.blanks[1].answer).toEqual('right'); expect(testObject.blanks[1]).toHaveProperty('feedback'); expect(typeof testObject.blanks[1].feedback).toBe('string'); expect(testObject.blanks[2]).toHaveProperty('answer'); - expect(typeof testObject.blanks[2].answer).toBe('string'); + expect(testObject.blanks[2].answer).toEqual('Nice'); expect(testObject.blanks[2]).toHaveProperty('feedback'); expect(testObject.blanks[2].feedback).toBeNull(); }); @@ -167,4 +191,86 @@ Example of good formatting: '

The verb to be is an irregular verb. When conjugated with the pronoun you, be becomes are. For example: You are an English learner.

' }); }); + + it('should parse Chinese fill-in-the-blank sentence and answer correctly if they are in `hanzi (pinyin)` format', () => { + file.data.lang = 'zh-CN'; + file.data.inputType = 'pinyin-to-hanzi'; + plugin(mockChineseFillInTheBlankAST, file); + const testObject = file.data.fillInTheBlank; + + expect(testObject.inputType).toBe('pinyin-to-hanzi'); + + expect(testObject.sentence).toBe( + '

BLANK BLANK,BLANK 是王华(shì Wang Hua)请问你(qǐng wèn nǐ) BLANK 什么名字(shén me míng zi)

' + ); + expect(testObject.blanks.length).toBe(4); + + expect(testObject.blanks[0].answer).toEqual('你 (nǐ)'); + expect(testObject.blanks[0].feedback).toBe( + '

Feedback text containing 汉字(hàn zì).

' + ); + + expect(testObject.blanks[1].answer).toEqual('好 (hǎo)'); + expect(testObject.blanks[1].feedback).toBe( + '

This means "good" or "well".

' + ); + + expect(testObject.blanks[2].answer).toEqual('我 (wǒ)'); + expect(testObject.blanks[2].feedback).toBe('

This means "I".

'); + + expect(testObject.blanks[3].answer).toEqual('叫 (jiào)'); + expect(testObject.blanks[3].feedback).toBe( + '

This means "to be called".

' + ); + }); + + it('should return sentence as plain text when sentence does not contain pinyin', () => { + file.data.lang = 'zh-CN'; + plugin(mockChineseFillInTheBlankNoPinyinAST, file); + const testObject = file.data.fillInTheBlank; + + expect(testObject.sentence).toBe('

BLANK好

'); + expect(testObject.blanks[0].answer).toEqual('你 (nǐ)'); + }); + + it('should return sentence as plain text when sentence does not contain hanzi', () => { + file.data.lang = 'zh-CN'; + plugin(mockChineseFillInTheBlankNoHanziAST, file); + const testObject = file.data.fillInTheBlank; + + expect(testObject.sentence).toBe('

BLANK hǎo

'); + expect(testObject.blanks[0].answer).toEqual('nǐ'); + }); + + it("should throw if the number of blanks in the sentence doesn't match the number of answers", () => { + file.data.lang = 'zh-CN'; + expect(() => { + plugin(mockChineseFillInTheBlankBlankAnswerMismatchAST, file); + }).toThrow(`Number of BLANKs doesn't match the number of answers.`); + }); + + it('should throw error when inputType is pinyin-to-hanzi but answer is not in hanzi-pinyin format', () => { + file.data.lang = 'zh-CN'; + file.data.inputType = 'pinyin-to-hanzi'; + + expect(() => { + plugin(mockChineseFillInTheBlankWrongAnswerFormatAST, file); + }).toThrow( + "When inputType is 'pinyin-to-hanzi', all answers must be in 'hanzi (pinyin)' format." + ); + }); + + it('should separate BLANK and adjacent Latin text in Chinese sentences', () => { + file.data.lang = 'zh-CN'; + plugin(mockChineseFillInTheBlankLatinAST, file); + const testObject = file.data.fillInTheBlank; + + expect(testObject.sentence).toBe( + '

() BLANK UI 设计师(shè jì shī)

' + ); + expect(testObject.blanks.length).toBe(1); + + expect(testObject.blanks[0].answer).toEqual('是 (shì)'); + expect(testObject.blanks[0].feedback).toBe('

Feedback text.

'); + }); }); diff --git a/tools/challenge-parser/parser/plugins/add-text.test.js b/tools/challenge-parser/parser/plugins/add-text.test.js index 2a76a53c9cd..9bb839d7351 100644 --- a/tools/challenge-parser/parser/plugins/add-text.test.js +++ b/tools/challenge-parser/parser/plugins/add-text.test.js @@ -172,7 +172,7 @@ describe('add-text', () => { '
\n

Instructions containing 汉字(hàn zì).

\n
' ); expect(zhFile.data.explanation).toBe( - '
\n

Wang Hua uses 请问(qǐng wèn) to politely start her question.

\n
' + '
\n

我是(wǒ shì) Web 开发者(kāi fā zhě)。 – I am a web developer.

\n

你好(nǐ hǎo)我是王华(wǒ shì Wang Hua)请问你叫什么名字(qǐng wèn nǐ jiào shén me míng zi)? – Hello, I am Wang Hua, may I ask what your name is?

\n
' ); }); }); diff --git a/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js b/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js index 0e10a036ff4..8f4ab2bcfc5 100644 --- a/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js +++ b/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js @@ -1,61 +1,94 @@ const mdastToHTML = require('./mdast-to-html'); -/** - * Parses Chinese text in format: hanzi (pinyin) - * @param {string} text - Text in format: hanzi (pinyin) - * @returns {{ hanzi: string, pinyin: string } | null} Parsed hanzi and pinyin, or null if not matching - */ -function parseChinesePattern(text) { - const match = text.match(/^(.+?)\s*\((.+?)\)$/); +// Captures hanzi (pinyin) pairs (hanzi, optional whitespace, then pinyin parentheses) +const HANZI_PINYIN_PAIR = '([\u4e00-\u9fff]+)\\s*\\(([^)]+)\\)'; - if (!match) { - return null; +// Matches the BLANK placeholder +const BLANK_TOKEN = 'BLANK'; + +// Matches Chinese and English punctuation +const PUNCTUATION = '[,。?!!?,;:;:、]+'; + +// Matches Latin text with spaces +const OTHER_TEXT = '([a-zA-Z\\s]+)'; + +const HANZI_PINYIN_REGEX = new RegExp( + `${HANZI_PINYIN_PAIR}|${BLANK_TOKEN}|${PUNCTUATION}|${OTHER_TEXT}`, + 'g' +); + +/** + * Parses all hanzi-pinyin pairs from text + * @param {string} text - Text potentially containing multiple hanzi (pinyin) patterns + * @returns {Array<{hanzi: string, pinyin: string}>} Array of parsed pairs + */ +function parseHanziPinyinPairs(text) { + const pairs = []; + const regex = new RegExp(HANZI_PINYIN_REGEX); + let match; + + while ((match = regex.exec(text)) !== null) { + if (match[1] && match[2]) { + pairs.push({ + hanzi: match[1].trim(), + pinyin: match[2].trim() + }); + } } - return { - hanzi: match[1].trim(), - pinyin: match[2].trim() - }; + return pairs; } /** * Custom handler for Chinese inline code to render as ruby elements + * Matches hanzi-pinyin pairs, BLANK, and punctuation as separate elements * @param {object} state - The state object from mdast-util-to-hast * @param {object} node - The inlineCode node - * @returns {object} Hast element node + * @returns {object|Array} Hast element node or array of nodes */ function chineseInlineCodeHandler(state, node) { - const parsed = parseChinesePattern(node.value); + const rubyPairs = parseHanziPinyinPairs(node.value); - if (parsed) { - return { - type: 'element', - tagName: 'ruby', - properties: {}, - children: [ - { type: 'text', value: parsed.hanzi }, - { + if (rubyPairs.length > 0) { + const matches = [...node.value.matchAll(HANZI_PINYIN_REGEX)]; + const nodes = matches.map(fullMatch => { + if (fullMatch[1] && fullMatch[2]) { + return { type: 'element', - tagName: 'rp', + tagName: 'ruby', properties: {}, - children: [{ type: 'text', value: '(' }] - }, - { - type: 'element', - tagName: 'rt', - properties: {}, - children: [{ type: 'text', value: parsed.pinyin }] - }, - { - type: 'element', - tagName: 'rp', - properties: {}, - children: [{ type: 'text', value: ')' }] - } - ] - }; + children: [ + { type: 'text', value: fullMatch[1].trim() }, + { + type: 'element', + tagName: 'rp', + properties: {}, + children: [{ type: 'text', value: '(' }] + }, + { + type: 'element', + tagName: 'rt', + properties: {}, + children: [{ type: 'text', value: fullMatch[2].trim() }] + }, + { + type: 'element', + tagName: 'rp', + properties: {}, + children: [{ type: 'text', value: ')' }] + } + ] + }; + } + + // Other captures (BLANK, punctuation, other text including spaces) should preserve exactly + return { type: 'text', value: fullMatch[0] }; + }); + + return nodes.length === 1 ? nodes[0] : nodes; } + // If static text, return code return { type: 'element', // TODO: change this to span @@ -75,4 +108,7 @@ const rubyOptions = { const createMdastToHtml = lang => lang == 'zh-CN' ? x => mdastToHTML(x, rubyOptions) : mdastToHTML; -module.exports = { parseChinesePattern, createMdastToHtml }; +module.exports = { + parseHanziPinyinPairs, + createMdastToHtml +}; diff --git a/tools/challenge-parser/parser/plugins/utils/i18n-stringify.test.js b/tools/challenge-parser/parser/plugins/utils/i18n-stringify.test.js index 64631528c58..aff2e3b9110 100644 --- a/tools/challenge-parser/parser/plugins/utils/i18n-stringify.test.js +++ b/tools/challenge-parser/parser/plugins/utils/i18n-stringify.test.js @@ -1,44 +1,56 @@ import { describe, it, expect } from 'vitest'; -import { createMdastToHtml, parseChinesePattern } from './i18n-stringify'; +import { createMdastToHtml, parseHanziPinyinPairs } from './i18n-stringify'; -describe('parseChinesePattern', () => { - it('should parse Chinese text with hanzi and pinyin', () => { - const result = parseChinesePattern('你好 (nǐ hǎo)'); - expect(result).toEqual({ +describe('parseHanziPinyinPairs', () => { + it('should parse single hanzi-pinyin pair', () => { + const withSpaceSeparator = parseHanziPinyinPairs('你好 (nǐ hǎo)'); + + expect(withSpaceSeparator).toHaveLength(1); + expect(withSpaceSeparator[0]).toMatchObject({ + hanzi: '你好', + pinyin: 'nǐ hǎo' + }); + + const withoutSpaceSeparator = parseHanziPinyinPairs('你好(nǐ hǎo)'); + + expect(withoutSpaceSeparator).toHaveLength(1); + expect(withoutSpaceSeparator[0]).toMatchObject({ hanzi: '你好', pinyin: 'nǐ hǎo' }); }); - it('should handle text without spaces before parentheses', () => { - const result = parseChinesePattern('你好(nǐ hǎo)'); - expect(result).toEqual({ + it('should parse multiple hanzi-pinyin pairs', () => { + const withSpaceSeparator = parseHanziPinyinPairs( + '你好 (nǐ hǎo),我是王华 (wǒ shì Wang Hua)' + ); + expect(withSpaceSeparator).toHaveLength(2); + expect(withSpaceSeparator[0]).toMatchObject({ hanzi: '你好', pinyin: 'nǐ hǎo' }); - }); + expect(withSpaceSeparator[1]).toMatchObject({ + hanzi: '我是王华', + pinyin: 'wǒ shì Wang Hua' + }); - it('should handle text with multiple spaces', () => { - const result = parseChinesePattern('你好 (nǐ hǎo)'); - expect(result).toEqual({ + const withoutSpaceSeparator = parseHanziPinyinPairs( + '你好(nǐ hǎo),我是王华(wǒ shì Wang Hua)' + ); + expect(withoutSpaceSeparator).toHaveLength(2); + expect(withoutSpaceSeparator[0]).toMatchObject({ hanzi: '你好', pinyin: 'nǐ hǎo' }); + expect(withoutSpaceSeparator[1]).toMatchObject({ + hanzi: '我是王华', + pinyin: 'wǒ shì Wang Hua' + }); }); - it('should return null for text without parentheses', () => { - const result = parseChinesePattern('你好'); - expect(result).toBeNull(); - }); - - it('should return null for text with only opening parenthesis', () => { - const result = parseChinesePattern('你好 (nǐ hǎo'); - expect(result).toBeNull(); - }); - - it('should return null for empty string', () => { - const result = parseChinesePattern(''); - expect(result).toBeNull(); + it('should return empty array for text without pairs', () => { + const result = parseHanziPinyinPairs('你好'); + expect(result).toHaveLength(0); }); }); @@ -99,6 +111,93 @@ describe('createMdastToHtml', () => { ); }); + it('should render BLANK tokens and punctuation marks as plain text', () => { + const toHtml = createMdastToHtml('zh-CN'); + const withoutSpacesAroundBlanks = [ + { + type: 'paragraph', + children: [ + { + type: 'inlineCode', + value: + '你好 (nǐ hǎo),BLANK是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ)BLANK什么名字 (shén me míng zi)?' + } + ] + } + ]; + expect(toHtml(withoutSpacesAroundBlanks)).toBe( + '

你好(nǐ hǎo),BLANK是王华(shì Wang Hua)请问你(qǐng wèn nǐ)BLANK什么名字(shén me míng zi)

' + ); + + const withSpacesAroundBlanks = [ + { + type: 'paragraph', + children: [ + { + type: 'inlineCode', + value: + '你好 (nǐ hǎo), BLANK 是王华 (shì Wang Hua),请问你 (qǐng wèn nǐ) BLANK 什么名字 (shén me míng zi)?' + } + ] + } + ]; + expect(toHtml(withSpacesAroundBlanks)).toBe( + '

你好(nǐ hǎo), BLANK 是王华(shì Wang Hua)请问你(qǐng wèn nǐ) BLANK 什么名字(shén me míng zi)

' + ); + }); + + it('should render Latin words as plain text while applying ruby to hanzi-pinyin pairs', () => { + const toHtml = createMdastToHtml('zh-CN'); + const nodes = [ + { + type: 'paragraph', + children: [ + { + type: 'inlineCode', + value: '我是 (wǒ shì) UI 设计师 (shè jì shī)' + } + ] + } + ]; + const actual = toHtml(nodes); + expect(actual).toBe( + '

我是(wǒ shì) UI 设计师(shè jì shī)

' + ); + }); + + it('should handle BLANK token and Latin word mix', () => { + const toHtml = createMdastToHtml('zh-CN'); + const nodes = [ + { + type: 'paragraph', + children: [ + { + type: 'inlineCode', + value: '我 (wǒ) BLANK UI 设计师 (shè jì shī)' + } + ] + } + ]; + const actual = toHtml(nodes); + expect(actual).toBe( + '

() BLANK UI 设计师(shè jì shī)

' + ); + }); + + it('should render multiple adjacent BLANK tokens in Chinese sentence', () => { + const toHtml = createMdastToHtml('zh-CN'); + const nodes = [ + { + type: 'paragraph', + children: [{ type: 'inlineCode', value: 'BLANK BLANK,你好 (nǐ hǎo)' }] + } + ]; + const actual = toHtml(nodes); + expect(actual).toBe( + '

BLANK BLANK,你好(nǐ hǎo)

' + ); + }); + it('should fallback to code element if pattern does not match', () => { const toHtml = createMdastToHtml('zh-CN'); const nodes = [ @@ -126,4 +225,16 @@ describe('createMdastToHtml', () => { const actual = toHtml(nodes); expect(actual).toBe('

请问 (qǐng wèn)

'); }); + + it('should render as regular code when lang is not defined', () => { + const toHtml = createMdastToHtml(); + const nodes = [ + { + type: 'paragraph', + children: [{ type: 'inlineCode', value: '请问 (qǐng wèn)' }] + } + ]; + const actual = toHtml(nodes); + expect(actual).toBe('

请问 (qǐng wèn)

'); + }); });