feat(client,challenge-parser): update fill-in-the-blank to support Chinese (#63741)

This commit is contained in:
Huyen Nguyen
2025-11-25 11:02:22 -08:00
committed by GitHub
parent b6fff6e2b7
commit 33325b9002
24 changed files with 964 additions and 176 deletions
+1
View File
@@ -410,6 +410,7 @@ exports.createSchemaCustomization = ({ actions }) => {
type FillInTheBlank {
sentence: String
blanks: [Blank]
inputType: String
}
type Blank {
answer: String
+1
View File
@@ -49,6 +49,7 @@ export type Question = {
export type FillInTheBlank = {
sentence: string;
blanks: MultipleChoiceAnswer[];
inputType?: 'pinyin-tone' | 'pinyin-to-hanzi';
};
export type Fields = {
@@ -2,7 +2,7 @@ import React from 'react';
import { useTranslation } from 'react-i18next';
import { Spacer } from '@freecodecamp/ui';
import { parseBlanks } from '../fill-in-the-blank/parse-blanks';
import { parseBlanks, parseAnswer } from '../fill-in-the-blank/parse-blanks';
import PrismFormatted from '../components/prism-formatted';
import { FillInTheBlank } from '../../../redux/prop-types';
import ChallengeHeading from './challenge-heading';
@@ -16,6 +16,23 @@ type FillInTheBlankProps = {
handleInputChange: (inputIndex: number, value: string) => void;
};
/**
 * Renders a correctly answered blank inline with the sentence.
 *
 * When `parseAnswer` returns a plain string the answer is shown in a <span>;
 * when it returns a hanzi/pinyin pair the answer is shown as a <ruby>
 * annotation, with <rp> parentheses for browsers without ruby support.
 */
const AnswerText = ({ answer }: { answer: string }) => {
  const parsed = parseAnswer(answer);

  return typeof parsed === 'string' ? (
    <span className='correct-blank-answer'>{parsed}</span>
  ) : (
    <ruby className='correct-blank-answer'>
      {parsed.hanzi}
      <rp>(</rp>
      <rt>{parsed.pinyin}</rt>
      <rp>)</rp>
    </ruby>
  );
};
function FillInTheBlanks({
fillInTheBlank: { sentence, blanks },
answersCorrect,
@@ -36,6 +53,17 @@ function FillInTheBlanks({
return cls;
};
// Length used to size the blank's <input>: the whole answer when it is plain
// text, or only the pinyin part when the answer is a hanzi/pinyin pair.
const getAnswerLength = (answer: string): number => {
  const parsed = parseAnswer(answer);

  // TODO: This is a simplification. Revisit later to account for tones and spaces.
  return typeof parsed === 'string' ? parsed.length : parsed.pinyin.length;
};
const paragraphs = parseBlanks(sentence);
const blankAnswers = blanks.map(b => b.answer);
@@ -55,25 +83,35 @@ function FillInTheBlanks({
return value;
}
// If a blank is answered correctly, render the answer as part of the sentence.
if (type === 'blank' && answersCorrect[value] === true) {
if (type === 'hanzi-pinyin') {
const { hanzi, pinyin } = value;
return (
<span key={j} className='correct-blank-answer'>
{blankAnswers[value]}
</span>
<ruby key={j}>
{hanzi}
<rp>(</rp>
<rt>{pinyin}</rt>
<rp>)</rp>
</ruby>
);
}
// If a blank is answered correctly, render the answer as part of the sentence.
if (type === 'blank' && answersCorrect[value] === true) {
return <AnswerText key={j} answer={blankAnswers[value]} />;
}
const answerLength = getAnswerLength(blankAnswers[value]);
return (
<input
key={j}
type='text'
maxLength={blankAnswers[value].length + 3}
maxLength={answerLength + 3}
className={getInputClass(value)}
onChange={e =>
handleInputChange(node.value, e.target.value)
}
size={blankAnswers[value].length}
size={answerLength}
autoComplete='off'
aria-label={t('learn.fill-in-the-blank.blank')}
{...(answersCorrect[value] === false
@@ -1,5 +1,9 @@
import { describe, it, expect } from 'vitest';
import { parseBlanks } from './parse-blanks';
import {
parseBlanks,
parseHanziPinyinPairs,
parseAnswer
} from './parse-blanks';
describe('parseBlanks', () => {
it('handles strings without blanks', () => {
@@ -129,4 +133,221 @@ describe('parseBlanks', () => {
expect(() => parseBlanks('<p>hello BLANK!</p>hello BLANK!')).toThrow();
expect(() => parseBlanks('hello BLANK!<p>hello</p>')).toThrow();
});
it('handles Chinese with single BLANK', () => {
expect(
parseBlanks('<p>BLANK<ruby>好<rp>(</rp><rt>hǎo</rt><rp>)</rp></ruby></p>')
).toEqual([
[
{ type: 'blank', value: 0 },
{
type: 'hanzi-pinyin',
value: { hanzi: '好', pinyin: 'hǎo' }
}
]
]);
});
it('handles Chinese without pinyin', () => {
expect(parseBlanks('<p>你BLANK好</p>')).toEqual([
[
{ type: 'text', value: '你' },
{ type: 'blank', value: 0 },
{ type: 'text', value: '好' }
]
]);
});
it('handles Chinese with multiple BLANKs', () => {
expect(
parseBlanks(
'<p>BLANK<ruby>好<rp>(</rp><rt>hǎo</rt><rp>)</rp></ruby>BLANK<ruby>是王华<rp>(</rp><rt>shì Wang Hua</rt><rp>)</rp></ruby></p>'
)
).toEqual([
[
{ type: 'blank', value: 0 },
{
type: 'hanzi-pinyin',
value: { hanzi: '好', pinyin: 'hǎo' }
},
{ type: 'text', value: '' },
{ type: 'blank', value: 1 },
{
type: 'hanzi-pinyin',
value: { hanzi: '是王华', pinyin: 'shì Wang Hua' }
}
]
]);
});
it('handles Chinese with multiple adjacent BLANKs', () => {
expect(
parseBlanks(
'<p>BLANK BLANK<ruby>好<rp>(</rp><rt>hǎo</rt><rp>)</rp></ruby></p>'
)
).toEqual([
[
{ type: 'blank', value: 0 },
{ type: 'text', value: ' ' },
{ type: 'blank', value: 1 },
{
type: 'hanzi-pinyin',
value: { hanzi: '好', pinyin: 'hǎo' }
}
]
]);
});
it('handles Chinese with BLANK at the end', () => {
expect(
parseBlanks(
'<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby>BLANK</p>'
)
).toEqual([
[
{
type: 'hanzi-pinyin',
value: { hanzi: '你好', pinyin: 'nǐ hǎo' }
},
{ type: 'blank', value: 0 }
]
]);
});
it('handles Chinese with spaces around BLANK', () => {
expect(
parseBlanks(
'<p><ruby>你<rp>(</rp><rt>nǐ</rt><rp>)</rp></ruby> BLANK <ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby></p>'
)
).toEqual([
[
{
type: 'hanzi-pinyin',
value: { hanzi: '你', pinyin: 'nǐ' }
},
{ type: 'text', value: ' ' },
{ type: 'blank', value: 0 },
{ type: 'text', value: ' ' },
{
type: 'hanzi-pinyin',
value: { hanzi: '我', pinyin: 'wǒ' }
}
]
]);
});
it('handles Latin text adjacent to BLANK', () => {
expect(
parseBlanks(
'<p><ruby>我<rp>(</rp><rt>wǒ</rt><rp>)</rp></ruby> BLANK UI <ruby>设计师<rp>(</rp><rt>shè jì shī</rt><rp>)</rp></ruby> 。</p>'
)
).toEqual([
[
{
type: 'hanzi-pinyin',
value: { hanzi: '我', pinyin: 'wǒ' }
},
{ type: 'text', value: ' ' },
{ type: 'blank', value: 0 },
{ type: 'text', value: ' UI ' },
{
type: 'hanzi-pinyin',
value: { hanzi: '设计师', pinyin: 'shè jì shī' }
},
{ type: 'text', value: ' 。' }
]
]);
});
it('handles Chinese with multiple separate groups', () => {
expect(
parseBlanks(
'<p>BLANK<ruby>好<rp>(</rp><rt>hǎo</rt><rp>)</rp></ruby><ruby>我是王华<rp>(</rp><rt>wǒ shì Wang Hua</rt><rp>)</rp></ruby><ruby>请问你<rp>(</rp><rt>qǐng wèn nǐ</rt><rp>)</rp></ruby>BLANK<ruby>什么名字<rp>(</rp><rt>shén me míng zi</rt><rp>)</rp></ruby></p>'
)
).toEqual([
[
{ type: 'blank', value: 0 },
{
type: 'hanzi-pinyin',
value: { hanzi: '好', pinyin: 'hǎo' }
},
{ type: 'text', value: '' },
{
type: 'hanzi-pinyin',
value: { hanzi: '我是王华', pinyin: 'wǒ shì Wang Hua' }
},
{ type: 'text', value: '' },
{
type: 'hanzi-pinyin',
value: { hanzi: '请问你', pinyin: 'qǐng wèn nǐ' }
},
{ type: 'blank', value: 1 },
{
type: 'hanzi-pinyin',
value: { hanzi: '什么名字', pinyin: 'shén me míng zi' }
},
{ type: 'text', value: '' }
]
]);
});
it('handles Chinese ruby with trailing punctuation', () => {
expect(
parseBlanks(
'<p><ruby>你是刘明吗<rp>(</rp><rt>nǐ shì Liu Ming ma</rt><rp>)</rp></ruby></p>'
)
).toEqual([
[
{
type: 'hanzi-pinyin',
value: { hanzi: '你是刘明吗', pinyin: 'nǐ shì Liu Ming ma' }
},
{ type: 'text', value: '' }
]
]);
});
});
// Unit tests for the `hanzi (pinyin)` extraction helper.
describe('parseHanziPinyinPairs', () => {
  it('returns array with one pair for well-formed input', () => {
    const result = parseHanziPinyinPairs('你好 (nǐ hǎo)');

    expect(result).toHaveLength(1);
    expect(result[0]).toEqual({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
  });

  it('handles parentheses without a space', () => {
    const result = parseHanziPinyinPairs('你好(nǐ hǎo)');

    expect(result).toHaveLength(1);
    expect(result[0]).toEqual({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
  });

  it('returns empty array for non-matching input', () => {
    expect(parseHanziPinyinPairs('hello')).toEqual([]);
  });
});

// Unit tests for the single-answer wrapper: it yields a pair object only when
// the answer contains exactly one `hanzi (pinyin)` pair.
describe('parseAnswer', () => {
  it('returns parsed object when pattern matches', () => {
    expect(parseAnswer('你好 (nǐ hǎo)')).toEqual({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
  });

  it('returns parsed object when there is no space before the parentheses', () => {
    expect(parseAnswer('你好(nǐ hǎo)')).toEqual({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
  });

  it('returns original string when pattern does not match', () => {
    expect(parseAnswer('just some text')).toBe('just some text');
  });

  it('returns original string when more than one pair is present', () => {
    expect(parseAnswer('你 (nǐ) 好 (hǎo)')).toBe('你 (nǐ) 好 (hǎo)');
  });
});
@@ -1,6 +1,48 @@
type TextNode = { type: 'text'; value: string };
// Literal text node: a run of paragraph text that is neither a blank nor a
// hanzi/pinyin pair.
type PlainTextNode = {
type: 'text';
value: string;
};
// Hanzi/pinyin node representing an inline pronunciation pair.
// `hanzi` holds the characters and `pinyin` their romanized reading; the
// paragraph parser fills both from a <ruby> element's base text and <rt> text.
type HanziPinyinNode = {
type: 'hanzi-pinyin';
value: { hanzi: string; pinyin: string };
};
type BlankNode = { type: 'blank'; value: number };
type ParagraphElement = TextNode | BlankNode;
type ParagraphElement = PlainTextNode | BlankNode | HanziPinyinNode;
/**
 * Collects every `hanzi (pinyin)` pair that appears in a string.
 *
 * A pair is a run of non-parenthesis characters followed (optionally after
 * whitespace) by a parenthesized group. Both halves are trimmed.
 *
 * @param text - Text potentially containing hanzi (pinyin) patterns
 * @returns The pairs in the order they occur; empty when nothing matches
 */
export function parseHanziPinyinPairs(
  text: string
): Array<{ hanzi: string; pinyin: string }> {
  // Created per call so the global regex's `lastIndex` always starts at 0.
  const pairPattern = /([^()]+?)\s*\(([^)]+)\)/g;
  const found: Array<{ hanzi: string; pinyin: string }> = [];

  for (
    let hit = pairPattern.exec(text);
    hit !== null;
    hit = pairPattern.exec(text)
  ) {
    const [, rawHanzi, rawPinyin] = hit;
    found.push({ hanzi: rawHanzi.trim(), pinyin: rawPinyin.trim() });
  }

  return found;
}
/**
 * Interprets a blank's answer string.
 *
 * @param text - The answer as authored
 * @returns The hanzi/pinyin pair when `text` contains exactly one
 * `hanzi (pinyin)` pair; otherwise `text` itself, unchanged
 */
export function parseAnswer(
  text: string
): { hanzi: string; pinyin: string } | string {
  const candidates = parseHanziPinyinPairs(text);

  // Zero pairs means plain text; several pairs is ambiguous, so fall back too.
  if (candidates.length !== 1) {
    return text;
  }
  return candidates[0];
}
export const parseBlanks = (text: string) => {
const trimmed = text.trim();
@@ -19,27 +61,14 @@ to be wrapped in <p> tags`);
const { paragraphs } = rawParagraphs.reduce(
(acc, p) => {
const splitByBlank = p.split('BLANK');
const containsRuby = /<ruby>/.test(p);
const { elements, blankCount } = containsRuby
? parseChineseParagraph(p, acc.count)
: parsePlainParagraph(p, acc.count);
const parsedParagraph = splitByBlank
.map<ParagraphElement[]>((text, i) => [
{ type: 'text', value: text },
{ type: 'blank', value: acc.count + i }
])
.flat();
parsedParagraph.pop(); // remove last blank
const paragraph = parsedParagraph.filter(p => {
// remove empty strings
if (p.type === 'text') {
return p.value;
} else {
return true;
}
});
return {
count: acc.count + splitByBlank.length - 1,
paragraphs: [...acc.paragraphs, paragraph]
count: acc.count + blankCount,
paragraphs: [...acc.paragraphs, elements]
};
},
{ count: 0, paragraphs: [] } as {
@@ -50,3 +79,84 @@ to be wrapped in <p> tags`);
return paragraphs;
};
/**
 * Parses a paragraph that contains ruby HTML elements (Chinese hanzi-pinyin).
 * Handles multiple ruby elements separated by text and BLANK tokens.
 *
 * The paragraph is first split on `BLANK`; every segment is then split into
 * `<ruby>…</ruby>` fragments and the text between them. Empty text is dropped.
 * A ruby fragment of the exact form
 * `<ruby>hanzi<rp>(</rp><rt>pinyin</rt><rp>)</rp></ruby>` becomes a
 * `hanzi-pinyin` node. A ruby fragment of any other form is kept verbatim as a
 * `text` node, so malformed markup stays visible instead of vanishing.
 *
 * @param paragraph - The paragraph content to parse
 * @param startingBlankIndex - Index given to the first blank in this paragraph
 * @returns The nodes in document order and the number of blanks encountered
 */
function parseChineseParagraph(
  paragraph: string,
  startingBlankIndex: number
): { elements: ParagraphElement[]; blankCount: number } {
  // Built once per call rather than once per segment/part.
  // The capturing group makes `split` keep the ruby fragments in its output.
  const rubySplitter = /(<ruby>.*?<\/ruby>)/g;
  const rubyShape =
    /^<ruby>([^<]+)<rp>\(<\/rp><rt>([^<]+)<\/rt><rp>\)<\/rp><\/ruby>$/;

  const elements: ParagraphElement[] = [];
  let blankIndex = startingBlankIndex;

  // Split on BLANK tokens so a blank node can be inserted between segments.
  const segments = paragraph.split('BLANK');

  for (let s = 0; s < segments.length; s++) {
    // `filter(Boolean)` removes the empty strings `split` yields around
    // adjacent matches, so every remaining part is non-empty.
    const parts = segments[s].split(rubySplitter).filter(Boolean);

    for (const part of parts) {
      const rubyMatch = part.startsWith('<ruby>') ? rubyShape.exec(part) : null;

      if (rubyMatch) {
        elements.push({
          type: 'hanzi-pinyin',
          value: { hanzi: rubyMatch[1], pinyin: rubyMatch[2] }
        });
      } else {
        // Plain text, or a ruby fragment that is not in the expected shape.
        elements.push({ type: 'text', value: part });
      }
    }

    // After each segment except the last, insert a blank node.
    if (s < segments.length - 1) {
      elements.push({ type: 'blank', value: blankIndex });
      blankIndex++;
    }
  }

  return {
    elements,
    blankCount: blankIndex - startingBlankIndex
  };
}
/**
 * Parses a plain (non-Chinese) paragraph.
 *
 * The paragraph is cut at every `BLANK` token. Non-empty pieces become `text`
 * nodes and each cut becomes a `blank` node, numbered consecutively starting
 * at `startingBlankIndex`.
 *
 * @param paragraph - The paragraph content to parse
 * @param startingBlankIndex - Index given to the first blank in this paragraph
 * @returns The nodes in document order and the number of blanks encountered
 */
function parsePlainParagraph(
  paragraph: string,
  startingBlankIndex: number
): { elements: ParagraphElement[]; blankCount: number } {
  const pieces = paragraph.split('BLANK');
  const lastPiece = pieces.length - 1;
  const elements: ParagraphElement[] = [];

  pieces.forEach((piece, position) => {
    // Empty strings appear when BLANK starts/ends the text or repeats.
    if (piece) {
      elements.push({ type: 'text', value: piece });
    }
    // Every piece but the final one is followed by a blank.
    if (position < lastPiece) {
      elements.push({ type: 'blank', value: startingBlankIndex + position });
    }
  });

  return {
    elements,
    blankCount: lastPiece
  };
}
@@ -35,6 +35,7 @@ import { SceneSubject } from '../components/scene/scene-subject';
import { getChallengePaths } from '../utils/challenge-paths';
import { isChallengeCompletedSelector } from '../redux/selectors';
import { replaceAppleQuotes } from '../../../utils/replace-apple-quotes';
import { parseHanziPinyinPairs } from './parse-blanks';
import './show.css';
@@ -135,12 +136,27 @@ const ShowFillInTheBlank = ({
const handleSubmit = () => {
const blankAnswers = fillInTheBlank.blanks.map(b => b.answer);
const newAnswersCorrect = userAnswers.map(
(userAnswer, i) =>
!!userAnswer &&
replaceAppleQuotes(userAnswer.trim()).toLowerCase() ===
blankAnswers[i].toLowerCase()
);
// Grade each blank: `true` when the normalized user input equals the expected
// answer, `false` otherwise (including when nothing was entered).
const newAnswersCorrect = userAnswers.map((userAnswer, i) => {
  if (!userAnswer) return false;

  const answer = blankAnswers[i];
  // The user input is trimmed, quote-normalized and lower-cased, so the
  // expected value must be lower-cased as well before comparing.
  const normalizedUserAnswer = replaceAppleQuotes(
    userAnswer.trim()
  ).toLowerCase();

  // An answer holding exactly one `hanzi (pinyin)` pair is graded on its hanzi.
  const pairs = parseHanziPinyinPairs(answer);
  const hanziPinyin = pairs.length === 1 ? pairs[0] : null;

  if (hanziPinyin) {
    // TODO: Implement full hanzi-pinyin validation logic
    // https://github.com/freeCodeCamp/language-curricula/issues/18
    // Lower-casing is a no-op for hanzi but is needed when the expected text
    // mixes in Latin letters (e.g. "UI"); without it such answers never match.
    return normalizedUserAnswer === hanziPinyin.hanzi.toLowerCase();
  }

  return normalizedUserAnswer === answer.toLowerCase();
});
setAnswersCorrect(newAnswersCorrect);
const hasWrongAnswer = newAnswersCorrect.some(a => a === false);
if (!hasWrongAnswer) {
@@ -294,6 +310,7 @@ export const query = graphql`
answer
feedback
}
inputType
}
tests {
text