Regex Generator
Given two words—the “good” word and the “bad” word—you need to output 5 regexes and 5 extra words as pairs. Each regex must match the good word and its paired extra word, but none of the other words (the bad word or other extra words).
For instance, if your words were apple (the good word) and banana (the bad word), you might output:
applex? applex
applx?e applxe
appx?le appxle
apx?ple apxple
ax?pple axpple
Each input word will consist only of ASCII lower case letters, and each extra word you output must consist only of ASCII lower case letters as well.
The judge will attempt to figure out your pattern and generate adversarial examples.
IO format
Input will be a list of lines, each containing two tab-separated words: the first is the good word, the second is the bad word.
For each input line, output 5 lines, each with 2 items separated by a space or tab: the first is the regex, the second is its extra word. Outputs for consecutive input lines must be separated by a double newline (i.e. one blank line).
Regex Specification
We use JavaScript's regular expression syntax.
Judge
/**
 * Judge for the "Regex Generator" challenge.
 *
 * Registers three test cases in sequence:
 *  1. "Basic Tests": 40 (good, bad) pairs drawn from the shuffled word list,
 *     plus 10 pairs whose bad word is the good word with another word appended.
 *  2. "Pathological Tests": up to 50 (good word, extra word) pairs that were
 *     accepted in round 1, re-used as (good, bad) inputs; the last five of
 *     them are swapped so the former extra word becomes the good word.
 *  3. "Pathological Tests 2": up to 50 pairs accepted in round 2, re-used the
 *     same way (without swapping).
 *
 * The challenge result is whatever `context.noFailures()` reports.
 */
(async function* (context: Context): Challenge {
  // Every accepted [goodWord, extraWord] pair is collected here so that the
  // next round can feed it back in as a [goodWord, badWord] input.
  const badTestCases: [string, string][] = [];

  /**
   * Sends `words` (one tab-separated pair per line) through `context.run`,
   * validates the text that comes back and wraps the verdict in a `TestCase`.
   *
   * Expected output layout: one group per input pair, groups separated by a
   * blank line, each group holding lines of "<regex> <extra word>".
   *
   * @param name  Display name of the resulting test case.
   * @param words The (good word, bad word) pairs to test.
   * @returns A `TestCase` that is 'Pass' when no error was recorded, else
   *          'Fail', with the collected error text attached under "Text".
   */
  const verifyCase = async (name: string, words: [string, string][]) => {
    const test = await context.run(
      words.map((pair) => pair.join('\t')).join('\n'),
    );
    // groups -> lines -> [regex, extraWord, ...]
    const outputs = test.text
      .trimEnd()
      .split('\n\n')
      .map((group) => group.split('\n').map((line) => line.split(/[ \t]+/)));

    let errors = '';
    if (outputs.length != words.length) {
      errors += `Expected ${words.length} values, got ${outputs.length}.\n`;
    }

    for (const [[goodWord, badWord], extraWords] of zip(words, outputs)) {
      let errorsForTestCase = '';
      if (extraWords.length !== 5) {
        errorsForTestCase += `Expected 5 regexes, got ${extraWords.length}\n`;
      }

      for (const [index, [extraRegex, extraWord]] of zip(range(5), extraWords)) {
        if (extraWord == undefined) {
          errorsForTestCase += `Regex /${extraRegex}/: No extra word given\n`;
          continue;
        }

        let regex: RegExp;
        try {
          regex = new RegExp(extraRegex);
        } catch {
          errorsForTestCase += `Regex /${extraRegex}/ is invalid\n`;
          continue;
        }

        if (!extraWord.match(/^[a-z]+$/)) {
          // Closing quote belongs right after the word, not at the end of the
          // sentence.
          errorsForTestCase += `Word "${extraWord}" must consist of only ASCII lower case letters\n`;
          continue;
        }
        if (!goodWord.match(regex)) {
          errorsForTestCase += `Regex ${regex} does not match "${goodWord}"\n`;
          continue;
        }
        if (badWord.match(regex)) {
          errorsForTestCase += `Regex ${regex} matches bad word "${badWord}"\n`;
          continue;
        }
        if (!extraWord.match(regex)) {
          errorsForTestCase += `Regex ${regex} does not match extra word "${extraWord}"\n`;
          continue;
        }

        // The regex must not match the extra word of any *other* row. Rows
        // that lack an extra word yield `undefined` here; skip those instead
        // of crashing (they are reported on their own iteration). The already
        // compiled `regex` is reused rather than re-compiling the source.
        const foundValue = extraWords
          .map((row) => row[1])
          .toSpliced(index, 1)
          .find((other) => other !== undefined && regex.test(other));
        if (foundValue !== undefined) {
          errorsForTestCase += `Regex ${regex} matches other extra word "${foundValue}"\n`;
          continue;
        }

        badTestCases.push([goodWord, extraWord]);
      }

      if (errorsForTestCase !== '') {
        errors += `Test Case "${goodWord}" "${badWord}":\n${errorsForTestCase}\n`;
      }
    }

    return new TestCase(name, errors === '' ? 'Pass' : 'Fail', { "Text": errors });
  };

  // Word pool: lower-cased lines of the word list that consist solely of a-z,
  // in shuffled order.
  const words: string[] = shuffle(
    (await Deno.readTextFile("/scripts/words.txt"))
      .split('\n')
      .map((i: string) => i.toLowerCase())
      .filter((i: string) => i.match(/^[a-z]+$/)),
  );

  // Round 1: 40 unrelated pairs + 10 pairs where the bad word starts with the
  // good word.
  yield context.registerTestCase(
    await verifyCase('Basic Tests', [
      ...zip(words.slice(0, 40), words.slice(40, 80)),
      ...[...zip(words.slice(80, 90), words.slice(90, 100))].map(
        ([i, j]: [string, string]) => [i, i + j] as [string, string],
      ),
    ]),
  );

  // Round 2: replay accepted (good, extra) pairs as (good, bad); identical
  // pairs are dropped. The collector is emptied so that it only holds pairs
  // accepted during this round afterwards.
  let newBadTestCases = shuffle(badTestCases).filter(([a, b]) => a != b);
  badTestCases.length = 0;
  yield context.registerTestCase(
    await verifyCase('Pathological Tests', [
      ...newBadTestCases.slice(0, 45),
      // Swapped: the former extra word is now the good word.
      ...newBadTestCases
        .slice(45, 50)
        .map(([i, j]) => [j, i] as [string, string]),
    ]),
  );

  // Round 3: same idea, fed with the pairs accepted in round 2.
  newBadTestCases = shuffle(badTestCases).filter(([a, b]) => a != b);
  yield context.registerTestCase(
    await verifyCase('Pathological Tests 2', newBadTestCases.slice(0, 50)),
  );

  // Finally, the challenge is passed if no test cases failed
  return context.noFailures();
})
Example Code
// Example solution.
//
// For every "<goodWord>\t<badWord>" input line it prints five lines of the form
// "^<goodWord><letter>?$ <goodWord><letter>", using consecutive letters from
// "abcdefg" and skipping a letter whenever goodWord + letter would equal the
// bad word. Each group is followed by a blank line.
//
// stdin is buffered until it ends, so a chunk boundary in the middle of a line
// cannot corrupt the parsing; blank lines (e.g. a trailing newline) are ignored.
let stdinText = '';
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  stdinText += chunk;
});
process.stdin.on('end', () => {
  const letters = 'abcdefg';
  for (const line of stdinText.split(/\r?\n/)) {
    if (line.trim() === '') {
      continue;
    }
    const [goodWord, badWord] = line.split('\t');
    // Index of the next unused padding letter; shared by all five pairs so
    // that every pair gets a distinct letter.
    let next = 0;
    const pairs = [1, 2, 3, 4, 5].map(() => {
      let padding = letters[next++];
      if (goodWord + padding === badWord) {
        // This extra word would be the bad word itself; take the next letter.
        padding = letters[next++];
      }
      return `^${goodWord}${padding}?$ ${goodWord}${padding}`;
    });
    // console.log appends one more newline, giving the required blank line
    // between groups.
    console.log(pairs.join('\n') + '\n');
  }
});