String Hashing

Description

Given a list of strings, one on each line, output a positive 31-bit integer for each one. Each word in words.txt should get a unique hash, and the hash should stay consistent for the same word, even between runs.

Integers outputted should be at most 2^31-1.

Judge

(async function*(context: Context): Challenge {
	// Single Test
	const words: string[] = shuffle(
		(await Deno.readTextFile("/scripts/words.txt"))
			.split('\n')
			.map((i:string)=>i.toLowerCase())
			.filter((i:string)=>i.match(/^[a-z]+$/))
	);

	class WordComparer {
		word: string;
		expected: string | undefined;
		
		constructor(word: string) {
			this.word = word;
		}

		toString() {
			return ''+this.expected
		}
	}

	let hashes = new Map<string,number>();
	let inverseHashes = new Map<number,string>();

	function compareFunction(hash: string, wordComparer: WordComparer): boolean {
		if (!hash.match(/^\d+\s*$/)) {
			wordComparer.expected = 'A nonnegative integer'
			return false;
		}
		let hashValue = +hash;
		if (hashValue > 2**31-1) {
			wordComparer.expected = 'A nonnegative integer <= 2**31-1'
			return false;
		}
		
		if (hashes.has(wordComparer.word)) {
			let expected = hashes.get(wordComparer.word)!;
			wordComparer.expected = ''+expected;
			return expected == hashValue;
		} else {
			let previousWord;
			if (previousWord = inverseHashes.get(hashValue)) {
				wordComparer.expected = `${wordComparer.word} to have a hash different from what was used by ${previousWord}`;
				return false;
			}

			hashes.set(wordComparer.word, hashValue);
			inverseHashes.set(hashValue, wordComparer.word);
			wordComparer.expected = ''+hashValue;

			return true;
		}
	}

	// Automatically shuffle and deal test cases over multiple runs
	yield* context.runTestCases<WordComparer>(
		words
			.flatMap(i=>[i,i])
			.map(
				(i:string)=>[i,new WordComparer(i)]as[string, WordComparer]
		),
		{
			compareFunction
		}
	);
	// Finally, the challenge is passed if no test cases failed
	return context.noFailures();
})

Example Code

process.stdin.on('data',(e)=>{
	(''+e).split('\n').forEach(word=>{
		console.log((word.length << 16) ^ [...word].map(i=>i.charCodeAt(0)).reduce((a,b)=>a*2+b) ^ (word.charCodeAt(0)<<8) ^ (word.charCodeAt(2)<<24) ^ (word.charCodeAt(3)<<19))
	})
})

Comments

lukegustafson

I feel this is a bit too easy in both JS and Python. Perhaps it could be made harder by some combination of reducing the hash size and increasing the word list, but I think that would just turn it into a brute-force problem.