Byte Heist Home Leaderboard
Join the Heist (with Github)
Solve View

Text Compression V2

For this challenge, you will write a program which can compress text, and also decompress its own compression scheme.

Your program will first receive the decompressed text, a capital “D” followed by 1 kB (1000 B) of space separated lowercased words from the first 50 entries in the Byte Heist word list. It should compress the text with a factor of at least 2:1; in other words, its output may be no more than 500 B.

Your program will then receive a capital “C” followed by its own compressed text, and should output the original decompressed text.

Judge

(async function*(context: Context): Challenge {
    const MAX_WORD_LENGTH = 5;
	const INPUT_LENGTH = 1000;
    const words: string[] = shuffle(
        (await Deno.readTextFile("/scripts/words.txt"))
	        .split('\n')
	        .map((i: string) => i.toLowerCase())
	        .filter((i: string) => i.match(/^[a-z]{1,5}$/))
	        .slice(0, 50));

    const staticInput = "for have his all your on one they the in when said when this up was all from it in for how you had is out and out is are word can this you it on one was be from we said by you to they she what for not is not that as with other word a other the and are have a it a i were can each for your have an were it from as out is with out not that be how use have other there and in of word but up from be not word had this they he up had not his at one be there at he at how some this as he is said we at of that there by i the can we or on other this there your they in that it was an he we to an i are have as but up she of he what some his all how are a other but what by are on were what use of when the word you we can all each his each use each as by she or an was other were a and said when had out in or be your of his how some to but i to use up for can an was some use on one at all you some and from that the to with had by with or there i said when they what but she each she were your with or one";

    const randomInput = (words: string[]): string => {
        let input = "";

		// Generate exactly 1000 bytes of input
        while (input.length < INPUT_LENGTH - MAX_WORD_LENGTH) {
            input += words[rand(words.length)] + " ";
        }
        return input + words.find((word) => word.length == INPUT_LENGTH - input.length);
    };

    const inputs = [
		staticInput, 
		...range(3).map((_)=>randomInput(words)), randomInput(words.slice(0, words.length/2)), 
		randomInput(words.slice(words.length/2))];

    let runNum = 1;
    for (let input of inputs) {
        const compressed = (await context.run("D" + input)).text;
        const compressedSize = new TextEncoder().encode(compressed).length;
        yield context.registerTestCase(
            new TestCase(
                "Run " + runNum + ": Intermediate output size",
                compressedSize <= INPUT_LENGTH/2 ? "Pass" : "Fail", {
                    Text: "Expected " + INPUT_LENGTH/2 + " or less bytes, got " + compressedSize + " bytes:\n" + compressed
                }
            )
        );

        yield(await context.run("C" + compressed)).assertEquals(input).setName("Run " + runNum + ": Correct final output");

        runNum++;
    }
    return context.noFailures();
})

Example Code

const 
	fs = require('fs'),
	input = fs.readFileSync(0, 'utf-8'),
	mode = input.slice(0,1),
	data = input.slice(1);

if(mode === "D")
{
	let output = '';
	for(let word of data.split(' '))
		output += {"the": "A", "of": "B", "to": "C", "and": "D", "a": "E", "in": "F", "is": "G", "it": "H", "you": "I", "that": "J", "he": "K", "was": "L", "for": "M", "on": "N", "are": "O", "with": "P", "as": "Q", "i": "R", "his": "S", "they": "T", "be": "U", "at": "V", "one": "W", "have": "X", "this": "Y", "from": "Z", "or": "a", "had": "b", "by": "c", "not": "d", "word": "e", "but": "f", "what": "g", "some": "h", "we": "i", "can": "j", "out": "k", "other": "l", "were": "m", "all": "n", "there": "o", "when": "p", "up": "q", "use": "r", "your": "s", "how": "t", "said": "u", "an": "v", "each": "w", "she": "x"}[word];

	console.log(output);
}

if(mode === "C")
	console.log([...data].map((chr)=>({"A": "the", "B": "of", "C": "to", "D": "and", "E": "a", "F": "in", "G": "is", "H": "it", "I": "you", "J": "that", "K": "he", "L": "was", "M": "for", "N": "on", "O": "are", "P": "with", "Q": "as", "R": "i", "S": "his", "T": "they", "U": "be", "V": "at", "W": "one", "X": "have", "Y": "this", "Z": "from", "a": "or", "b": "had", "c": "by", "d": "not", "e": "word", "f": "but", "g": "what", "h": "some", "i": "we", "j": "can", "k": "out", "l": "other", "m": "were", "n": "all", "o": "there", "p": "when", "q": "up", "r": "use", "s": "your", "t": "how", "u": "said", "v": "an", "w": "each", "x": "she"}[chr])).join(" "));