Text Compression
For this challenge, you will write two programs: one which compresses text, and the other which decompresses it.
The first program will receive 1 kB (1000 B) of input consisting of space separated lowercased words from the first 50 entries in the Byte Heist word list. It should compress the text with a factor of at least 2:1; in other words, its output may be no more than 500 B.
The second program will receive the output of the first program, and must output the original input.
In order to separate the two programs, place a line between them with nothing on it except PROGRAM_DELIMITER
. If your compressor is
abc def
and your decompressor is
123456
then your submission should look like this:
abc def PROGRAM_DELIMITER 123456
Judge
(async function*(context: Context): Challenge { const MAX_WORD_LENGTH = 5; const INPUT_LENGTH = 1000; const words: string[] = shuffle( (await Deno.readTextFile("/scripts/words.txt")) .split('\n') .map((i: string) => i.toLowerCase()) .filter((i: string) => i.match(/^[a-z]{1,5}$/)) .slice(0, 50)); const programs = context.code.split(/\r?\nPROGRAM_DELIMITER\r?\n/); yield context.registerTestCase( new TestCase( "Properly delimited programs", programs.length === 2 ? "Pass" : "Fail", { Text: "Expected 2 programs separated by PROGRAM_DELIMITER, found " + programs.length } ) ); // Exit early if there aren't 2 programs to prevent the judge from erroring. if (programs.length !== 2) return context.noFailures(); const staticInput = "for have his all your on one they the in when said when this up was all from it in for how you had is out and out is are word can this you it on one was be from we said by you to they she what for not is not that as with other word a other the and are have a it a i were can each for your have an were it from as out is with out not that be how use have other there and in of word but up from be not word had this they he up had not his at one be there at he at how some this as he is said we at of that there by i the can we or on other this there your they in that it was an he we to an i are have as but up she of he what some his all how are a other but what by are on were what use of when the word you we can all each his each use each as by she or an was other were a and said when had out in or be your of his how some to but i to use up for can an was some use on one at all you some and from that the to with had by with or there i said when they what but she each she were your with or one"; const randomInput = (words: string[]): string => { let input = ""; // Generate exactly 1000 bytes of input while (input.length < INPUT_LENGTH - MAX_WORD_LENGTH) { input += words[rand(words.length)] + " "; } return input + words.find((word) => word.length == INPUT_LENGTH - input.length); }; const inputs = [ staticInput, ...range(3).map((_)=>randomInput(words)), randomInput(words.slice(0, words.length/2)), randomInput(words.slice(words.length/2))]; let runNum = 1; for (let input of inputs) { const compressed = (await context.runCode(programs[0], input)).text; const compressedSize = new TextEncoder().encode(compressed).length; yield context.registerTestCase( new TestCase( "Run " + runNum + ": Intermediate output size", compressedSize <= INPUT_LENGTH/2 ? "Pass" : "Fail", { Text: "Expected " + INPUT_LENGTH/2 + " or less bytes, got " + compressedSize + " bytes:\n" + compressed } ) ); yield(await context.runCode(programs[1], compressed)).assertEquals(input).setName("Run " + runNum + ": Correct final output"); runNum++; } return context.noFailures(); })
Example Code
const fs = require('fs'); const input = fs.readFileSync(0, 'utf-8'); const words = input.split(' '); let output = ''; for(let word of words) output += {"the": "A", "of": "B", "to": "C", "and": "D", "a": "E", "in": "F", "is": "G", "it": "H", "you": "I", "that": "J", "he": "K", "was": "L", "for": "M", "on": "N", "are": "O", "with": "P", "as": "Q", "i": "R", "his": "S", "they": "T", "be": "U", "at": "V", "one": "W", "have": "X", "this": "Y", "from": "Z", "or": "a", "had": "b", "by": "c", "not": "d", "word": "e", "but": "f", "what": "g", "some": "h", "we": "i", "can": "j", "out": "k", "other": "l", "were": "m", "all": "n", "there": "o", "when": "p", "up": "q", "use": "r", "your": "s", "how": "t", "said": "u", "an": "v", "each": "w", "she": "x"}[word]; console.log(output); PROGRAM_DELIMITER const fs = require('fs'); const input = fs.readFileSync(0, 'utf-8'); console.log([...input].map((chr)=>({"A": "the", "B": "of", "C": "to", "D": "and", "E": "a", "F": "in", "G": "is", "H": "it", "I": "you", "J": "that", "K": "he", "L": "was", "M": "for", "N": "on", "O": "are", "P": "with", "Q": "as", "R": "i", "S": "his", "T": "they", "U": "be", "V": "at", "W": "one", "X": "have", "Y": "this", "Z": "from", "a": "or", "b": "had", "c": "by", "d": "not", "e": "word", "f": "but", "g": "what", "h": "some", "i": "we", "j": "can", "k": "out", "l": "other", "m": "were", "n": "all", "o": "there", "p": "when", "q": "up", "r": "use", "s": "your", "t": "how", "u": "said", "v": "an", "w": "each", "x": "she"}[chr])).join(" "));