Compressing Data with the LZW Algorithm in JavaScript

The code below, when run, displays an interface that can be used to compress and decompress text with the LZW algorithm. To see it in action, copy it into an .html file and open that file in a web browser that runs JavaScript.  Or, for an online version, visit http://thiscouldbebetter.neocities.org/lzwcompressor.html.

The LZW algorithm is named after its creators, Abraham Lempel, Jacob Ziv, and Terry Welch. It is based on an earlier compression algorithm by Lempel and Ziv named LZ78. I’d explain how it works here, but I don’t really understand it super well. Basically, it starts off by plugging every possible 8-bit value into a dictionary, and appending a couple of control symbols to the end of it. Then it runs through the data to compress and, uh, looks symbols up in the dictionary and, like, maybe… adds new strings if it doesn’t find any? Or even if it does? Some variants (including this one) also increase the bit width of the symbols being written to the compressed symbol stream whenever the dictionary of known symbols grows larger than two to the power of the current bit width.

So, uh, yeah. It’s all there in the code. You should probably just read the code.

CompressorLZW

<html>
<body onload="onload();">

	<table>
		<tr>
			<td colspan="2">
				<label for="textareaTextToCompress">Text to Compress:</label><br />
			</td>
			<td colspan="2">
				<label for="textareaDataCompressed">Data Compressed:</label>
			</td>
		</tr>
		<tr>
			<td colspan="2">
				<textarea id="textareaTextToCompress" rows="16"></textarea>
			</td>
			<td colspan="2">
				<textarea id="textareaDataCompressed" rows="16"></textarea>
			</td>
		</tr>
		<tr>
			<td>
				<input id="buttonClearTextToCompress" type="button" value="Clear" onclick="buttonClearTextToCompress_Click();" />	
			</td>
			<td>
				<input id="buttonCompress" type="button" value="Compress" onclick="buttonCompress_Click();" />
			</td>
			<td>
				<input id="buttonDecompress" type="button" value="Decompress" onclick="buttonDecompress_Click();" />
			</td>
			<td>
				<input id="buttonClearDataCompresssed" type="button" value="Clear" onclick="buttonClearDataCompressed_Click();" />
			</td>
		</tr>
	</table>

<script type='text/javascript'>

// events

function onload()
{
	var textareaTextToCompressID = "textareaTextToCompress";
	var textareaTextToCompress = document.getElementById(textareaTextToCompressID);
	textareaTextToCompress.innerHTML = DeclarationOfIndependence.IntroductionAndPreamble;
}

function buttonClearDataCompressed_Click()
{
	var textareaDataCompressedID = "textareaDataCompressed";
	var textareaDataCompressed = document.getElementById(textareaDataCompressedID);

	textareaDataCompressed.value = "";
}

function buttonClearTextToCompress_Click()
{
	var textareaTextToCompressID = "textareaTextToCompress";
	var textareaTextToCompress = document.getElementById(textareaTextToCompressID);

	textareaTextToCompress.value = "";
}

function buttonCompress_Click()
{
	var textareaTextToCompressID = "textareaTextToCompress";
	var textareaTextToCompress = document.getElementById(textareaTextToCompressID);
	var textareaDataCompressedID = "textareaDataCompressed";
	var textareaDataCompressed = document.getElementById(textareaDataCompressedID);

	var stringToCompress = textareaTextToCompress.value;

	var compressor = new CompressorLZW();

	var bytesCompressed = compressor.compressString
	(
		stringToCompress
	);

	var bytesCompressedAsString = bytesCompressed.join(",");

	textareaDataCompressed.value = bytesCompressedAsString;

}

function buttonDecompress_Click()
{
	var textareaTextToCompressID = "textareaTextToCompress";
	var textareaTextToCompress = document.getElementById(textareaTextToCompressID);
	var textareaDataCompressedID = "textareaDataCompressed";
	var textareaDataCompressed = document.getElementById(textareaDataCompressedID);

	var compressor = new CompressorLZW();

	var bytesCompressedAsString = textareaDataCompressed.value;
	var bytesCompressedAsStrings = bytesCompressedAsString.split(",");
	var bytesCompressed = [];
	for (var i = 0; i < bytesCompressedAsStrings.length; i++)
	{
		var byteCompressedAsString = bytesCompressedAsStrings[i];
		var byteCompressedAsNumber = parseInt(byteCompressedAsString);
		bytesCompressed.push(byteCompressedAsNumber);
	}

	var stringDecompressed = compressor.decompressBytes
	(
		bytesCompressed
	);

	textareaTextToCompress.value = stringDecompressed;
}

// classes

function BitStream(bytes)
{
	if (bytes == null)
	{
		bytes = [];
	}

	this.bytes = bytes;
	this.byteOffset = 0;
	this.bitOffsetWithinByteCurrent = 0;
	this.byteCurrent = 0;

	this.hasMoreBits = true;
}
{
	// constants

	BitStream.BitsPerByte = 8;
	BitStream.NaturalLogarithmOf2 = Math.log(2);

	// static methods

	BitStream.convertNumberToBitString = function(numberToConvert)
	{
		var returnValue = "";

		var numberOfBitsNeeded = Math.ceil
		(
			Math.log(numberToConvert + 1)
			/ BitStream.NaturalLogarithmOf2
		);

		if (numberOfBitsNeeded == 0)
		{
			numberOfBitsNeeded = 1;
		}

		for (var b = 0; b < numberOfBitsNeeded; b++)
		{
			var bitValue = (numberToConvert >> b) & 1;
			returnValue = "" + bitValue + returnValue;
		}

		return returnValue;
	}

	BitStream.convertNumbersToBitStrings = function(numbersToConvert)
	{
		returnValues = [];

		for (var i = 0; i < numbersToConvert.length; i++)
		{
			var numberToConvert = numbersToConvert[i];
			var numberAsBitString = this.convertNumberToBitString
			(
				numberToConvert
			);
			returnValues.push(numberAsBitString);
		}

		return returnValues;
	}

	// instance methods

	BitStream.prototype.close = function()
	{
		if (this.bitOffsetWithinByteCurrent > 0)
		{
			this.bytes.push(this.byteCurrent);
		}
	}

	BitStream.prototype.readBit = function()
	{
		this.byteCurrent = this.bytes[this.byteOffset];
		var returnValue = (this.byteCurrent >> this.bitOffsetWithinByteCurrent) & 1;

		this.bitOffsetWithinByteCurrent++;

		if (this.bitOffsetWithinByteCurrent >= BitStream.BitsPerByte)
		{
			this.byteOffset++;			
			this.bitOffsetWithinByteCurrent = 0;
			if (this.byteOffset < this.bytes.length)
			{
				this.byteCurrent = this.bytes[this.byteOffset];
			}
			else
			{
				this.hasMoreBits = false;
			}
		}

		return returnValue;
	}
	
	BitStream.prototype.readNumber = function(numberOfBitsInNumber)
	{
		var returnValue = 0;

		for (var i = 0; i < numberOfBitsInNumber; i++)
		{
			var bitRead = this.readBit();
			returnValue |= (bitRead << i);
		}

		return returnValue;
	}
	
	BitStream.prototype.writeBit = function(bitToWrite)
	{
		this.byteCurrent |= (bitToWrite << this.bitOffsetWithinByteCurrent);

		this.bitOffsetWithinByteCurrent++;

		if (this.bitOffsetWithinByteCurrent >= BitStream.BitsPerByte)
		{
			this.bytes.push(this.byteCurrent);
			this.byteOffset++;			
			this.bitOffsetWithinByteCurrent = 0;
			this.byteCurrent = 0;
		}
	}

	BitStream.prototype.writeNumber = function(numberToWrite, numberOfBitsToUse)
	{
		for (var b = 0; b < numberOfBitsToUse; b++)
		{
			var bitValue = (numberToWrite >> b) & 1;
			this.writeBit(bitValue);
		}
	}
}

function CompressorLZW()
{
	// do nothing
}
{
	// constants
	CompressorLZW.SymbolForBitWidthIncrease = 256;
	CompressorLZW.SymbolForBitStreamEnd = CompressorLZW.SymbolForBitWidthIncrease + 1;

	// instance methods

	CompressorLZW.prototype.compressString = function(stringToCompress)
	{
		var bitStream = new BitStream();

		// Adapted from pseudocode found at the URL:
		// http://oldwww.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html

		var dictionary = this.initializeDictionary();
		var pattern = "";

		var symbolForBitWidthIncrease = CompressorLZW.SymbolForBitWidthIncrease;
		var symbolWidthInBitsCurrent = Math.ceil
		(
			Math.log(symbolForBitWidthIncrease + 1) 
			/ BitStream.NaturalLogarithmOf2
		);

		for (var i = 0; i < stringToCompress.length; i++)
		{
			var character = stringToCompress[i];
			var patternPlusCharacter = pattern + character;
			if (dictionary[patternPlusCharacter] == null)
			{				
				var dictionaryIndex = dictionary.length;
				dictionary[patternPlusCharacter] = dictionaryIndex;
				dictionary[dictionaryIndex] = patternPlusCharacter;

				var patternEncoded = dictionary[pattern];

				numberOfBitsRequired = Math.ceil
				(
					Math.log(patternEncoded + 1) 
					/ BitStream.NaturalLogarithmOf2
				);

				if (numberOfBitsRequired > symbolWidthInBitsCurrent)
				{
					bitStream.writeNumber
					(
						symbolForBitWidthIncrease,
						symbolWidthInBitsCurrent
					);

					symbolWidthInBitsCurrent = numberOfBitsRequired;
				}

				bitStream.writeNumber
				(
					patternEncoded,
					symbolWidthInBitsCurrent
				);

				pattern = character;
			}
			else
			{
				pattern = patternPlusCharacter;
			}
			
		}

		var patternEncoded = dictionary[pattern];
		bitStream.writeNumber
		(
			patternEncoded, 
			symbolWidthInBitsCurrent
		);

		bitStream.writeNumber
		(
			CompressorLZW.SymbolForBitStreamEnd,
			symbolWidthInBitsCurrent
		);

		bitStream.close();

		return bitStream.bytes;
	}

	CompressorLZW.prototype.compressStringToSymbols = function(stringToCompress)
	{
		var symbolsCompressed = [];

		// Adapted from pseudocode found at the URL:
		// http://oldwww.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html

		var dictionary = this.initializeDictionary();
		var pattern = "";

		for (var i = 0; i < stringToCompress.length; i++)
		{
			var character = stringToCompress[i];
			var patternPlusCharacter = pattern + character;
			if (dictionary[patternPlusCharacter] == null)
			{
				var patternEncoded = dictionary[pattern];
				symbolsCompressed.push(patternEncoded);
				
				var dictionaryIndex = dictionary.length;
				dictionary[patternPlusCharacter] = dictionaryIndex;
				dictionary[dictionaryIndex] = patternPlusCharacter;

				pattern = character;
			}
			else
			{
				pattern = patternPlusCharacter;
			}
			
		}

		var patternEncoded = dictionary[pattern];
		symbolsCompressed.push(patternEncoded);

		return symbolsCompressed;
	}

	CompressorLZW.prototype.decompressBytes = function(bytesToDecode)
	{
		var stringDecompressed = "";

		// Adapted from pseudocode found at the URL:
		// http://oldwww.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html

		var dictionary = this.initializeDictionary();

		var bitStream = new BitStream(bytesToDecode);

		var symbolForBitStreamEnd = CompressorLZW.SymbolForBitStreamEnd;
		var symbolForBitWidthIncrease = CompressorLZW.SymbolForBitWidthIncrease;
		var symbolWidthInBitsCurrent = Math.ceil
		(
			Math.log(symbolForBitWidthIncrease + 1) 
			/ BitStream.NaturalLogarithmOf2
		);

		var symbolToDecode = bitStream.readNumber(symbolWidthInBitsCurrent);
		var symbolDecoded = dictionary[symbolToDecode];
		stringDecompressed += symbolDecoded;

		var pattern;
		var character;
		var patternPlusCharacter;
	
		while (true) //bitStream.hasMoreBits == true)
		{
			pattern = dictionary[symbolToDecode];
			var symbolNext = bitStream.readNumber
			(
				symbolWidthInBitsCurrent
			);

			if (symbolNext == symbolForBitWidthIncrease)
			{
				symbolWidthInBitsCurrent++;
			}
			else if (symbolNext == symbolForBitStreamEnd)
			{
				break;
			}
			else
			{
				symbolToDecode = symbolNext;
				symbolDecoded = dictionary[symbolToDecode];
	
				if (symbolDecoded == null)
				{
					character = pattern[0];
					patternPlusCharacter = pattern + character;
					stringDecompressed += patternPlusCharacter;
				}
				else
				{
					stringDecompressed += symbolDecoded;
					character = symbolDecoded[0];
					patternPlusCharacter = pattern + character;
				}

				var dictionaryIndex = dictionary.length;
				dictionary[patternPlusCharacter] = dictionaryIndex;
				dictionary[dictionaryIndex] = patternPlusCharacter;
			}
		}

		return stringDecompressed;
	}

	CompressorLZW.prototype.initializeDictionary = function()
	{
		var dictionary = [];

		var firstControlSymbol = CompressorLZW.SymbolForBitWidthIncrease;

		for (var i = 0; i < firstControlSymbol; i++)
		{
			var charCode = String.fromCharCode(i);
			dictionary[charCode] = i;
			dictionary[i] = charCode;
		}

		dictionary[CompressorLZW.SymbolForBitWidthIncrease] = "[WIDEN]";
		dictionary[CompressorLZW.SymbolForBitStreamEnd] = "[END]";

		return dictionary;
	}
}

// data

function DeclarationOfIndependence()
{}
{
	DeclarationOfIndependence.Introduction = 
		"When in the course of human events, "
		+ "it becomes necessary for one people "
		+ "to dissolve the political bands "
		+ "which have connected them with another, "
		+ "and to assume among the powers of the earth, "
		+ "the separate and equal station "
		+ "to which the laws of nature "
		+ "and of nature's god entitle them, "
		+ "a decent respect to the opinions of mankind "
		+ "requires that they should declare the causes "
		+ "which impel them to the separation.  "

	DeclarationOfIndependence.PreamblePart1 = 
		"We hold these truths to be self-evident, "
		+ "that all men are created equal, "
		+ "that they are endowed by their creator "
		+ "with certain unalienable rights, "
		+ "that among these are life, liberty "
		+ "and the pursuit of happiness.  "
		+ "That to secure these rights, "
		+ "governments are instituted among men, "
		+ "deriving their just powers "
		+ "from the consent of the governed, "
		+ "that whenever any form of government "
		+ "becomes destructive of these ends, "
		+ "it is the right of the people "
		+ "to alter or to abolish it, "
		+ "and to institute new government, "
		+ "laying its foundation on such principles "
		+ "and organizing its powers in such form, "
		+ "as to them shall seem most likely "
		+ "to effect their safety and happiness.  "

	DeclarationOfIndependence.PreamblePart2 = 
		"Prudence, indeed, will dictate "
		+ "that governments long established "
		+ "should not be changed "
		+ "for light and transient causes; "
		+ "and accordingly all experience hath shewn, "
		+ "that mankind are more disposed to suffer, "
		+ "while evils are sufferable, "
		+ "than to right themselves by abolishing "
		+ "the forms to which they are accustomed.  "
		+ "But when a long train of abuses and usurpations, "
		+ "pursuing invariably the same object "
		+ "evinces a design to reduce them "
		+ "under absolute despotism, "
		+ "it is their right, it is their duty, "
		+ "to throw off such government, "
		+ "and to provide new guards "
		+ "for their future security. ";

	DeclarationOfIndependence.IntroductionAndPreamble = 
		DeclarationOfIndependence.Introduction
		+ DeclarationOfIndependence.PreamblePart1
		+ DeclarationOfIndependence.PreamblePart2;
}

</script>
</body>
</html>
Advertisements
This entry was posted in Uncategorized and tagged , , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s