Compressing Data with the LZ77 Algorithm in JavaScript

The JavaScript code below, when run, uses the LZ77 algorithm to compress and decompress some demo data, and displays the results on screen.

The LZ77 algorithm was first described, as the name somewhat implies, in the year 1977 by the researchers Abraham Lempel and Jacob Ziv.  The algorithm performs what is called “sliding-window” compression, in which it looks back some distance in the previously considered data to see if any previous sequence of symbols matches the current data.  If it does, it replaces that data in the output with a “link” back to the preceding data.

Actually, this implementation running against this demo data doesn’t REALLY compress the data, in that it actually makes it somewhat bigger. But that’s likely because the sample data isn’t large enough. Presumably compression would improve with a larger sample.

<html>
<body>
<script type='text/javascript'>

// main

function main()
{
	var newline = "<br />";

	//var stringToCompress = "this is a test";
	var stringToCompress = 
		DeclarationOfIndependence.IntroductionAndPreamble;

	write("stringToCompress is: " + stringToCompress);

	var bytesToCompress = ByteHelper.stringASCIIToBytes
	(
		stringToCompress
	);

	write("bytesToCompress.length is: " + bytesToCompress.length);

	var compressor = new CompressorLZ77
	(
		32768
	);

	var bytesCompressed = compressor.compressBytes
	(
		bytesToCompress
	);

	write("bytesCompressed is: " + bytesCompressed);
	write("bytesCompressed.length is: " + bytesCompressed.length);
	
	var bytesDecompressed = compressor.decompressBytes
	(
		bytesCompressed
	);

	write("bytesDecompressed is: " + bytesDecompressed);

	var stringDecompressed = ByteHelper.bytesToStringASCII
	(
		bytesDecompressed
	);

	write("stringDecompressed is: " + stringDecompressed);
}

function write(messageToWrite)
{
	var newline = "<br />";
	document.write(messageToWrite + newline);
}

// classes

function ByteHelper()
{}
{
	ByteHelper.BitsPerByte = 8;
	ByteHelper.BitsPerNibble = ByteHelper.BitsPerByte / 2;

	ByteHelper.bytesToStringASCII = function(bytesToConvert)
	{
		var returnValue = "";

		for (var i = 0; i < bytesToConvert.length; i++)
		{
			var charCode = bytesToConvert[i];
			var character = String.fromCharCode(charCode);
			returnValue += character;
		}

		return returnValue;
	}

	ByteHelper.bytesToStringHexadecimal = function(bytesToConvert)
	{
		var returnValue = "";

		var bitsPerNibble = ByteHelper.BitsPerNibble;

		for (var i = 0; i < bytesToConvert.length; i++)
		{
			var byte = bytesToConvert[i];

			for (var d = 1; d >= 0; d--)
			{
				var digitValue = byte >> (bitsPerNibble * d) & 0xF;
				var digitString = "";
				digitString += (digitValue + (digitValue < 10 ? 0: 'A'));
				returnValue += digitString;
			}

			returnValue += " ";
		}

		return returnValue;
	}

	ByteHelper.bytesToNumber = function(bytes)
	{
		var returnValue = 0;

		var bitsPerByte = ByteHelper.BitsPerByte;

		for (var i = 0; i < bytes.length; i++)
		{
			var byte = bytes[i];
			var byteValue = (byte << (bitsPerByte * i));
			returnValue += byteValue;
		}

		return returnValue;
	}

	ByteHelper.numberOfBytesNeededToStoreNumber = function(number)
	{
		var numberOfBitsInNumber = Math.ceil
		(
			Math.log(number + 1) / Math.log(2)
		);

		var numberOfBytesNeeded = Math.ceil
		(
			numberOfBitsInNumber 
			/ ByteHelper.BitsPerByte
		);

		return numberOfBytesNeeded;
	}

	ByteHelper.numberToBytes = function(number, numberOfBytesToUse)
	{
		var returnValues = [];

		if (numberOfBytesToUse == null)
		{
			numberOfBytesToUse = this.numberOfBytesNeededToStoreNumber
			(
				number
			);
		}

		var bitsPerByte = ByteHelper.BitsPerByte;

		for (var i = 0; i < numberOfBytesToUse; i++)
		{
			var byte = (number >> (bitsPerByte * i)) & 0xFF;
			returnValues.push(byte);
		}

		return returnValues;
	}

	ByteHelper.stringASCIIToBytes = function(stringToConvert)
	{
		var returnValues = [];

		for (var i = 0; i < stringToConvert.length; i++)
		{
			var charCode = stringToConvert.charCodeAt(i);
			returnValues.push(charCode);
		}

		return returnValues;
	}
}

function CompressorLZ77(slidingWindowSizeInBytes)
{
	this.slidingWindowSizeInBytes = slidingWindowSizeInBytes;

	this.distanceSizeInBytes = ByteHelper.numberOfBytesNeededToStoreNumber
	(
		this.slidingWindowSizeInBytes
	);

	if (this.distanceSizeInBytes > 4)
	{
		throw "Sliding window size not supported!"
	}
}
{
	CompressorLZ77.prototype.compressBytes = function(bytesToCompress)
	{
		var distanceLengthLiterals = [];

		var bytesToCompressAsString = "";
		var pattern = "";
		var patternPrev = "";
		var offsetOfMatch = -1;
		var offsetOfMatchPrev = -1;

		for (var i = 0; i < bytesToCompress.length; i++)
		{

if (i >= 348)
{
	var one = 1;
}
			var byteToCompress = bytesToCompress[i];

			var byteToCompressAsChar = String.fromCharCode(byteToCompress);

			patternPrev = pattern;
			pattern += byteToCompressAsChar;

			offsetOfMatchPrev = offsetOfMatch;
			offsetOfMatch = this.findMatchForPatternInSlidingWindow
			(
				pattern,
				bytesToCompressAsString
			);		

			if (offsetOfMatch == -1)
			{
				var distanceToMatch = 
					offsetOfMatchPrev 
					- patternPrev.length 
					+ 1;


				var distanceLengthLiteral = new DistanceLengthLiteral
				(
					distanceToMatch,
					patternPrev.length,
					byteToCompressAsChar
				);	

				distanceLengthLiterals.push(distanceLengthLiteral);

				pattern = "";
			}

			bytesToCompressAsString += byteToCompressAsChar;

			if (bytesToCompressAsString.length >= this.slidingWindowSizeInBytes)
			{
				var substringIndex = 
					bytesToCompressAsString.length 
					- this.slidingWindowSizeInBytes;

				// Using "substr()", not "substring()", which is different.
				bytesToCompressAsString = bytesToCompressAsString.substr 
				(
					substringIndex
				);
			}
		}

		var distanceLengthLiteralsAsBytes = DistanceLengthLiteral.convertManyToBytes
		(
			distanceLengthLiterals,
			this.distanceSizeInBytes
		);

		return distanceLengthLiteralsAsBytes;
	}

	CompressorLZ77.prototype.decompressBytes = function(bytesToDecompress)
	{
		var distanceLengthLiterals = DistanceLengthLiteral.buildManyFromBytes
		(
			bytesToDecompress,
			this.distanceSizeInBytes
		);

		var returnValue = this.decompressDistanceLengthLiterals
		(
			distanceLengthLiterals
		);

		return returnValue;
	}

	CompressorLZ77.prototype.decompressDistanceLengthLiterals = function(distanceLengthLiterals)
	{
		var bytesDecompressedAsString = "";
		
		for (var i = 0; i < distanceLengthLiterals.length; i++)
		{
			var distanceLengthLiteral = distanceLengthLiterals[i];
			var length = distanceLengthLiteral.length;
			var distance = distanceLengthLiteral.distance;
			var literalNext = distanceLengthLiteral.literalNext;

			if (distance > 0)
			{
				for (var j = 0; j < length; j++)
				{
					var slidingWindowIndex =
						bytesDecompressedAsString.length
						- distance;
	
					var valueFromSlidingWindow = bytesDecompressedAsString
					[
						slidingWindowIndex
					];
	
					bytesDecompressedAsString += valueFromSlidingWindow;
				}	
			}


			bytesDecompressedAsString += literalNext;
		}

		var bytesDecompressed = ByteHelper.stringASCIIToBytes
		(
			bytesDecompressedAsString
		);

		return bytesDecompressed;
	}

	CompressorLZ77.prototype.findMatchForPatternInSlidingWindow = function
	(
		patternToMatch,
		slidingWindowAsString
	)
	{
		var indexOfLastMatch = slidingWindowAsString.lastIndexOf
		(
			patternToMatch,
			this.slidingWindowSizeInBytes
		);

		var returnValue = (indexOfLastMatch == -1 ? -1 : slidingWindowAsString.length - indexOfLastMatch);

		return returnValue;
	}
}

function DistanceLengthLiteral(distance, length, literalNext)
{
	this.distance = distance;
	this.length = length;
	this.literalNext = literalNext;
}
{
	// static methods

	DistanceLengthLiteral.buildManyFromBytes = function(bytes, distanceSizeInBytes)
	{
		var returnValues = [];

		var sizeOfLengthAndLiteralInBytes = 2;

		var sizeInBytes = 
			sizeOfLengthAndLiteralInBytes
			+ distanceSizeInBytes;

		for (var i = 0; i < bytes.length; i += sizeInBytes)
		{
			var distance = ByteHelper.bytesToNumber
			(
				bytes.slice
				(
					i, 
					i + distanceSizeInBytes
				)
			);

			var length = bytes[i + distanceSizeInBytes];

			var literalNext = String.fromCharCode
			(
				bytes[i + 1 + distanceSizeInBytes]
			);

			var distanceLengthLiteral = new DistanceLengthLiteral
			(
				distance,
				length,
				literalNext
			);
	
			returnValues.push(distanceLengthLiteral);
		}

		return returnValues;
	}

	DistanceLengthLiteral.convertManyToBytes = function(distanceLengthLiterals, distanceSizeInBytes)
	{
		var returnValues = [];

		for (var i = 0; i < distanceLengthLiterals.length; i++)
		{
			var distanceLengthLiteral = distanceLengthLiterals[i];

			var distanceAsBytes = ByteHelper.numberToBytes
			(
				distanceLengthLiteral.distance,
				distanceSizeInBytes
			);

			for (var b = 0; b < distanceAsBytes.length; b++)
			{
				returnValues.push(distanceAsBytes[b]);
			}

			returnValues.push(distanceLengthLiteral.length);
			returnValues.push
			(
				distanceLengthLiteral.literalNext.charCodeAt(0)
			);
		}

		return returnValues;
	}

	// instance methods

	DistanceLengthLiteral.prototype.toString = function()
	{
		var returnValue = 
			"(" 
			+ this.distance + ","
			+ this.length + ","
			+ "\"" + this.literalNext + "\""
			+ ")";

		return returnValue;
	}
}

// data

function DeclarationOfIndependence()
{}
{
	DeclarationOfIndependence.Introduction = 
		"When in the course of human events, "
		+ "it becomes necessary for one people "
		+ "to dissolve the political bands "
		+ "which have connected them with another, "
		+ "and to assume among the powers of the earth, "
		+ "the separate and equal station "
		+ "to which the laws of nature "
		+ "and of nature's god entitle them, "
		+ "a decent respect to the opinions of mankind "
		+ "requires that they should declare the causes "
		+ "which impel them to the separation.  "

	DeclarationOfIndependence.PreamblePart1 = 
		"We hold these truths to be self-evident, "
		+ "that all men are created equal, "
		+ "that they are endowed by their creator "
		+ "with certain unalienable rights, "
		+ "that among these are life, liberty "
		+ "and the pursuit of happiness.  "
		+ "That to secure these rights, "
		+ "governments are instituted among men, "
		+ "deriving their just powers "
		+ "from the consent of the governed, "
		+ "that whenever any form of government "
		+ "becomes destructive of these ends, "
		+ "it is the right of the people "
		+ "to alter or to abolish it, "
		+ "and to institute new government, "
		+ "laying its foundation on such principles "
		+ "and organizing its powers in such form, "
		+ "as to them shall seem most likely "
		+ "to effect their safety and happiness.  "

	DeclarationOfIndependence.PreamblePart2 = 
		"Prudence, indeed, will dictate "
		+ "that governments long established "
		+ "should not be changed "
		+ "for light and transient causes; "
		+ "and accordingly all experience hath shewn, "
		+ "that mankind are more disposed to suffer, "
		+ "while evils are sufferable, "
		+ "than to right themselves by abolishing "
		+ "the forms to which they are accustomed.  "
		+ "But when a long train of abuses and usurpations, "
		+ "pursuing invariably the same object "
		+ "evinces a design to reduce them "
		+ "under absolute despotism, "
		+ "it is their right, it is their duty, "
		+ "to throw off such government, "
		+ "and to provide new guards "
		+ "for their future security. ";

	DeclarationOfIndependence.IntroductionAndPreamble = 
		DeclarationOfIndependence.Introduction
		+ DeclarationOfIndependence.PreamblePart1
		+ DeclarationOfIndependence.PreamblePart2;
}

// run

main();

</script>
</body>
</html>

This entry was posted in Uncategorized and tagged , , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s