Exploring the ZIP File Format in JavaScript

The JavaScript program below, when run, prompts the user to choose a file in .ZIP format. The file is then parsed locally in the browser (nothing is sent to a server), and its internal structure is displayed as text in JSON format.

Note that this code does not actually decompress the data, though it does display the compressed data for each internal file in hexadecimal format.

To see the code in action, copy it into an .html file and open that file in a web browser that runs JavaScript.


<html>
<body>

<!-- User interface: a file picker for the ZIP archive and a text area that receives the parsed structure as JSON. -->
<div id="divUI">
	<label for="inputFileToLoad">ZIP File to Load:</label><br />
	<!-- "input" is a void element, so it takes no closing tag. -->
	<input id="inputFileToLoad" type="file" onchange="inputFile_Changed(this);" /><br />
	<label for="textareaFileAsJSON">File Contents as JSON:</label><br />
	<textarea id="textareaFileAsJSON" cols="80" rows="20"></textarea>
</div>

<script type="text/javascript">

// ui event handlers

// Change handler for the file input.
// Reads the first selected file, parses it as a ZIP archive, and writes the
// parsed structure, as JSON text, into the "textareaFileAsJSON" element.
// If the file cannot be read or parsed, the failure is written to that same
// element instead of being lost as an uncaught exception.
//
// inputFile: the file input element (only its "files" list is used).
function inputFile_Changed(inputFile)
{
	var file = inputFile.files[0];
	if (file == null)
	{
		// Nothing was selected (for example, the file dialog was cancelled).
		return;
	}

	var fileReader = new FileReader();

	fileReader.onload = function(event)
	{
		var textareaFileAsJSON = document.getElementById("textareaFileAsJSON");
		try
		{
			var fileAsBinaryString = event.target.result;
			var fileAsBytes = ByteHelper.binaryStringToBytes(fileAsBinaryString);
			var fileAsZipFile = ZipFile.fromBytes(fileAsBytes);
			textareaFileAsJSON.value = fileAsZipFile.toStringJSON();
		}
		catch (error)
		{
			// The parser may throw either an Error object or a plain string.
			var message = (error instanceof Error ? error.message : String(error));
			textareaFileAsJSON.value = "Could not parse the file as a ZIP archive: " + message;
		}
	};

	fileReader.onerror = function()
	{
		var textareaFileAsJSON = document.getElementById("textareaFileAsJSON");
		textareaFileAsJSON.value = "Could not read the selected file.";
	};

	// The result is a "binary string": one character per byte of the file.
	fileReader.readAsBinaryString(file);
}

// classes

// Static helpers for converting between arrays of byte values (0-255),
// integers, and strings.
function ByteHelper()
{
	// static class
}
{
	// Converts a "binary string" (one character per byte)
	// into an array of its character codes.
	ByteHelper.binaryStringToBytes = function(binaryString)
	{
		var bytesSoFar = [];
		for (var i = 0; i < binaryString.length; i++)
		{
			var byte = binaryString.charCodeAt(i);
			bytesSoFar.push(byte);
		}
		return bytesSoFar;
	};

	// "LE" = "Little Endian"

	// Combines bytes[0] (low) and bytes[1] (high) into a 16-bit unsigned integer.
	ByteHelper.bytesToInteger16LE = function(bytes)
	{
		return (bytes[1] << 8) | bytes[0];
	};

	// Combines bytes[0] (lowest) through bytes[3] (highest)
	// into a 32-bit unsigned integer.
	ByteHelper.bytesToInteger32LE = function(bytes)
	{
		var valueSigned = (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];

		// Bitwise operators yield a signed 32-bit result, so a set top bit
		// would make the value negative; ">>> 0" reinterprets it as unsigned.
		return valueSigned >>> 0;
	};

	// Returns the bytes as lowercase hexadecimal, exactly two digits per byte.
	ByteHelper.bytesToStringHexadecimal = function(bytes)
	{
		var returnValue = "";
		for (var i = 0; i < bytes.length; i++)
		{
			var byte = bytes[i];
			var byteAsString = byte.toString(16);
			if (byteAsString.length < 2)
			{
				// Without the leading zero, [0x01, 0x23] and [0x12, 0x03]
				// would both render as "123".
				byteAsString = "0" + byteAsString;
			}
			returnValue += byteAsString;
		}
		return returnValue;
	};

	// Decodes the bytes as UTF-8 text.
	// Where TextDecoder is unavailable, falls back to one character per byte,
	// which is only correct for single-byte (ASCII) content.
	ByteHelper.bytesToStringUTF8 = function(bytes)
	{
		if (typeof TextDecoder != "undefined")
		{
			// ignoreBOM: true keeps a leading byte order mark in the output,
			// so that no input bytes are silently dropped.
			var decoder = new TextDecoder("utf-8", { ignoreBOM: true });
			return decoder.decode(new Uint8Array(bytes));
		}

		var returnValue = "";
		for (var i = 0; i < bytes.length; i++)
		{
			var byte = bytes[i];
			var byteAsString = String.fromCharCode(byte);
			returnValue += byteAsString;
		}
		return returnValue;
	};
}

// A forward-only reader over an array of byte values.
//
// bytes: the array to read from.
// byteOffset: index of the next byte to be read.
// byteBuffer: scratch array reused by readBytesIntoBuffer().
function ByteStream(bytes)
{
	this.bytes = bytes;
	this.byteOffset = 0;

	this.byteBuffer = [];
}
{
	// Returns true while at least one unread byte remains.
	ByteStream.prototype.hasMore = function()
	{
		return this.byteOffset < this.bytes.length;
	};

	// Returns the next byte and advances past it.
	// Throws an Error if no bytes remain.
	ByteStream.prototype.readByte = function()
	{
		if (this.byteOffset >= this.bytes.length)
		{
			throw new Error
			(
				"Unexpected end of data at byte offset " + this.byteOffset + "."
			);
		}

		var byteRead = this.bytes[this.byteOffset];
		this.byteOffset++;

		return byteRead;
	};

	// Returns the next byteCount bytes as a new array owned by the caller.
	ByteStream.prototype.readBytes = function(byteCount)
	{
		return this.readBytesIntoBuffer(byteCount).slice();
	};

	// Reads the next byteCount bytes into the shared scratch buffer
	// and returns that buffer. The contents are overwritten by the next read,
	// so callers must consume (or copy) them immediately.
	// Throws an Error, without consuming anything, if fewer than byteCount
	// bytes remain.
	ByteStream.prototype.readBytesIntoBuffer = function(byteCount)
	{
		// Checked up front so that a corrupt length field fails immediately
		// instead of looping over, and buffering, bytes that do not exist.
		var bytesRemaining = this.bytes.length - this.byteOffset;
		if (byteCount > bytesRemaining)
		{
			throw new Error
			(
				"Unexpected end of data: " + byteCount
				+ " byte(s) requested at byte offset " + this.byteOffset
				+ ", but only " + bytesRemaining + " remain."
			);
		}

		this.byteBuffer.length = 0;
		for (var i = 0; i < byteCount; i++)
		{
			var byteRead = this.readByte();
			this.byteBuffer.push(byteRead);
		}
		return this.byteBuffer;
	};

	// "LE" = "Little Endian"

	// Reads 2 bytes as a little-endian integer.
	ByteStream.prototype.readInteger16LE = function()
	{
		return ByteHelper.bytesToInteger16LE(this.readBytesIntoBuffer(2));
	};

	// Reads 4 bytes as a little-endian integer.
	ByteStream.prototype.readInteger32LE = function()
	{
		return ByteHelper.bytesToInteger32LE(this.readBytesIntoBuffer(4));
	};

	// Reads "length" bytes and returns them as a hexadecimal string.
	ByteStream.prototype.readStringHexadecimal = function(length)
	{
		return ByteHelper.bytesToStringHexadecimal(this.readBytesIntoBuffer(length));
	};

	// Reads "length" bytes and returns them as a text string.
	ByteStream.prototype.readStringUTF8 = function(length)
	{
		return ByteHelper.bytesToStringUTF8(this.readBytesIntoBuffer(length));
	};
}

// The parsed structure of a ZIP archive.
//
// entriesLocal: array of ZipFileLocalFileEntry, in file order.
// entriesCentralDirectory: array of ZipFileCentralDirectoryEntry, in file order.
// centralDirectoryEndRecord: the ZipFileCentralDirectoryEndRecord,
// or null if none was found.
function ZipFile(entriesLocal, entriesCentralDirectory, centralDirectoryEndRecord)
{
	this.entriesLocal = entriesLocal;
	this.entriesCentralDirectory = entriesCentralDirectory;
	this.centralDirectoryEndRecord = centralDirectoryEndRecord;
}
{
	// Parses an array of byte values into a ZipFile.
	//
	// The bytes are read front to back as a sequence of records, each
	// identified by its 4-byte signature. Each local file header is assumed
	// to be followed immediately by exactly "sizeCompressedInBytes" bytes of data.
	// NOTE(review): archives whose local headers defer the sizes to a trailing
	// data descriptor are therefore not handled - confirm whether that matters.
	//
	// Throws an Error with the message "Unexpected format."
	// if an unrecognized signature is encountered.
	ZipFile.fromBytes = function(bytes)
	{
		var entriesLocal = [];
		var entriesCentralDirectory = [];
		var centralDirectoryEndRecord = null;

		var reader = new ByteStream(bytes);

		while (reader.hasMore())
		{
			var signature = reader.readInteger32LE();

			if (signature == ZipFileLocalFileHeader.Signature)
			{
				var entryLocal = ZipFile.fromBytes_ReadLocalFileEntry(reader, signature);
				entriesLocal.push(entryLocal);
			}
			else if (signature == ZipFileCentralDirectoryEntry.Signature)
			{
				var entryCentralDirectory =
					ZipFile.fromBytes_ReadCentralDirectoryEntry(reader, signature);
				entriesCentralDirectory.push(entryCentralDirectory);
			}
			else if (signature == ZipFileCentralDirectoryEndRecord.Signature)
			{
				centralDirectoryEndRecord =
					ZipFile.fromBytes_ReadCentralDirectoryEndRecord(reader);
			}
			else
			{
				throw new Error("Unexpected format.");
			}
		}

		var returnValue = new ZipFile(entriesLocal, entriesCentralDirectory, centralDirectoryEndRecord);
		return returnValue;
	};

	// Reads one local file header (whose signature has already been consumed)
	// plus the compressed data that follows it.
	ZipFile.fromBytes_ReadLocalFileEntry = function(reader, signature)
	{
		var versionNeededToExtract = reader.readInteger16LE();
		var flags = reader.readInteger16LE();

		// The compression method is a 16-bit number (for example, 0 or 8),
		// not text, so it is read as an integer.
		var compressionMethod = reader.readInteger16LE();

		var timeLastModified = ZipFile.fromBytes_ReadTimeFromByteStream(reader);

		var crc32 = reader.readInteger32LE();
		var sizeCompressedInBytes = reader.readInteger32LE();
		var sizeUncompressedInBytes = reader.readInteger32LE();

		// Both lengths precede the two variable-length fields they describe.
		var filenameLength = reader.readInteger16LE();
		var extraFieldLength = reader.readInteger16LE();
		var filename = reader.readStringUTF8(filenameLength);
		var extraFieldAsHexadecimal = reader.readStringHexadecimal(extraFieldLength);

		var entryHeader = new ZipFileLocalFileHeader
		(
			signature,
			versionNeededToExtract,
			flags,
			compressionMethod,
			timeLastModified,
			crc32,
			sizeCompressedInBytes,
			sizeUncompressedInBytes,
			filename,
			extraFieldAsHexadecimal
		);

		var entryDataCompressedAsStringHexadecimal =
			reader.readStringHexadecimal(sizeCompressedInBytes);

		return new ZipFileLocalFileEntry(entryHeader, entryDataCompressedAsStringHexadecimal);
	};

	// Reads one central directory file header
	// (whose signature has already been consumed).
	ZipFile.fromBytes_ReadCentralDirectoryEntry = function(reader, signature)
	{
		var versionMadeBy = reader.readInteger16LE();
		var versionNeededToExtract = reader.readInteger16LE();
		var flags = reader.readInteger16LE();

		// As in the local header, the compression method is a 16-bit number.
		var compressionMethod = reader.readInteger16LE();

		var timeLastModified = ZipFile.fromBytes_ReadTimeFromByteStream(reader);
		var crc32 = reader.readInteger32LE();
		var sizeCompressedInBytes = reader.readInteger32LE();
		var sizeUncompressedInBytes = reader.readInteger32LE();

		// All three lengths precede the fixed-size fields below;
		// the variable-length fields themselves come last.
		var filenameLength = reader.readInteger16LE();
		var extraFieldLength = reader.readInteger16LE();
		var fileCommentLength = reader.readInteger16LE();

		var diskNumber = reader.readInteger16LE();
		var fileAttributesInternal = reader.readInteger16LE();
		var fileAttributesExternal = reader.readInteger32LE();
		var offsetOfLocalHeader = reader.readInteger32LE();

		var filename = reader.readStringUTF8(filenameLength);
		var extraFieldAsHexadecimal = reader.readStringHexadecimal(extraFieldLength);
		var fileComment = reader.readStringUTF8(fileCommentLength);

		return new ZipFileCentralDirectoryEntry
		(
			signature,
			versionMadeBy,
			versionNeededToExtract,
			flags,
			compressionMethod,
			timeLastModified,
			crc32,
			sizeCompressedInBytes,
			sizeUncompressedInBytes,
			filename,
			extraFieldAsHexadecimal,
			fileComment,
			diskNumber,
			fileAttributesInternal,
			fileAttributesExternal,
			offsetOfLocalHeader
		);
	};

	// Reads the end of central directory record
	// (whose signature has already been consumed).
	ZipFile.fromBytes_ReadCentralDirectoryEndRecord = function(reader)
	{
		var diskNumber = reader.readInteger16LE();
		var diskStart = reader.readInteger16LE();
		var recordsOnDisk = reader.readInteger16LE();
		var recordsTotal = reader.readInteger16LE();
		var sizeInBytes = reader.readInteger32LE();
		var offset = reader.readInteger32LE();
		var commentLength = reader.readInteger16LE();
		var comment = reader.readStringUTF8(commentLength);

		return new ZipFileCentralDirectoryEndRecord
		(
			diskNumber,
			diskStart,
			recordsOnDisk,
			recordsTotal,
			sizeInBytes,
			offset,
			comment
		);
	};

	// Reads a 16-bit packed time followed by a 16-bit packed date
	// and returns them as a Date built from local-time components.
	ZipFile.fromBytes_ReadTimeFromByteStream = function(reader)
	{
		// Based on the timestamp format for the FAT filesystem,
		// made popular by Microsoft's MS-DOS.

		var hours5Bits_Minutes6Bits_DualSeconds5Bits = reader.readInteger16LE();
		var hours = (hours5Bits_Minutes6Bits_DualSeconds5Bits >> 11);
		var minutes = (hours5Bits_Minutes6Bits_DualSeconds5Bits >> 5) & 0x3F;
		// Seconds are stored halved, so only even values can be represented.
		var secondsOver2 = hours5Bits_Minutes6Bits_DualSeconds5Bits & 0x1F;
		var seconds = secondsOver2 * 2;

		var years7Bits_Month4Bits_Days5Bits = reader.readInteger16LE();
		var fatFilesystemEpochYear = 1980;
		var year = (years7Bits_Month4Bits_Days5Bits >> 9) + fatFilesystemEpochYear;
		// The stored month is 1-based, but the Date constructor expects 0-based.
		var month = ((years7Bits_Month4Bits_Days5Bits >> 5) & 0xF) - 1;
		var day = (years7Bits_Month4Bits_Days5Bits) & 0x1F;

		var returnValue = new Date(year, month, day, hours, minutes, seconds, 0);
		return returnValue;
	};

	// instance methods

	// Returns this object serialized as JSON, indented by two spaces.
	ZipFile.prototype.toStringJSON = function()
	{
		var indentSizeInSpaces = 2;
		var returnValue = JSON.stringify(this, null, indentSizeInSpaces);
		return returnValue;
	};
}

// One file's record in the archive's "central directory".
// It carries the same fields as a local file header, plus several more.
// Every constructor argument is stored, unchanged, under its own name.
function ZipFileCentralDirectoryEntry
(
	signature,
	versionMadeBy,
	versionNeededToExtract,
	flags,
	compressionMethod,
	timeLastModified,
	crc32,
	sizeCompressedInBytes,
	sizeUncompressedInBytes,
	filename,
	extraFieldAsHexadecimal,
	fileComment,
	diskNumber,
	fileAttributesInternal,
	fileAttributesExternal,
	offsetOfLocalHeader
)
{
	// The key order below is the order in which the fields are serialized.
	Object.assign
	(
		this,
		{
			signature: signature,
			versionMadeBy: versionMadeBy,
			versionNeededToExtract: versionNeededToExtract,
			flags: flags,
			compressionMethod: compressionMethod,
			timeLastModified: timeLastModified,
			// 32-bit cyclic redundancy check, used to detect corruption.
			crc32: crc32,
			sizeCompressedInBytes: sizeCompressedInBytes,
			sizeUncompressedInBytes: sizeUncompressedInBytes,
			filename: filename,
			extraFieldAsHexadecimal: extraFieldAsHexadecimal,
			fileComment: fileComment,
			diskNumber: diskNumber,
			fileAttributesInternal: fileAttributesInternal,
			fileAttributesExternal: fileAttributesExternal,
			offsetOfLocalHeader: offsetOfLocalHeader
		}
	);
}

// The bytes "PK", 0x01, 0x02, read as a little-endian 32-bit integer.
ZipFileCentralDirectoryEntry.Signature = 0x02014B50;

// The "end of central directory" record.
// Every constructor argument is stored, unchanged, under its own name.
// Unlike the other record types, the signature itself is not stored.
function ZipFileCentralDirectoryEndRecord
(
	diskNumber,
	diskStart,
	recordsOnDisk,
	recordsTotal,
	sizeInBytes,
	offset,
	comment
)
{
	// The key order below is the order in which the fields are serialized.
	Object.assign
	(
		this,
		{
			diskNumber: diskNumber,
			diskStart: diskStart,
			recordsOnDisk: recordsOnDisk,
			recordsTotal: recordsTotal,
			sizeInBytes: sizeInBytes,
			offset: offset,
			comment: comment
		}
	);
}

// The bytes "PK", 0x05, 0x06, read as a little-endian 32-bit integer.
ZipFileCentralDirectoryEndRecord.Signature = 0x06054B50;

// Pairs one local file header with the compressed data stored after it,
// the data being held as a hexadecimal string.
function ZipFileLocalFileEntry(header, dataCompressedAsStringHexadecimal)
{
	Object.assign
	(
		this,
		{
			header: header,
			dataCompressedAsStringHexadecimal: dataCompressedAsStringHexadecimal
		}
	);
}

// The "local" file header that precedes one file's compressed data.
// It holds a subset of the fields of the corresponding central directory entry.
// Every constructor argument is stored, unchanged, under its own name.
function ZipFileLocalFileHeader
(
	signature,
	versionNeededToExtract,
	flags,
	compressionMethod,
	timeLastModified,
	crc32,
	sizeCompressedInBytes,
	sizeUncompressedInBytes,
	filename,
	extraFieldAsHexadecimal
)
{
	// The key order below is the order in which the fields are serialized.
	Object.assign
	(
		this,
		{
			signature: signature,
			versionNeededToExtract: versionNeededToExtract,
			flags: flags,
			compressionMethod: compressionMethod,
			timeLastModified: timeLastModified,
			crc32: crc32,
			sizeCompressedInBytes: sizeCompressedInBytes,
			sizeUncompressedInBytes: sizeUncompressedInBytes,
			filename: filename,
			extraFieldAsHexadecimal: extraFieldAsHexadecimal
		}
	);
}

// The bytes "PK", 0x03, 0x04, read as a little-endian 32-bit integer.
ZipFileLocalFileHeader.Signature = 0x04034B50;

</script>

</body>
</html>

Advertisement
This entry was posted in Uncategorized and tagged , , , , . Bookmark the permalink.

3 Responses to Exploring the ZIP File Format in JavaScript

  1. psychocod3r says:

    So kinda like hexdump for the web. Neat.

  2. SarahC says:

    Very interesting, thanks!

  3. ilikemacsalot says:

    wow

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s