A CSV Compressor in JavaScript

Below is a simple program that can remove repeated values from a CSV, thus making the file smaller. To see it in action, copy it into an .html file and open that file in a web browser that runs JavaScript.

I suppose it’s actually more of a “run-length suppressor” than a compressor, in that it only trims out values that repeat in subsequent rows, and it will likely only be of much value in certain very limited contexts similar to the one I wrote it for. Nonetheless, I’m posting it here on the chance that someone might find it useful.


<html>
<body>

<!--
	User interface for the compressor.
	The file input passes itself to inputFile_Changed() when its selection changes.
	The textarea "textareaData" is the element the script's handlers look up by id.
	The three buttons call buttonCompress_Clicked(), buttonDecompress_Clicked()
	and buttonSave_Clicked() respectively.
-->
<div id="divUI">
	<label><b>CSV Repeated Value Compressor</b></label><br />
	<input type="file" onchange="inputFile_Changed(this);"></input><br />
	<textarea id="textareaData" cols="80" rows="25"></textarea><br />
	<button onclick="buttonCompress_Clicked();">Compress</button>
	<button onclick="buttonDecompress_Clicked();">Decompress</button>
	<button onclick="buttonSave_Clicked();">Save</button>
</div>

<script type="text/javascript">

// ui events

// Click handler for the "Compress" button.
// Reads the current text of the "textareaData" element, runs it through
// CsvCompressor.compress(), and writes the result back into the same element.
function buttonCompress_Clicked()
{
	var dataBox = document.getElementById("textareaData");
	var rawText = dataBox.value;
	dataBox.value = new CsvCompressor().compress(rawText);
}

// Click handler for the "Save" button.
// Offers the current text of the "textareaData" element to the user as a
// download named "Out.csv", by clicking a temporary link that points at an
// object URL for a Blob holding the text.
function buttonSave_Clicked()
{
	var textareaData = document.getElementById("textareaData");
	var textToSave = textareaData.value;

	var textToSaveAsBlob = new Blob([textToSave], {type:"text/plain"});
	var textToSaveAsURL = window.URL.createObjectURL(textToSaveAsBlob);

	var downloadLink = document.createElement("a");
	downloadLink.download = "Out.csv";
	// The label is plain text, so there is no reason to parse it as HTML.
	downloadLink.textContent = "Download File";
	downloadLink.href = textToSaveAsURL;
	downloadLink.style.display = "none";

	// Some browsers ignore click() on a link that is not part of the
	// document, so attach it just long enough to trigger the download.
	document.body.appendChild(downloadLink);
	downloadLink.click();
	document.body.removeChild(downloadLink);

	// Release the Blob once the browser has had a chance to start the
	// download; otherwise every save keeps another copy of the text alive
	// until the page is closed.
	window.setTimeout(
		function()
		{
			window.URL.revokeObjectURL(textToSaveAsURL);
		},
		0
	);
}

// Click handler for the "Decompress" button.
// Reads the current text of the "textareaData" element, runs it through
// CsvCompressor.decompress(), and writes the result back into the same element.
function buttonDecompress_Clicked()
{
	var dataBox = document.getElementById("textareaData");
	var packedText = dataBox.value;
	dataBox.value = new CsvCompressor().decompress(packedText);
}

// Change handler for the file input.
// Reads the first selected file as text and, once loading finishes, places
// its contents into the "textareaData" element. Does nothing when no file
// is selected.
//
// inputFile: the file input element whose selection changed.
function inputFile_Changed(inputFile)
{
	var chosenFile = inputFile.files[0];
	if (chosenFile == null)
	{
		// Nothing was picked, so there is nothing to load.
		return;
	}

	var reader = new FileReader();
	reader.onload = function(loadEvent)
	{
		var loadedText = loadEvent.target.result;
		document.getElementById("textareaData").value = loadedText;
	};
	reader.readAsText(chosenFile);
}

// classes

// CsvCompressor shrinks comma-separated text by blanking out any value that
// is the same as the most recent explicit value in the same column, and can
// restore the original text afterwards.
//
// Limitations of the format:
// - Lines are split on "\n" and values on ","; quoted fields that contain
//   commas or newlines are not understood.
// - A blank value means "same as before", so a cell that is genuinely empty
//   in the input comes back from decompress() as the previous value.
// - In single-column data an empty line is the only way to express a
//   repeated value, so genuinely empty lines (including the one produced by
//   a trailing newline) cannot be told apart from repeats there.
function CsvCompressor()
{
	// Do nothing.
}
{
	CsvCompressor.Blank = "";
	CsvCompressor.Comma = ",";
	CsvCompressor.Newline = "\n";

	// Returns dataToCompress with repeated values blanked out.
	//
	// The first line is always kept as-is. In data with more than one
	// column, wholly empty lines are copied through without affecting the
	// remembered values, and a non-empty line is never reduced to an empty
	// one, so that decompress() can recognise empty lines unambiguously.
	//
	// dataToCompress: the CSV text, as a string.
	// Returns: the compressed text, as a string.
	CsvCompressor.prototype.compress = function(dataToCompress)
	{
		var blank = CsvCompressor.Blank;
		var comma = CsvCompressor.Comma;
		var newline = CsvCompressor.Newline;

		var linesToCompress = dataToCompress.split(newline);
		var lineToCompress0 = linesToCompress[0];
		// Most recent explicit value seen in each column so far.
		var valuesToCompressPrev = lineToCompress0.split(comma);
		var linesCompressed = [ lineToCompress0 ];
		for (var i = 1; i < linesToCompress.length; i++)
		{
			var lineToCompress = linesToCompress[i];
			var isMultiColumn = (valuesToCompressPrev.length > 1);

			if (isMultiColumn && lineToCompress === blank)
			{
				// A genuinely empty line, such as the one after a
				// trailing newline: keep it, and leave the remembered
				// values alone.
				linesCompressed.push(lineToCompress);
				continue;
			}

			var valuesToCompress = lineToCompress.split(comma);
			var valuesCompressed = [];
			for (var v = 0; v < valuesToCompress.length; v++)
			{
				var valueToCompress = valuesToCompress[v];
				var isValueSameAsPrev =
					(valueToCompress === valuesToCompressPrev[v]);
				if (isValueSameAsPrev)
				{
					valuesCompressed.push(blank);
				}
				else
				{
					valuesToCompressPrev[v] = valueToCompress;
					valuesCompressed.push(valueToCompress);
				}
			}

			var lineCompressed = valuesCompressed.join(comma);
			if (isMultiColumn && lineCompressed === blank)
			{
				// A one-value row that repeats the first column would
				// collapse to an empty line, which decompress() reads as
				// a genuinely empty line; keep the row explicit instead.
				lineCompressed = lineToCompress;
			}
			linesCompressed.push(lineCompressed);
		}
		return linesCompressed.join(newline);
	};

	// Returns dataToDecompress with every blank value replaced by the most
	// recent explicit value in the same column.
	//
	// The first line is always kept as-is. In data with more than one
	// column, wholly empty lines are copied through unchanged rather than
	// being filled in with the first column's value; this is what keeps a
	// trailing newline from turning into a spurious extra row.
	//
	// dataToDecompress: text produced by compress(), as a string.
	// Returns: the restored CSV text, as a string.
	CsvCompressor.prototype.decompress = function(dataToDecompress)
	{
		var blank = CsvCompressor.Blank;
		var comma = CsvCompressor.Comma;
		var newline = CsvCompressor.Newline;

		var linesToDecompress = dataToDecompress.split(newline);
		var lineToDecompress0 = linesToDecompress[0];
		// Most recent explicit value seen in each column so far.
		var valuesToDecompressPrev = lineToDecompress0.split(comma);
		var linesDecompressed = [ lineToDecompress0 ];
		for (var i = 1; i < linesToDecompress.length; i++)
		{
			var lineToDecompress = linesToDecompress[i];
			var isMultiColumn = (valuesToDecompressPrev.length > 1);

			if (isMultiColumn && lineToDecompress === blank)
			{
				// A genuinely empty line: keep it, and leave the
				// remembered values alone.
				linesDecompressed.push(lineToDecompress);
				continue;
			}

			var valuesToDecompress = lineToDecompress.split(comma);
			var valuesDecompressed = [];
			for (var v = 0; v < valuesToDecompress.length; v++)
			{
				var valueToDecompress = valuesToDecompress[v];
				if (valueToDecompress === blank)
				{
					// Blank means "same as the last explicit value".
					valuesDecompressed.push(valuesToDecompressPrev[v]);
				}
				else
				{
					valuesToDecompressPrev[v] = valueToDecompress;
					valuesDecompressed.push(valueToDecompress);
				}
			}
			linesDecompressed.push(valuesDecompressed.join(comma));
		}
		return linesDecompressed.join(newline);
	};
}

</script>

</body>
</html>

This entry was posted in Uncategorized and tagged , , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s