Speech Synthesis in JavaScript with the Web Speech API

The code below uses the speech synthesis part of the Web Speech API to say a few simple statements at various speeds and pitches. To see it in action, copy it into an .html file (with the JavaScript placed inside a script element) and open that file in a web browser that runs JavaScript.

The code creates five voices and then loops through each of them, having each of them say a simple sentence. This loop itself actually exits almost immediately, and the system then just plays through each of the queued-up messages one after the other until its buffer is empty. In a real-world application, you’d probably want to use asynchronous callbacks to handle sequences like this, but that tends to get messy, and simple iteration is good enough to demonstrate the basic functionality.

I have tested this in Google’s Chrome browser only. I wasn’t initially able to figure out how to change the timbre of the voice, but eventually I learned that the list returned by window.speechSynthesis.getVoices() is populated asynchronously, so you have to wait for the voices to load and handle them in a listener for the voiceschanged event (for example, by assigning a handler to window.speechSynthesis.onvoiceschanged). I haven’t updated this program accordingly, but I did update the subsequent post, which implements a simple text-to-speech application.

<html>
<body>

// Demo entry point: builds five differently-configured voices and
// asks each one, in order, to announce its own name.
function main()
{
	// Constructor arguments are (name, pitch, speed);
	// null or omitted values leave that setting at the Voice default.
	var demoVoices =
	[
		new Voice("Default"),
		new Voice("Low", .01),
		new Voice("High", 2),
		new Voice("Slow", null, .001),
		new Voice("Fast", null, 2)
	];

	// Each say() call hands its utterance to the speech system right away,
	// so this pass finishes without waiting for any speech to complete.
	// Chaining the calls through callbacks would be the more rigorous approach.
	demoVoices.forEach
	(
		function(demoVoice)
		{
			var announcement = "My voice is " + demoVoice.name + "!";
			demoVoice.say(announcement);
		}
	);
}

// classes

/**
 * A named speaking voice with its own pitch and rate, backed by a single
 * reusable SpeechSynthesisUtterance.
 *
 * @param {string} name - Label for this voice.
 * @param {?number} pitch - Desired pitch; null/undefined means 1.
 * @param {?number} speed - Desired speaking rate; null/undefined means 1.
 * @param {?number} systemVoiceIndex - Optional index into the list returned
 *   by speechSynthesis.getVoices(); null/undefined means 10 (the value this
 *   demo has always used).
 */
function Voice(name, pitch, speed, systemVoiceIndex)
{
	this.name = name;
	this.pitch = (pitch == null ? 1 : pitch);
	this.speed = (speed == null ? 1 : speed);

	var systemVoices = window.speechSynthesis.getVoices();
	var voiceIndex = (systemVoiceIndex == null ? 10 : systemVoiceIndex);

	// Keep the chosen voice on the instance so say() can reapply it.
	// The voice list may be empty (not loaded yet) or shorter than the
	// index; in that case store null, which lets the browser use its
	// default voice.
	this.systemVoice = systemVoices[voiceIndex] || null;

	this.systemUtterance = new SpeechSynthesisUtterance();
	this.systemUtterance.voice = this.systemVoice;
	this.systemUtterance.onend = this.say_Ended.bind(this);
}
{
	/**
	 * Speaks the given text with this voice's settings.
	 *
	 * @param {string} thingToSay - Text to speak.
	 * @param {?Function} callback - Optional function invoked when the
	 *   utterance's "end" handler fires.
	 * @param {*} contextForCallback - Value used as "this" for callback.
	 */
	Voice.prototype.say = function(thingToSay, callback, contextForCallback)
	{
		this.callback = callback;
		this.contextForCallback = contextForCallback;

		this.isSpeaking = true;

		var utterance = this.systemUtterance;
		utterance.text = thingToSay;
		utterance.voice = this.systemVoice;

		// The Web Speech API limits pitch to 0..2 and rate to 0.1..10;
		// clamp here so out-of-range settings still produce speech.
		// this.pitch and this.speed keep the caller's original values.
		utterance.pitch = Math.min(Math.max(this.pitch, 0), 2);
		utterance.rate = Math.min(Math.max(this.speed, 0.1), 10);

		window.speechSynthesis.speak
		(
			utterance
		);
	};

	/**
	 * "end" handler for the utterance: clears the speaking flag and
	 * invokes the callback registered by the most recent say() call, if any.
	 */
	Voice.prototype.say_Ended = function()
	{
		this.isSpeaking = false;
		if (this.callback != null)
		{
			this.callback.call(this.contextForCallback);
		}
	};
}

// run
// Entry point: main() is invoked as soon as this script is evaluated.
// NOTE(review): the accompanying article says the getVoices() list loads
// asynchronously, so it may still be empty at this point - confirm whether
// startup should wait for the voices to become available.

main();

This entry was posted in Uncategorized and tagged , , , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s