web speech template

add voice control to your device

▶

HTML:

<!doctype html>
<!--=======================================================
//
//     FILE : index.html
//
//  PROJECT : Web Speech Template
//            https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API/Using_the_Web_Speech_API
//
//   AUTHOR : Bill Daniels
//            Copyright 2024, D+S Tech Labs, Inc.
//            MIT License
//
//======================================================-->

<html lang="en-us">

  <!-- Head -->
  <head>

    <!-- Encoding, legacy-IE rendering mode and viewport are declared -->
    <!-- ahead of every other piece of head content.                  -->
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">

    <title>Web Speech Template</title>

    <style>

      /* Page canvas: dark background, white centered text, at least one viewport tall */
      body {
        margin: 0;
        padding: 0;
        min-height: 100vh;
        color: #FFFFFF;
        background-color: #202030;
        text-align: center;
      }

      /* Scrollable, terminal-like panel that shows the message log */
      #dataWindow {
        /* box */
        display: inline-block;
        padding: 0.5vh 0.5vw;
        width: 80vw;
        height: 300px;
        overflow: auto;

        /* colors */
        color: #008000;
        background-color: #202020;
        border: 0.2vw inset #505050;

        /* text */
        font-family: monospace, sans-serif;
        font-size: large;
        text-align: left;
      }

    </style>

  </head>


  <!-- Body -->
  <body>

    <h1>Web Speech Template</h1>
    <br>

    <h2><u>Speech to Text:</u></h2>

    <!-- Log panel: the script appends recognized phrases and status messages here -->
    <div id="dataWindow"></div><br>
    <br>

    <h2><u>Text to Speech:</u></h2>

    <div style="display:inline-block; font-size:x-large; line-height:1.5; text-align:right">

      <!-- Every control is bound to its caption with <label for="..."> so that -->
      <!-- assistive technology announces it and clicking the caption focuses it -->
      <label for="voiceList">Select Voice:</label> <select id="voiceList" style="width:50vw; font-size:inherit"></select><br>
      <label for="pitch">Select Pitch:</label> <input id="pitch" style="width:50vw; font-size:inherit" type="range" min="0.0" max="2.0" value="1.0" step="0.1"><br>
      <label for="rate">Select Rate:</label> <input id="rate" style="width:50vw; font-size:inherit" type="range" min="0.1" max="10.0" value="1.0" step="0.1"><br>
      <br>

      <!-- event.key replaces the deprecated event.keyCode; the isComposing test -->
      <!-- keeps an Enter that merely confirms an IME composition from speaking  -->
      <label for="textToSpeak">Enter text to speak:</label> <input id="textToSpeak" type="text" style="width:50vw; font-size:inherit" onkeydown="if (event.key === 'Enter' &amp;&amp; !event.isComposing) speakIt(this)"><br>
      <small><i>Press [Enter] to speak     </i></small><br>

    </div>
    <br>

    <!-- Loaded at the end of the body so the elements above exist when the script runs. -->
    <!-- The file name matches the FILE entry in the script's own header comment.        -->
    <script src="webSpeechApp.js"></script>

  </body>

</html>

JavaScript:

//=========================================================
//
//     FILE : webSpeechApp.js
//
//  PROJECT : Web Speech Template
//            https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API/Using_the_Web_Speech_API
//
//   AUTHOR : Bill Daniels
//            Copyright 2024, D+S Tech Labs, Inc.
//            MIT License
//
//=========================================================

//--- Globals ---------------------------------------------

// Speech-recognition constructors: the standard name is tried first,
// then the webkit-prefixed one. Either constant is undefined when the
// browser provides neither.
const SpeechRecognition      = window.SpeechRecognition      || window.webkitSpeechRecognition;
const SpeechRecognitionEvent = window.SpeechRecognitionEvent || window.webkitSpeechRecognitionEvent;

// Element that displays the running message log
const dataWindow = document.querySelector ('#dataWindow');

// Speech engines and their availability flags (filled in during startup)
let listener  = null;    // SpeechRecognition instance
let speaker   = null;    // window.speechSynthesis
let canListen = false;   // true once the listener has been created
let canSpeak  = false;   // true once the speaker is available

// Voices offered in the pulldown, in the same order as its options
let voices = [];

// Number of recognition results handled in the current listening session
let numResults = 0;


//--- Startup ---------------------------------------------

// Startup: runs once when the script is loaded.
//   1. Creates and configures the speech recognizer (if the browser has one).
//   2. Hooks up speech synthesis and fills the voice pulldown (if available).
//   3. Starts listening.
// Any exception is reported to the user with alert().
try
{
  // Recognition and synthesis are independent features, so only give up
  // completely when the browser offers neither of them.
  if (!SpeechRecognition && !window.speechSynthesis)
    throw 'This browser does not support speech.\nPlease use the Chrome browser.';

  //-----------------
  // Create listener
  //-----------------
  if (!SpeechRecognition)
    addToLog ('This browser cannot recognize speech.');
  else
  {
    listener  = new SpeechRecognition ();
    canListen = true;

    listener.continuous      = true;
    listener.lang            = 'en-US';  // Many languages are supported
    listener.interimResults  = false;
    listener.maxAlternatives = 1;

    listener.onresult = function (event)
    {
      // event.results is a SpeechRecognitionResultList that accumulates every
      // result of the current session and can be indexed like an array.
      // event.resultIndex is the index of the first result that is new in
      // this event, so everything from there to the end of the list is logged.
      // Each SpeechRecognitionResult holds SpeechRecognitionAlternative objects;
      // [0] is the only one because maxAlternatives is 1.

      for (let index = event.resultIndex; index < event.results.length; ++index)
      {
        const best = event.results[index][0];

        // addToLog() expects HTML, so the transcript is escaped first
        const escaper = document.createElement ('span');
        escaper.textContent = best.transcript;

        addToLog (escaper.innerHTML + ' (' + best.confidence + ')');
      }

      // Keep the session counter in step with the result list
      numResults = event.results.length;
    }

    listener.onnomatch = () =>
    {
      addToLog ('((( I did not recognise that )))');
    }

    listener.onend = () =>
    {
      addToLog ('((( I stopped listening )))');

      // Even though the listener was set to continuous, listening may stop.
      // You can start it up again by calling startListening()
    }

    listener.onerror = (event) =>
    {
      addToLog ('((( Error occurred in recognition ))) ' + event.error);
    }
  }

  //----------------
  // Text-to-Speech
  //----------------
  speaker = window.speechSynthesis;
  if (speaker == undefined || speaker == null)
    addToLog ('This browser cannot speak.');
  else
  {
    canSpeak = true;

    // In Chrome, voices are not loaded immediately with page load.
    // So it is necessary to wait for voices to load asynchronously.
    speaker.onvoiceschanged = populateVoiceList;

    // Other browsers
    populateVoiceList ();
  }

  //-----------------
  // Start listening
  //-----------------
  startListening ();  // does nothing when canListen is false
}
catch (ex)
{
  alert (ex);
}

//--- startListening ---------------------------------------

// Begins a new recognition session: resets the result counter, starts the
// listener and logs that listening has begun. Does nothing when no listener
// is available. Any exception is shown to the user with alert().
function startListening ()
{
  if (!canListen)
    return;

  try
  {
    numResults = 0;
    listener.start ();
    addToLog ('Listening ...');
  }
  catch (error)
  {
    alert (error);
  }
}

//--- stopListening ---------------------------------------

// Stops the current recognition session. Does nothing when no listener
// is available. Any exception is shown to the user with alert().
function stopListening ()
{
  if (!canListen)
    return;

  try
  {
    listener.stop ();
  }
  catch (error)
  {
    alert (error);
  }
}

//--- populateVoiceList -----------------------------------

// Rebuilds the "voiceList" pulldown from the voices the speaker currently
// offers and stores them in the global `voices` array (same order as the
// options, so an option index is also an index into `voices`).
//
// This function can run more than once (directly at startup and again on
// every voiceschanged event), so the voice the user had selected is kept
// selected when it is still available. Otherwise the voice flagged as the
// browser default is selected.
//
// Does nothing when speech synthesis is unavailable.
// Any exception is shown to the user with alert().
function populateVoiceList ()
{
  try
  {
    if (canSpeak)
    {
      const voiceSelect = document.getElementById ("voiceList");

      // Remember the current choice before the options are replaced
      const previousOption = voiceSelect.options[voiceSelect.selectedIndex];
      const previousName   = previousOption ? previousOption.getAttribute ("data-name") : null;

      voices = speaker.getVoices ();

      // Build all options off-document, then insert them in one step
      const options       = document.createDocumentFragment ();
      let   previousIndex = -1;
      let   defaultIndex  = -1;

      voices.forEach ((voice, index) =>
      {
        const option = document.createElement ("option");
        option.textContent = `${voice.name} (${voice.lang})`;

        if (voice.default)
        {
          option.textContent += " — DEFAULT";

          if (defaultIndex < 0)
            defaultIndex = index;
        }

        if (previousIndex < 0 && previousName !== null && voice.name === previousName)
          previousIndex = index;

        option.setAttribute ("data-lang", voice.lang);
        option.setAttribute ("data-name", voice.name);

        options.appendChild (option);
      });

      voiceSelect.textContent = '';  // remove the old options
      voiceSelect.appendChild (options);

      // Restore the previous choice, else fall back to the default voice
      if (previousIndex >= 0)
        voiceSelect.selectedIndex = previousIndex;
      else if (defaultIndex >= 0)
        voiceSelect.selectedIndex = defaultIndex;
    }
  }
  catch (ex)
  {
    alert (ex);
  }
}

//--- speakIt ---------------------------------------------

// Speaks the current value of a text input using the voice, pitch and rate
// chosen on the page, and records the spoken text in the log.
//
//   textInput : the <input> element whose value is to be spoken
//
// Does nothing when speech synthesis is unavailable or the text is empty.
// Any exception is shown to the user with alert().
function speakIt (textInput)
{
  try
  {
    const text = textInput.value;

    if (canSpeak && text.length > 0)
    {
      // addToLog() expects HTML, so the user's text is escaped first.
      // Otherwise typed markup would be injected into the page.
      const escaper = document.createElement ('span');
      escaper.textContent = text;
      addToLog ('speaking: ' + escaper.innerHTML);

      const utterance     = new SpeechSynthesisUtterance (text);
      const selectedIndex = document.getElementById ("voiceList").selectedIndex;  // -1 when the list is empty

      utterance.voice = voices[selectedIndex] || null;                     // null = let the browser pick its default voice
      utterance.pitch = Number (document.getElementById ("pitch").value);  // pitch is a floating point number from 0.0 to 2.0 , 1.0 = default
      utterance.rate  = Number (document.getElementById ("rate" ).value);  // rate  is a floating point number from 0.1 to 10.0, 1.0 = default

      speaker.speak (utterance);
    }
  }
  catch (ex)
  {
    alert (ex);
  }
}

//--- addToLog --------------------------------------------

// Appends one line to the on-page log and scrolls the log to the bottom.
//
//   htmlMessage : the message as an HTML string. It is parsed as markup,
//                 so callers must escape any plain text they pass in.
//
// Any exception is shown to the user with alert().
function addToLog (htmlMessage)
{
  try
  {
    // insertAdjacentHTML parses only the new line. `innerHTML +=` would
    // re-serialize and re-parse the whole log on every call and recreate
    // all of its existing nodes.
    dataWindow.insertAdjacentHTML ('beforeend', htmlMessage + '<br>');

    // Keep the newest line in view
    dataWindow.scrollTop = dataWindow.scrollHeight;
  }
  catch (ex)
  {
    alert (ex);
  }
}

This code should get you started using the Web Speech API.

Here's a live version:

Speech to Text:

Text to Speech:

Select Voice:
Select Pitch:
Select Rate:

Enter text to speak:
Press [Enter] to speak