Pages

Sunday, 29 July 2012

Building Text To Speech Java Application with FreeTTS

Introduction
FreeTTS is a speech synthesis system written entirely in the Java™ programming language. It is based upon Flite: a small run-time speech synthesis engine developed at Carnegie Mellon University. Flite is derived from the Festival Speech Synthesis System from the University of Edinburgh and the FestVox project from Carnegie Mellon University.

More details about FreeTTS can be found at http://freetts.sourceforge.net/docs/index.php 

Steps
First, create a Swing-based application in the Eclipse IDE with the help of the WindowBuilder plug-in. The application consists of a single screen with two links: one for starting the speech and one for stopping it. The screen also has a wide text area where the user can paste text before clicking the "Start Reading" link. To stop the speech while it is in progress, the user can simply click the "Stop Reading" link, as shown in the snapshot below.



Download the FreeTTS binary distribution and copy the contents of its lib folder to the lib directory of the Swing application. Also make sure that the jar file containing the AePlayWav class is included in the project's lib directory.

The following is AePlayWav.java, which actually plays and stops the generated audio file. Note that this class is a thread that executes independently whenever it is invoked. I made some changes to this class before adding it to the project as a dependency, in order to fix thread-related issues.

package soundscovery;

import java.io.File;
import java.io.IOException;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.FloatControl;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.SourceDataLine;
import javax.sound.sampled.UnsupportedAudioFileException;

/**
 * Plays a WAV file on its own thread and lets another thread stop the playback.
 *
 * <p>Call {@link #start()} to begin playback and {@link #stopPlaying()} to end it
 * early. Failures (missing file, unsupported format, no audio line) are reported
 * on the error stream and end the thread; they are never thrown to the caller.
 *
 * @author atif
 */
public class AePlayWav extends Thread {

    /** Cleared by {@link #stopPlaying()}; the playback loop checks it between buffers. */
    public volatile boolean continuePlaying = true;
    private final String filename;

    private final Position curPosition;

    // 524288 / 128 = 4096 bytes (4 KB) are copied from the file to the line per cycle.
    private static final int EXTERNAL_BUFFER_SIZE = 524288 / 128;

    /** Stereo placement applied through the line's PAN control, when it has one. */
    enum Position {
        LEFT, RIGHT, NORMAL
    }

    /**
     * Creates a player for the given file with no panning.
     *
     * @param wavfile path of the WAV file to play
     */
    public AePlayWav(String wavfile) {
        filename = wavfile;
        curPosition = Position.NORMAL;
    }

    /**
     * Creates a player for the given file with the given stereo placement.
     *
     * @param wavfile path of the WAV file to play
     * @param p stereo placement of the sound
     */
    public AePlayWav(String wavfile, Position p) {
        filename = wavfile;
        curPosition = p;
    }

    /** Line used for the current playback; assigned in {@link #run()}. */
    SourceDataLine auline;

    /**
     * Opens the file, streams it to an audio output line until the end of the
     * file or until {@link #stopPlaying()} is called, then releases the line
     * and the file.
     */
    @Override
    public void run() {
        // new File(null) would throw; report a missing name like a missing file.
        if (filename == null) {
            System.err.println("Wave file not found: " + filename);
            return;
        }
        File soundFile = new File(filename);
        if (!soundFile.exists()) {
            System.err.println("Wave file not found: " + filename);
            return;
        }

        AudioInputStream audioInputStream;
        try {
            audioInputStream = AudioSystem.getAudioInputStream(soundFile);
        } catch (UnsupportedAudioFileException e1) {
            e1.printStackTrace();
            return;
        } catch (IOException e1) {
            e1.printStackTrace();
            return;
        }

        try {
            play(audioInputStream);
        } finally {
            // Always release the file handle, whichever way playback ended.
            try {
                audioInputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Streams the given audio to a freshly opened output line and closes the line. */
    private void play(AudioInputStream audioInputStream) {
        AudioFormat format = audioInputStream.getFormat();
        DataLine.Info info = new DataLine.Info(SourceDataLine.class, format);
        auline = null;

        SourceDataLine line;
        try {
            line = (SourceDataLine) AudioSystem.getLine(info);
            auline = line;
            line.open(format);
        } catch (LineUnavailableException e) {
            e.printStackTrace();
            return;
        } catch (Exception e) {
            // e.g. no line on this system supports the file's format
            e.printStackTrace();
            return;
        }

        if (line.isControlSupported(FloatControl.Type.PAN)) {
            FloatControl pan = (FloatControl) line.getControl(FloatControl.Type.PAN);
            if (curPosition == Position.RIGHT) {
                pan.setValue(1.0f);
            } else if (curPosition == Position.LEFT) {
                pan.setValue(-1.0f);
            }
        }

        line.start();
        byte[] abData = new byte[EXTERNAL_BUFFER_SIZE];

        try {
            int nBytesRead = 0;
            while (nBytesRead != -1 && continuePlaying) {
                nBytesRead = audioInputStream.read(abData, 0, abData.length);
                if (nBytesRead >= 0) {
                    line.write(abData, 0, nBytesRead);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (continuePlaying) {
                // Normal end: let the audio already queued on the line finish.
                line.drain();
            } else {
                // Stop requested: discard queued audio instead of waiting for it.
                line.stop();
                line.flush();
            }
            line.close();
        }
    }

    /** Asks the playback loop to stop; returns immediately without waiting for the thread. */
    public void stopPlaying() {
        continuePlaying = false;
    }

    /**
     * Creates a new, not yet started player for the given file.
     *
     * @param wavfile path of the WAV file to play
     * @return a new player with no panning
     */
    public static final AePlayWav getInstance(String wavfile) {
        return new AePlayWav(wavfile);
    }
}

The following is TTSConverter.java, which I added. It is responsible for generating the audio file for the given text and for calling the AePlayWav methods that play and stop the generated audio file.

package org.tts;

import com.sun.speech.freetts.Voice;
import com.sun.speech.freetts.VoiceManager;
import com.sun.speech.freetts.audio.SingleFileAudioPlayer;

/*
 * Converts the given text into audio file
 * and plays the generated audio file
 *
 * @author atif   
 */
public class TTSConverter {
   
    private soundscovery.AePlayWav aePlayWavThread =  null;
   
   /*
    *@param msg message that should be converted to speech
    */
    public void convertTextToSpeech(String msg){
       
        // stop playing if another speech is already in progress
        stopSpeaking();
   
        aePlayWavThread =  soundscovery.AePlayWav.getInstance("freetts.wav");
        VoiceManager vm = VoiceManager.getInstance();
        Voice voice = vm.getVoice("kevin16");
        System.setProperty("com.sun.speech.freetts.voice.defaultAudioPlayer", "com.sun.speech.freetts.audio.SingleFileAudioPlayer");
        if(voice!=null)
        {
            voice.allocate();
        }
        String text = new String(msg);
        byte b[] = text.getBytes();
        if(b==null)
        {
            System.out.println("no byte array");
            return;
        }
       
        SingleFileAudioPlayer sfap = null;
        try
        {
            sfap = (SingleFileAudioPlayer)voice.getDefaultAudioPlayer();
            sfap.write(b);
        }
        catch(Exception e)
        {
            // DO NOTHING HERE   
            // e.printStackTrace();
        }
        voice.speak(msg);
        voice.deallocate();
        aePlayWavThread.start();
    }
   
    public void stopSpeaking(){
        if(aePlayWavThread !=  null) {
            aePlayWavThread.stopPlaying();
        }
    }
}


References
http://freetts.sourceforge.net