СПИСОК ИСПОЛЬЗОВАННЫХ ИСТОЧНИКОВ
1. Искусственный интеллект. Системы общения и экспертные системы. Кн. 1 / Под ред. Э.В.Попова. - М.: Радио и связь, 1990. - 461 с.
2. Оппенгейм А.В., Шафер Р.В. Цифровая обработка сигналов. - М.: Радио и связь, 1979. - 347 с.
3. Рабинер Л.Р., Шафер Р.В. Цифровая обработка речевых сигналов. - М.: Радио и связь, 1981. - 258 с.
4. Литюк В.И. Методическое пособие № 2231, часть 3 «Методы расчета и проектирование цифровых многопроцессорных устройств обработки радиосигналов». - Таганрог, 1995. - 48 с.
5. Кузнецов В., Отт А. Автоматический синтез речи. - Таллинн: Валгус, 1989. - 135 с.
6. Методы автоматического распознавания речи / Под ред. У.Ли. - М.: Мир, 1983. - 716 с.
7. Зиндер Л.Р. Общая фонетика. - М.: Высшая школа, 1979. - 312 с.
8. Златоустова Л.В., Потапова Р.К., Трунин-Донской В.Н. Общая и прикладная фонетика. - М.: МГУ, 1986. - 304 с.
9. Линдсей П., Норман Д. Переработка информации у человека. - М.: Мир, 1974. - 550 с.
10. Потапова Р.К. Речевое управление роботом. - М.: Радио и связь, 1989. - 248 с.
11. Бакаева Т.Н. Системный анализ безопасности: Методическая разработка к самостоятельной работе по курсу «Безопасность жизнедеятельности». - Таганрог: ТРТУ, 1995. - 18 с.
12. Бакаева Т.Н. Безопасность жизнедеятельности. Часть 2: Безопасность в условиях производства: Учебное пособие. - Таганрог: ТРТУ, 1997. - 318 с.
13. Фрумкин Г.А. «Расчет и конструирование РЭА». - Москва: Высшая школа, 1997. - 289 с.
Приложение
1. Листинг программы – Speech Recognition 1.1) WaveIn.cs
// Speech recognition
// wavein => operations on incoming sound signal
using System;
using System.Threading;
using System.Runtime.InteropServices;

namespace SoundViewer
{
    /// <summary>
    /// Small helper that turns waveIn result codes into exceptions.
    /// </summary>
    internal class WaveInHelper
    {
        /// <summary>
        /// Throws when <paramref name="err"/> differs from WaveNative.MMSYSERR_NOERROR;
        /// the exception message is the numeric code converted to text.
        /// </summary>
        /// <param name="err">Result code returned by a WaveNative call.</param>
        public static void Try(int err)
        {
            bool succeeded = (err == WaveNative.MMSYSERR_NOERROR);
            if (succeeded)
            {
                return;
            }

            throw new Exception(err.ToString());
        }
    }
/// <summary>
/// Callback through which WaveInRecorder hands a completed capture buffer to its owner.
/// </summary>
/// <param name="data">Pointer to the buffer's bytes (the recorder passes WaveInBuffer.Data).</param>
/// <param name="size">Length of the buffer in bytes (the recorder passes WaveInBuffer.Size).</param>
public delegate void BufferDoneEventHandler(IntPtr data, int size);
/// <summary>
/// One capture buffer that is queued to a waveIn device. Instances are chained through
/// <see cref="NextBuffer"/> into the circular list that WaveInRecorder walks.
/// </summary>
internal class WaveInBuffer : IDisposable
{
    /// <summary>Next node of the ring built by WaveInRecorder.</summary>
    public WaveInBuffer NextBuffer;

    // Private gate instead of lock(this), so no outside code can contend on the same monitor.
    private readonly object m_Gate = new object();
    private AutoResetEvent m_RecordEvent = new AutoResetEvent(false);
    private IntPtr m_WaveIn;

    private WaveNative.WaveHdr m_Header;
    private byte[] m_HeaderData;
    private GCHandle m_HeaderHandle;
    private GCHandle m_HeaderDataHandle;
    // Handle to this instance that is stored in m_Header.dwUser; kept so it can be freed again.
    private GCHandle m_SelfHandle;

    // Written by the driver callback and read by the worker thread, hence volatile.
    private volatile bool m_Recording;

    /// <summary>
    /// Device callback. On MM_WIM_DATA it recovers the owning buffer from the header's
    /// dwUser handle and signals completion.
    /// </summary>
    internal static void WaveInProc(IntPtr hdrvr, int uMsg, int dwUser, ref WaveNative.WaveHdr wavhdr, int dwParam2)
    {
        if (uMsg == WaveNative.MM_WIM_DATA)
        {
            try
            {
                GCHandle h = (GCHandle)wavhdr.dwUser;
                WaveInBuffer buf = (WaveInBuffer)h.Target;
                buf.OnCompleted();
            }
            catch
            {
                // Deliberately swallowed: this runs as a native callback, and a stale or freed
                // handle must not let an exception propagate out of managed code here.
            }
        }
    }

    /// <summary>
    /// Allocates a pinned data array of <paramref name="size"/> bytes and prepares the header
    /// on the given device.
    /// </summary>
    /// <param name="waveInHandle">Open waveIn device handle.</param>
    /// <param name="size">Buffer size in bytes.</param>
    /// <exception cref="Exception">waveInPrepareHeader reported an error (via WaveInHelper.Try).</exception>
    public WaveInBuffer(IntPtr waveInHandle, int size)
    {
        m_WaveIn = waveInHandle;

        try
        {
            // NOTE(review): m_Header is assigned field-by-field without construction, so WaveHdr
            // appears to be a struct; GCHandle.Alloc would then pin a boxed copy rather than this
            // field. Kept as in the original - confirm against WaveNative before relying on it.
            m_HeaderHandle = GCHandle.Alloc(m_Header, GCHandleType.Pinned);
            m_SelfHandle = GCHandle.Alloc(this);
            m_Header.dwUser = (IntPtr)m_SelfHandle;
            m_HeaderData = new byte[size];
            m_HeaderDataHandle = GCHandle.Alloc(m_HeaderData, GCHandleType.Pinned);
            m_Header.lpData = m_HeaderDataHandle.AddrOfPinnedObject();
            m_Header.dwBufferLength = size;
            WaveInHelper.Try(WaveNative.waveInPrepareHeader(m_WaveIn, ref m_Header, Marshal.SizeOf(m_Header)));
        }
        catch
        {
            // Construction failed: nothing was prepared, so only release what was allocated.
            // Without this the self handle would keep the half-built object alive forever.
            m_Header.lpData = IntPtr.Zero;
            ReleaseHandles();
            m_RecordEvent.Close();
            GC.SuppressFinalize(this);
            throw;
        }
    }

    ~WaveInBuffer()
    {
        Dispose(false);
    }

    /// <summary>
    /// Unprepares the header and releases every GC handle and the completion event.
    /// Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    // Shared clean-up; the managed event is only closed on the explicit Dispose path.
    private void Dispose(bool disposing)
    {
        if (m_Header.lpData != IntPtr.Zero)
        {
            WaveNative.waveInUnprepareHeader(m_WaveIn, ref m_Header, Marshal.SizeOf(m_Header));
            m_Header.lpData = IntPtr.Zero;
        }
        ReleaseHandles();
        if (disposing)
        {
            m_RecordEvent.Close();
        }
    }

    // Frees whichever GC handles are currently allocated, including the handle to this instance.
    private void ReleaseHandles()
    {
        if (m_HeaderHandle.IsAllocated)
            m_HeaderHandle.Free();
        if (m_HeaderDataHandle.IsAllocated)
            m_HeaderDataHandle.Free();
        if (m_SelfHandle.IsAllocated)
            m_SelfHandle.Free();
    }

    /// <summary>Buffer length in bytes, as stored in the header.</summary>
    public int Size
    {
        get { return m_Header.dwBufferLength; }
    }

    /// <summary>Address of the pinned data array.</summary>
    public IntPtr Data
    {
        get { return m_Header.lpData; }
    }

    /// <summary>
    /// Queues the buffer on the device with waveInAddBuffer.
    /// </summary>
    /// <returns>true when the device accepted the buffer.</returns>
    public bool Record()
    {
        lock (m_Gate)
        {
            m_RecordEvent.Reset();
            m_Recording = WaveNative.waveInAddBuffer(m_WaveIn, ref m_Header, Marshal.SizeOf(m_Header)) == WaveNative.MMSYSERR_NOERROR;
            return m_Recording;
        }
    }

    /// <summary>
    /// Blocks until the queued buffer has been completed by the callback; yields the time
    /// slice when the buffer is not queued.
    /// </summary>
    public void WaitFor()
    {
        if (m_Recording)
        {
            m_RecordEvent.WaitOne();
            // The signal has been consumed, so the buffer is no longer pending. The original
            // stored WaitOne()'s result (always true) here, which left the flag set and could
            // make a later WaitFor block on an event that will never be signalled again.
            m_Recording = false;
        }
        else
        {
            Thread.Sleep(0);
        }
    }

    // Called from WaveInProc when the device returns this buffer.
    private void OnCompleted()
    {
        m_RecordEvent.Set();
        m_Recording = false;
    }
}
/// <summary>
/// Opens a waveIn device, keeps a ring of WaveInBuffer objects queued on it and, on a
/// dedicated thread, passes every completed buffer to a BufferDoneEventHandler.
/// </summary>
public class WaveInRecorder : IDisposable
{
    private IntPtr m_WaveIn;
    private WaveInBuffer m_Buffers; // linked list (circular)
    private WaveInBuffer m_CurrentBuffer;
    private Thread m_Thread;
    private BufferDoneEventHandler m_DoneProc;
    // Set by Dispose and polled by the worker thread, hence volatile.
    private volatile bool m_Finished;

    // Held in a field so the delegate given to the native API is not collected.
    private WaveNative.WaveDelegate m_BufferProc = new WaveNative.WaveDelegate(WaveInBuffer.WaveInProc);

    /// <summary>Number of input devices as reported by waveInGetNumDevs.</summary>
    public static int DeviceCount
    {
        get { return WaveNative.waveInGetNumDevs(); }
    }

    /// <summary>
    /// Opens the device, queues <paramref name="bufferCount"/> buffers, starts capture and
    /// starts the worker thread.
    /// </summary>
    /// <param name="device">Device identifier passed to waveInOpen.</param>
    /// <param name="format">Capture format passed to waveInOpen.</param>
    /// <param name="bufferSize">Size of each buffer in bytes.</param>
    /// <param name="bufferCount">Number of buffers in the ring; must be positive.</param>
    /// <param name="doneProc">Invoked on the worker thread for each completed buffer; may be null.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufferCount"/> is not positive.</exception>
    /// <exception cref="Exception">A waveIn call reported an error (via WaveInHelper.Try).</exception>
    public WaveInRecorder(int device, WaveFormat format, int bufferSize, int bufferCount, BufferDoneEventHandler doneProc)
    {
        // With no buffers the worker thread would dereference a null ring and crash.
        if (bufferCount <= 0)
            throw new ArgumentOutOfRangeException("bufferCount", "At least one buffer is required.");

        m_DoneProc = doneProc;
        WaveInHelper.Try(WaveNative.waveInOpen(out m_WaveIn, device, format, m_BufferProc, 0, WaveNative.CALLBACK_FUNCTION));
        try
        {
            AllocateBuffers(bufferSize, bufferCount);
            for (int i = 0; i < bufferCount; i++)
            {
                SelectNextBuffer();
                m_CurrentBuffer.Record();
            }
            WaveInHelper.Try(WaveNative.waveInStart(m_WaveIn));
        }
        catch
        {
            // Dispose() only cleans up once the thread exists, so release the device here;
            // otherwise a failed construction would leave it open.
            WaveNative.waveInReset(m_WaveIn);
            FreeBuffers();
            WaveNative.waveInClose(m_WaveIn);
            m_WaveIn = IntPtr.Zero;
            throw;
        }
        m_Thread = new Thread(new ThreadStart(ThreadProc));
        m_Thread.Start();
    }

    ~WaveInRecorder()
    {
        Dispose();
    }

    /// <summary>
    /// Stops capture, waits for the worker thread, frees the buffers and closes the device.
    /// Subsequent calls do nothing.
    /// </summary>
    public void Dispose()
    {
        if (m_Thread != null)
        {
            try
            {
                m_Finished = true;
                if (m_WaveIn != IntPtr.Zero)
                    WaveNative.waveInReset(m_WaveIn);
                WaitForAllBuffers();
                m_Thread.Join();
                m_DoneProc = null;
                FreeBuffers();
                if (m_WaveIn != IntPtr.Zero)
                    WaveNative.waveInClose(m_WaveIn);
            }
            finally
            {
                m_Thread = null;
                m_WaveIn = IntPtr.Zero;
            }
        }
        GC.SuppressFinalize(this);
    }

    // Worker loop: wait for the next buffer, deliver it, queue it again.
    private void ThreadProc()
    {
        while (!m_Finished)
        {
            Advance();

            BufferDoneEventHandler done = m_DoneProc;
            if (done != null && !m_Finished)
                done(m_CurrentBuffer.Data, m_CurrentBuffer.Size);

            // Do not hand a buffer back to the device once shutdown has begun: Dispose has
            // already reset the device, and a re-queued buffer would still be pending when
            // the buffers are freed and the device is closed.
            if (!m_Finished)
                m_CurrentBuffer.Record();
        }
    }

    // Builds a circular list of bufferCount buffers; the ring is closed even if a later
    // allocation throws, so FreeBuffers can always walk it.
    private void AllocateBuffers(int bufferSize, int bufferCount)
    {
        FreeBuffers();
        if (bufferCount > 0)
        {
            m_Buffers = new WaveInBuffer(m_WaveIn, bufferSize);
            WaveInBuffer Prev = m_Buffers;
            try
            {
                for (int i = 1; i < bufferCount; i++)
                {
                    WaveInBuffer Buf = new WaveInBuffer(m_WaveIn, bufferSize);
                    Prev.NextBuffer = Buf;
                    Prev = Buf;
                }
            }
            finally
            {
                Prev.NextBuffer = m_Buffers;
            }
        }
    }

    // Disposes every node of the ring and clears the list.
    private void FreeBuffers()
    {
        m_CurrentBuffer = null;
        if (m_Buffers != null)
        {
            WaveInBuffer First = m_Buffers;
            m_Buffers = null;

            WaveInBuffer Current = First;
            do
            {
                WaveInBuffer Next = Current.NextBuffer;
                Current.Dispose();
                Current = Next;
            } while (Current != First);
        }
    }

    // Moves to the next buffer and blocks until the device has filled it.
    private void Advance()
    {
        SelectNextBuffer();
        m_CurrentBuffer.WaitFor();
    }

    private void SelectNextBuffer()
    {
        m_CurrentBuffer = m_CurrentBuffer == null ? m_Buffers : m_CurrentBuffer.NextBuffer;
    }

    // Waits on the buffers of the ring in order.
    // NOTE(review): as in the original, the loop stops before the last node, so that buffer
    // is never waited on - confirm whether this is intentional before changing it.
    private void WaitForAllBuffers()
    {
        WaveInBuffer Buf = m_Buffers;
        if (Buf == null)
            return;
        while (Buf.NextBuffer != m_Buffers)
        {
            Buf.WaitFor();
            Buf = Buf.NextBuffer;
        }
    }
}
}
1.2) WaveOut.cs // Speech recognition // waveout => show graph on screen
using System; using System.Threading; using System.Runtime.InteropServices;
namespace SoundViewer
{
    /// <summary>
    /// Small helper that turns waveOut result codes into exceptions.
    /// </summary>
    internal class WaveOutHelper
    {
        /// <summary>
        /// Throws when <paramref name="err"/> differs from WaveNative.MMSYSERR_NOERROR;
        /// the exception message is the numeric code converted to text.
        /// </summary>
        /// <param name="err">Result code returned by a WaveNative call.</param>
        public static void Try(int err)
        {
            bool succeeded = (err == WaveNative.MMSYSERR_NOERROR);
            if (succeeded)
            {
                return;
            }

            throw new Exception(err.ToString());
        }
    }
/// <summary>
/// Callback through which WaveOutPlayer asks its owner to fill a playback buffer.
/// </summary>
/// <param name="data">Pointer to the buffer's bytes (the player passes WaveOutBuffer.Data).</param>
/// <param name="size">Length of the buffer in bytes (the player passes WaveOutBuffer.Size).</param>
public delegate void BufferFillEventHandler(IntPtr data, int size);
/// <summary>
/// One playback buffer that is written to a waveOut device. Instances are chained through
/// <see cref="NextBuffer"/> into the circular list that WaveOutPlayer walks.
/// </summary>
internal class WaveOutBuffer : IDisposable
{
    /// <summary>Next node of the ring built by WaveOutPlayer.</summary>
    public WaveOutBuffer NextBuffer;

    // Private gate instead of lock(this), so no outside code can contend on the same monitor.
    private readonly object m_Gate = new object();
    private AutoResetEvent m_PlayEvent = new AutoResetEvent(false);
    private IntPtr m_WaveOut;

    private WaveNative.WaveHdr m_Header;
    private byte[] m_HeaderData;
    private GCHandle m_HeaderHandle;
    private GCHandle m_HeaderDataHandle;
    // Handle to this instance that is stored in m_Header.dwUser; kept so it can be freed again.
    private GCHandle m_SelfHandle;

    // Written by the driver callback and read by the worker thread, hence volatile.
    private volatile bool m_Playing;

    /// <summary>
    /// Device callback. On MM_WOM_DONE it recovers the owning buffer from the header's
    /// dwUser handle and signals completion.
    /// </summary>
    internal static void WaveOutProc(IntPtr hdrvr, int uMsg, int dwUser, ref WaveNative.WaveHdr wavhdr, int dwParam2)
    {
        if (uMsg == WaveNative.MM_WOM_DONE)
        {
            try
            {
                GCHandle h = (GCHandle)wavhdr.dwUser;
                WaveOutBuffer buf = (WaveOutBuffer)h.Target;
                buf.OnCompleted();
            }
            catch
            {
                // Deliberately swallowed: this runs as a native callback, and a stale or freed
                // handle must not let an exception propagate out of managed code here.
            }
        }
    }

    /// <summary>
    /// Allocates a pinned data array of <paramref name="size"/> bytes and prepares the header
    /// on the given device.
    /// </summary>
    /// <param name="waveOutHandle">Open waveOut device handle.</param>
    /// <param name="size">Buffer size in bytes.</param>
    /// <exception cref="Exception">waveOutPrepareHeader reported an error (via WaveOutHelper.Try).</exception>
    public WaveOutBuffer(IntPtr waveOutHandle, int size)
    {
        m_WaveOut = waveOutHandle;

        try
        {
            // NOTE(review): m_Header is assigned field-by-field without construction, so WaveHdr
            // appears to be a struct; GCHandle.Alloc would then pin a boxed copy rather than this
            // field. Kept as in the original - confirm against WaveNative before relying on it.
            m_HeaderHandle = GCHandle.Alloc(m_Header, GCHandleType.Pinned);
            m_SelfHandle = GCHandle.Alloc(this);
            m_Header.dwUser = (IntPtr)m_SelfHandle;
            m_HeaderData = new byte[size];
            m_HeaderDataHandle = GCHandle.Alloc(m_HeaderData, GCHandleType.Pinned);
            m_Header.lpData = m_HeaderDataHandle.AddrOfPinnedObject();
            m_Header.dwBufferLength = size;
            WaveOutHelper.Try(WaveNative.waveOutPrepareHeader(m_WaveOut, ref m_Header, Marshal.SizeOf(m_Header)));
        }
        catch
        {
            // Construction failed: nothing was prepared, so only release what was allocated.
            // Without this the self handle would keep the half-built object alive forever.
            m_Header.lpData = IntPtr.Zero;
            ReleaseHandles();
            m_PlayEvent.Close();
            GC.SuppressFinalize(this);
            throw;
        }
    }

    ~WaveOutBuffer()
    {
        Dispose(false);
    }

    /// <summary>
    /// Unprepares the header and releases every GC handle and the completion event.
    /// Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    // Shared clean-up; the managed event is only closed on the explicit Dispose path.
    private void Dispose(bool disposing)
    {
        if (m_Header.lpData != IntPtr.Zero)
        {
            WaveNative.waveOutUnprepareHeader(m_WaveOut, ref m_Header, Marshal.SizeOf(m_Header));
            m_Header.lpData = IntPtr.Zero;
        }
        ReleaseHandles();
        if (disposing)
        {
            m_PlayEvent.Close();
        }
    }

    // Frees whichever GC handles are currently allocated, including the handle to this instance.
    private void ReleaseHandles()
    {
        if (m_HeaderHandle.IsAllocated)
            m_HeaderHandle.Free();
        if (m_HeaderDataHandle.IsAllocated)
            m_HeaderDataHandle.Free();
        if (m_SelfHandle.IsAllocated)
            m_SelfHandle.Free();
    }

    /// <summary>Buffer length in bytes, as stored in the header.</summary>
    public int Size
    {
        get { return m_Header.dwBufferLength; }
    }

    /// <summary>Address of the pinned data array.</summary>
    public IntPtr Data
    {
        get { return m_Header.lpData; }
    }

    /// <summary>
    /// Submits the buffer to the device with waveOutWrite.
    /// </summary>
    /// <returns>true when the device accepted the buffer.</returns>
    public bool Play()
    {
        lock (m_Gate)
        {
            m_PlayEvent.Reset();
            m_Playing = WaveNative.waveOutWrite(m_WaveOut, ref m_Header, Marshal.SizeOf(m_Header)) == WaveNative.MMSYSERR_NOERROR;
            return m_Playing;
        }
    }

    /// <summary>
    /// Blocks until the submitted buffer has been completed by the callback; yields the time
    /// slice when the buffer is not submitted.
    /// </summary>
    public void WaitFor()
    {
        if (m_Playing)
        {
            m_PlayEvent.WaitOne();
            // The signal has been consumed, so the buffer is no longer pending. The original
            // stored WaitOne()'s result (always true) here, which left the flag set and could
            // make a later WaitFor block on an event that will never be signalled again.
            m_Playing = false;
        }
        else
        {
            Thread.Sleep(0);
        }
    }

    /// <summary>Signals that the device has finished with this buffer; called from WaveOutProc.</summary>
    public void OnCompleted()
    {
        m_PlayEvent.Set();
        m_Playing = false;
    }
}
/// <summary>
/// Opens a waveOut device and, on a dedicated thread, cycles through a ring of WaveOutBuffer
/// objects: each buffer is filled by a BufferFillEventHandler (or with silence) and played.
/// </summary>
public class WaveOutPlayer : IDisposable
{
    private IntPtr m_WaveOut;
    private WaveOutBuffer m_Buffers; // linked list (circular)
    private WaveOutBuffer m_CurrentBuffer;
    private Thread m_Thread;
    private BufferFillEventHandler m_FillProc;
    // Set by Dispose and polled by the worker thread, hence volatile.
    private volatile bool m_Finished;
    // Byte value written for silence: 128 for 8-bit formats, 0 otherwise (see constructor).
    private byte m_zero;

    // Held in a field so the delegate given to the native API is not collected.
    private WaveNative.WaveDelegate m_BufferProc = new WaveNative.WaveDelegate(WaveOutBuffer.WaveOutProc);

    /// <summary>Number of output devices as reported by waveOutGetNumDevs.</summary>
    public static int DeviceCount
    {
        get { return WaveNative.waveOutGetNumDevs(); }
    }

    /// <summary>
    /// Opens the device, allocates <paramref name="bufferCount"/> buffers and starts the
    /// worker thread.
    /// </summary>
    /// <param name="device">Device identifier passed to waveOutOpen.</param>
    /// <param name="format">Playback format passed to waveOutOpen.</param>
    /// <param name="bufferSize">Size of each buffer in bytes.</param>
    /// <param name="bufferCount">Number of buffers in the ring; must be positive.</param>
    /// <param name="fillProc">Invoked on the worker thread to fill each buffer; when null, silence is played.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufferCount"/> is not positive.</exception>
    /// <exception cref="Exception">A waveOut call reported an error (via WaveOutHelper.Try).</exception>
    public WaveOutPlayer(int device, WaveFormat format, int bufferSize, int bufferCount, BufferFillEventHandler fillProc)
    {
        // With no buffers the worker thread would dereference a null ring and crash.
        if (bufferCount <= 0)
            throw new ArgumentOutOfRangeException("bufferCount", "At least one buffer is required.");

        m_zero = format.wBitsPerSample == 8 ? (byte)128 : (byte)0;
        m_FillProc = fillProc;
        WaveOutHelper.Try(WaveNative.waveOutOpen(out m_WaveOut, device, format, m_BufferProc, 0, WaveNative.CALLBACK_FUNCTION));
        try
        {
            AllocateBuffers(bufferSize, bufferCount);
        }
        catch
        {
            // Dispose() only cleans up once the thread exists, so release the device here;
            // otherwise a failed construction would leave it open.
            FreeBuffers();
            WaveNative.waveOutClose(m_WaveOut);
            m_WaveOut = IntPtr.Zero;
            throw;
        }
        m_Thread = new Thread(new ThreadStart(ThreadProc));
        m_Thread.Start();
    }

    ~WaveOutPlayer()
    {
        Dispose();
    }

    /// <summary>
    /// Stops playback, waits for the worker thread, frees the buffers and closes the device.
    /// Subsequent calls do nothing.
    /// </summary>
    public void Dispose()
    {
        if (m_Thread != null)
        {
            try
            {
                m_Finished = true;
                if (m_WaveOut != IntPtr.Zero)
                    WaveNative.waveOutReset(m_WaveOut);
                m_Thread.Join();
                m_FillProc = null;
                FreeBuffers();
                if (m_WaveOut != IntPtr.Zero)
                    WaveNative.waveOutClose(m_WaveOut);
            }
            finally
            {
                m_Thread = null;
                m_WaveOut = IntPtr.Zero;
            }
        }
        GC.SuppressFinalize(this);
    }

    // Worker loop: wait for the next free buffer, fill it (or write silence), play it.
    private void ThreadProc()
    {
        // Silence block, built once on first use instead of on every iteration.
        byte[] silence = null;

        while (!m_Finished)
        {
            Advance();

            BufferFillEventHandler fill = m_FillProc;
            if (fill != null && !m_Finished)
            {
                fill(m_CurrentBuffer.Data, m_CurrentBuffer.Size);
            }
            else
            {
                // zero out buffer
                int size = m_CurrentBuffer.Size;
                if (silence == null || silence.Length != size)
                {
                    silence = new byte[size];
                    for (int i = 0; i < silence.Length; i++)
                        silence[i] = m_zero;
                }
                Marshal.Copy(silence, 0, m_CurrentBuffer.Data, size);
            }
            m_CurrentBuffer.Play();
        }
        WaitForAllBuffers();
    }

    // Builds a circular list of bufferCount buffers; the ring is closed even if a later
    // allocation throws, so FreeBuffers can always walk it.
    private void AllocateBuffers(int bufferSize, int bufferCount)
    {
        FreeBuffers();
        if (bufferCount > 0)
        {
            m_Buffers = new WaveOutBuffer(m_WaveOut, bufferSize);
            WaveOutBuffer Prev = m_Buffers;
            try
            {
                for (int i = 1; i < bufferCount; i++)
                {
                    WaveOutBuffer Buf = new WaveOutBuffer(m_WaveOut, bufferSize);
                    Prev.NextBuffer = Buf;
                    Prev = Buf;
                }
            }
            finally
            {
                Prev.NextBuffer = m_Buffers;
            }
        }
    }

    // Disposes every node of the ring and clears the list.
    private void FreeBuffers()
    {
        m_CurrentBuffer = null;
        if (m_Buffers != null)
        {
            WaveOutBuffer First = m_Buffers;
            m_Buffers = null;

            WaveOutBuffer Current = First;
            do
            {
                WaveOutBuffer Next = Current.NextBuffer;
                Current.Dispose();
                Current = Next;
            } while (Current != First);
        }
    }

    // Moves to the next buffer and blocks until the device has finished playing it.
    private void Advance()
    {
        m_CurrentBuffer = m_CurrentBuffer == null ? m_Buffers : m_CurrentBuffer.NextBuffer;
        m_CurrentBuffer.WaitFor();
    }

    // Waits on the buffers of the ring in order.
    // NOTE(review): as in the original, the loop stops before the last node, so that buffer
    // is never waited on - confirm whether this is intentional before changing it.
    private void WaitForAllBuffers()
    {
        WaveOutBuffer Buf = m_Buffers;
        if (Buf == null)
            return;
        while (Buf.NextBuffer != m_Buffers)
        {
            Buf.WaitFor();
            Buf = Buf.NextBuffer;
        }
    }
}
}
1.3) SignalGenerator.cs // Speech recognition // signal generator => to generate various signals like sawtooth…
using System; using System.Collections.Generic; using System.Text;
namespace SoundViewer
{
    /// <summary>
    /// Produces an array of samples of a periodic test waveform ("Sine", "Cosine", "Square",
    /// "Triangular" or "Sawtooth"), optionally shifted by a DC level and disturbed by noise.
    /// All settings are plain get/set pairs; <see cref="GenerateSignal"/> does the work.
    /// </summary>
    class SignalGenerator
    {
        private string _waveForm = "Sine";
        private double _amplitude = 128.0;
        private double _samplingRate = 44100;
        private double _frequency = 5000.0;
        private double _dcLevel = 0.0;
        private double _noise = 0.0;
        private int _samples = 16384;
        private bool _addDCLevel = false;
        private bool _addNoise = false;

        public SignalGenerator() { }

        /// <summary>Selects the waveform by name; unrecognised names produce a zero signal.</summary>
        public void SetWaveform(string waveForm) { _waveForm = waveForm; }

        public String GetWaveform() { return _waveForm; }

        /// <summary>Peak amplitude of the generated waveform.</summary>
        public void SetAmplitude(double amplitude) { _amplitude = amplitude; }

        public double GetAmplitude() { return _amplitude; }

        /// <summary>Waveform frequency, in the same unit as the sampling rate.</summary>
        public void SetFrequency(double frequency) { _frequency = frequency; }

        public double GetFrequency() { return _frequency; }

        /// <summary>Number of samples per unit of time.</summary>
        public void SetSamplingRate(double rate) { _samplingRate = rate; }

        public double GetSamplingRate() { return _samplingRate; }

        /// <summary>Length of the array returned by <see cref="GenerateSignal"/>.</summary>
        public void SetSamples(int samples) { _samples = samples; }

        public int GetSamples() { return _samples; }

        /// <summary>Constant offset added to every sample when the DC level state is on.</summary>
        public void SetDCLevel(double dc) { _dcLevel = dc; }

        public double GetDCLevel() { return _dcLevel; }

        /// <summary>Peak magnitude of the noise added when the noise state is on.</summary>
        public void SetNoise(double noise) { _noise = noise; }

        public double GetNoise() { return _noise; }

        public void SetDCLevelState(bool dcstate) { _addDCLevel = dcstate; }

        public bool IsDCLevel() { return _addDCLevel; }

        public void SetNoiseState(bool noisestate) { _addNoise = noisestate; }

        public bool IsNoise() { return _addNoise; }

        /// <summary>
        /// Generates the configured number of samples of the selected waveform, then adds the
        /// DC level and the noise if those options are switched on.
        /// </summary>
        /// <returns>A new array of length GetSamples().</returns>
        public double[] GenerateSignal()
        {
            double[] values = new double[_samples];

            // A switch on a null string falls through to "no case", i.e. silence, instead of
            // throwing as the former _waveForm.Equals(...) chain did.
            switch (_waveForm)
            {
                case "Sine":
                {
                    double theta = 2.0 * Math.PI * _frequency / _samplingRate;
                    for (int i = 0; i < _samples; i++)
                        values[i] = _amplitude * Math.Sin(i * theta);
                    break;
                }
                case "Cosine":
                {
                    double theta = 2.0 * Math.PI * _frequency / _samplingRate;
                    for (int i = 0; i < _samples; i++)
                        values[i] = _amplitude * Math.Cos(i * theta);
                    break;
                }
                case "Square":
                {
                    // p counts half periods per sample; the sign flips on every half period.
                    double p = 2.0 * _frequency / _samplingRate;
                    for (int i = 0; i < _samples; i++)
                        values[i] = Math.Round(i * p) % 2 == 0 ? _amplitude : -_amplitude;
                    break;
                }
                case "Triangular":
                {
                    double p = 2.0 * _frequency / _samplingRate;
                    for (int i = 0; i < _samples; i++)
                    {
                        int ip = (int)Math.Round(i * p);
                        // (i * p - ip) lies in [-0.5, 0.5]; the sign factor alternates the slope.
                        values[i] = 2.0 * _amplitude * (1 - 2 * (ip % 2)) * (i * p - ip);
                    }
                    break;
                }
                case "Sawtooth":
                {
                    for (int i = 0; i < _samples; i++)
                    {
                        double q = i * _frequency / _samplingRate;
                        values[i] = 2.0 * _amplitude * (q - Math.Round(q));
                    }
                    break;
                }
            }

            if (_addDCLevel)
            {
                for (int i = 0; i < _samples; i++)
                    values[i] += _dcLevel;
            }

            if (_addNoise)
            {
                // Random.Next() yields integers up to int.MaxValue, which swamped the signal by
                // many orders of magnitude. Use a uniform value in [-_noise, +_noise] instead.
                Random r = new Random();
                for (int i = 0; i < _samples; i++)
                    values[i] += _noise * (2.0 * r.NextDouble() - 1.0);
            }

            return values;
        }
    }
}
1.4) AudioFrame.cs // Speech recognition // audioframe => working on audio frame using System; using System.Drawing; using System.Windows.Forms;
namespace SoundViewer
{
    /// <summary>
    /// Holds one frame of two-channel 16-bit audio, its per-channel spectrum as returned by
    /// FourierTransform.FFTDb, and renders both into PictureBox controls.
    /// </summary>
    class AudioFrame
    {
        private Bitmap _canvasTimeDomain;
        private Bitmap _canvasFrequencyDomain;
        private double[] _waveLeft;
        private double[] _waveRight;
        private double[] _fftLeft;
        // Was declared as "_ftRight" while every use site says "_fftRight" (did not compile).
        private double[] _fftRight;
        private SignalGenerator _signalGenerator;
        private bool _isTest = false;

        /// <param name="isTest">When true, Process ignores its input and analyses a generated sine.</param>
        public AudioFrame(bool isTest)
        {
            _isTest = isTest;
        }

        /// <summary>
        /// Process 16 bit sample: splits interleaved 16-bit stereo bytes into left/right sample
        /// arrays (or generates a test sine) and computes the spectrum of both channels.
        /// </summary>
        /// <param name="wave">Interleaved sample bytes, 4 bytes per stereo frame. Trailing bytes
        /// that do not form a complete frame are ignored.</param>
        /// <exception cref="ArgumentNullException"><paramref name="wave"/> is null outside test mode.</exception>
        public void Process(ref byte[] wave)
        {
            if (_isTest == false)
            {
                if (wave == null)
                    throw new ArgumentNullException("wave");

                // Split out channels from sample. Iterating over whole frames keeps both the
                // byte reads and the array writes in range when Length is not a multiple of 4.
                int frames = wave.Length / 4;
                _waveLeft = new double[frames];
                _waveRight = new double[frames];
                for (int h = 0; h < frames; h++)
                {
                    int offset = h * 4;
                    _waveLeft[h] = (double)BitConverter.ToInt16(wave, offset);
                    _waveRight[h] = (double)BitConverter.ToInt16(wave, offset + 2);
                }
            }
            else
            {
                // Generate artificial sample for testing
                _signalGenerator = new SignalGenerator();
                _signalGenerator.SetWaveform("Sine");
                _signalGenerator.SetSamplingRate(44100);
                _signalGenerator.SetSamples(16384);
                _signalGenerator.SetFrequency(5000);
                _waveLeft = _signalGenerator.GenerateSignal();
                _waveRight = _signalGenerator.GenerateSignal();
            }

            // Generate frequency domain data in decibels
            _fftLeft = FourierTransform.FFTDb(ref _waveLeft);
            _fftRight = FourierTransform.FFTDb(ref _waveRight);
        }

        /// <summary>
        /// Render time domain to PictureBox: left channel in the upper half, right channel in
        /// the lower half, separated by a horizontal line.
        /// </summary>
        /// <param name="pictureBox">Target control; its Image is replaced by a new bitmap.</param>
        public void RenderTimeDomain(ref PictureBox pictureBox)
        {
            // Set up for drawing
            _canvasTimeDomain = new Bitmap(pictureBox.Width, pictureBox.Height);

            // Determine channel boundaries
            int width = _canvasTimeDomain.Width;
            int center = _canvasTimeDomain.Height / 2;
            int height = _canvasTimeDomain.Height;

            int leftTop = 0;
            int leftBottom = center - 1;
            int rightTop = center + 1;
            int rightBottom = height;

            // using-blocks release the GDI objects even if drawing throws.
            using (Graphics offScreenDC = Graphics.FromImage(_canvasTimeDomain))
            using (Pen pen = new Pen(Color.WhiteSmoke))
            {
                offScreenDC.DrawLine(pen, 0, center, width, center);
                pen.Color = Color.LimeGreen;

                // a 16 bit sample has values from -32768 to 32767
                double yCenterLeft = (leftBottom - leftTop) / 2;
                double yScaleLeft = 0.5 * (leftBottom - leftTop) / 32768;
                DrawWaveform(offScreenDC, pen, _waveLeft, width, yCenterLeft, yScaleLeft);

                double yCenterRight = rightTop + ((rightBottom - rightTop) / 2);
                double yScaleRight = 0.5 * (rightBottom - rightTop) / 32768;
                DrawWaveform(offScreenDC, pen, _waveRight, width, yCenterRight, yScaleRight);
            }

            // NOTE(review): the bitmap previously shown by the control is not disposed here
            // because its ownership cannot be determined from this class alone.
            pictureBox.Image = _canvasTimeDomain;
        }

        // Draws one channel as a polyline, one point per pixel column; does nothing when the
        // channel has no samples (e.g. Process has not been called yet).
        private static void DrawWaveform(Graphics g, Pen pen, double[] wave, int width, double yCenter, double yScale)
        {
            if (wave == null || wave.Length == 0)
                return;

            int stride = wave.Length / width;
            int xPrev = 0, yPrev = 0;
            for (int xAxis = 0; xAxis < width; xAxis++)
            {
                int yAxis = (int)(yCenter + (wave[stride * xAxis] * yScale));
                if (xAxis > 0)
                    g.DrawLine(pen, xPrev, yPrev, xAxis, yAxis);
                xPrev = xAxis;
                yPrev = yAxis;
            }
        }

        /// <summary>
        /// Render frequency domain to PictureBox: left channel bars grow down from the top,
        /// right channel bars grow up from the bottom.
        /// </summary>
        /// <param name="pictureBox">Target control; its Image is replaced by a new bitmap.</param>
        public void RenderFrequencyDomain(ref PictureBox pictureBox)
        {
            // Set up for drawing
            _canvasFrequencyDomain = new Bitmap(pictureBox.Width, pictureBox.Height);

            // Determine channel boundaries
            int width = _canvasFrequencyDomain.Width;
            int center = _canvasFrequencyDomain.Height / 2;
            int height = _canvasFrequencyDomain.Height;

            int leftTop = 0;
            int leftBottom = center - 1;
            int rightTop = center + 1;
            int rightBottom = height;

            using (Graphics offScreenDC = Graphics.FromImage(_canvasFrequencyDomain))
            using (Pen pen = new Pen(Color.WhiteSmoke))
            {
                offScreenDC.DrawLine(pen, 0, center, width, center);

                DrawSpectrum(offScreenDC, pen, _fftLeft, width, leftTop, leftBottom - leftTop, 1);
                DrawSpectrum(offScreenDC, pen, _fftRight, width, rightBottom, rightBottom - rightTop, -1);
            }

            pictureBox.Image = _canvasFrequencyDomain;
        }

        // Draws one vertical bar per pixel column from baseY towards direction (+1 down, -1 up).
        // Negative magnitudes are dropped; the divisor 100 is the original's arbitrary scale.
        private static void DrawSpectrum(Graphics g, Pen pen, double[] fft, int width, int baseY, int extent, int direction)
        {
            if (fft == null || fft.Length == 0)
                return;

            for (int xAxis = 0; xAxis < width; xAxis++)
            {
                double amplitude = (int)fft[(int)(((double)(fft.Length) / (double)(width)) * xAxis)];
                if (amplitude < 0) // Drop negative values
                    amplitude = 0;
                int yAxis = (int)(baseY + direction * ((extent * amplitude) / 100));
                pen.Color = Color.FromArgb(120, 120, (int)amplitude % 255);
                g.DrawLine(pen, xAxis, baseY, xAxis, yAxis);
            }
        }

        // Placeholder for the recognition step. Pointer parameters need an unsafe context
        // (and the project must allow unsafe code); without the modifier this did not compile.
        unsafe void WaveIn(short* buf, int len)
        {
            // TODO: recognition is not implemented.
        }
    }
}
2. Листинг программы – Speech Recognition (Matlab)
2.1) CMN.m
function NormMatrix = CMN(Matrix)
%CMN Subtracts from every column of Matrix the mean of that column.
%   NormMatrix = CMN(Matrix) returns a matrix of the same size in which each
%   column has zero mean. (Presumably "cepstral mean normalisation" of an
%   MFCC feature matrix - confirm against the caller.)
[r, c] = size(Matrix);
NormMatrix = zeros(r, c);   % was zeros(г,c) with a Cyrillic letter in place of r
for i = 1:c
    MatMean = mean(Matrix(:, i));               % mean of column i
    NormMatrix(:, i) = Matrix(:, i) - MatMean;  % subtract it from each element of the column
end
2.2) Recognition.m
% Records one utterance, extracts its MFCC features and classifies it against
% the stored templates with DTW scores and a k-nearest-neighbour vote.
clear all;
close all;
ncoeff = 13;        %Required number of mfcc coefficients
N = 20;             %Number of words in vocabulary
k = 3;              %Number of nearest neighbors to choose
fs = 16000;         %Sampling rate
duration1 = 0.1;    %Initial silence duration in seconds
duration2 = 2;      %Recording duration in seconds
G = 2;              %vary this factor to compensate for amplitude variations
NSpeakers = 5;      %Number of training speakers

fprintf('Press any key to start %g seconds of speech recording...', duration2);
pause;
silence = wavrecord(duration1*fs, fs);
fprintf('Recording speech...');
speechIn = wavrecord(duration2*fs, fs);     % duration*fs is the total number of sample points
fprintf('Finished recording.\n');
fprintf('System is trying to recognize what you have spoken...\n');
speechIn1 = [silence; speechIn];            %pads with duration1 seconds of leading silence
speechIn2 = speechIn1.*G;
speechIn3 = speechIn2 - mean(speechIn2);    %DC offset elimination
speechIn = nreduce(speechIn3, fs);          %Applies spectral subtraction
rMatrix1 = mfccf(ncoeff, speechIn, fs);     %Compute test feature vector
rMatrix = CMN(rMatrix1);                    %Removes convolutional noise

Sco = DTWScores(rMatrix, N);                %computes all DTW scores
[SortedScores, EIndex] = sort(Sco);         %Sort scores increasing
K_Vector = EIndex(1:k);                     %Gets k lowest scores
Neighbors = zeros(1, k);                    %will hold k-N neighbors

% Map every template index back to its word index (1..N) by removing the
% per-speaker offset.
for t = 1:k
    u = K_Vector(t);
    for r = 1:NSpeakers-1
        if u <= (N)
            break
        else
            u = u - (N);
        end
    end
    Neighbors(t) = u;   % was "= N", which made every neighbour the last word
end

%Apply k-Nearest Neighbor rule
Nbr = Neighbors
%sortk = sort(Nbr);
[Modal, Freq] = mode(Nbr);                  %most frequent value and its count (was "[Modal.Freq]")
Word = strvcat('One','Two','Three','Four','Five','Six','Seven','Eight','Nine','Ten','Yes','No','Hello','Open','Close','Start','Stop','Dial','On','Off');
if mean(abs(speechIn)) < 0.01
    fprintf('No microphone connected or you have not said anything.\n');
elseif ((k/Freq) > 2)                       %if no majority
    fprintf('The word you have said could not be properly recognised.\n');
else
    fprintf('You have just said %s.\n', Word(Modal,:));     %Prints recognized word
end
2.3) setTemplates.m
% Builds the MFCC template sets: for each of five speakers, reads the 20
% vocabulary recordings, trims them with myVAD and stores the MFCC matrices.
ncoeff = 13;            %Required number of mfcc coefficients
fMatrix1 = cell(1,20);
fMatrix2 = cell(1,20);
fMatrix3 = cell(1,20);
fMatrix4 = cell(1,20);
fMatrix5 = cell(1,20);  % preallocated like the other four

for j = 1:20
    q = ['C:\SpeechData\Amir\5_' num2str(j) '.wav'];
    [speechIn1, FS1] = wavread(q);
    speechIn1 = myVAD(speechIn1);                       %Speech endpoint trimming
    fMatrix1(1,j) = {mfccf(ncoeff, speechIn1, FS1)};    %MFCC coefficients are computed here
end

for k = 1:20
    q = ['C:\SpeechData\Ayo\5_' num2str(k) '.wav'];
    [speechIn2, FS2] = wavread(q);
    speechIn2 = myVAD(speechIn2);
    fMatrix2(1,k) = {mfccf(ncoeff, speechIn2, FS2)};    % was "mfcvcf", a typo for mfccf
end

for l = 1:20
    q = ['C:\SpeechData\Sameh\5_' num2str(l) '.wav'];
    [speechIn3, FS3] = wavread(q);                      % was returned as "F3" but used as FS3
    speechIn3 = myVAD(speechIn3);
    fMatrix3(1,l) = {mfccf(ncoeff, speechIn3, FS3)};
end

for m = 1:20
    q = ['C:\SpeechData\Jim\5_' num2str(m) '.wav'];
    [speechIn4, FS4] = wavread(q);
    speechIn4 = myVAD(speechIn4);
    fMatrix4(1,m) = {mfccf(ncoeff, speechIn4, FS4)};
end

for n = 1:20
    q = ['C:\SpeechData\Tope\5_' num2str(n) '.wav'];
    [speechIn5, FS5] = wavread(q);
    speechIn5 = myVAD(speechIn5);
    fMatrix5(1,n) = {mfccf(ncoeff, speechIn5, FS5)};
end

%Converts the cells containing all matrices to structures and save
%structures in matlab .mat files in the working directory.
fields = {'One','Two','Three','Four','Five','Six','Seven','Eight','Nine','Ten','Yes','No','Hello','Open','Close','Start','Stop','Dial','On','Off'};
s1 = cell2struct(fMatrix1, fields, 2);
save Vectors1.mat -struct s1;
s2 = cell2struct(fMatrix2, fields, 2);
save Vectors2.mat -struct s2;
s3 = cell2struct(fMatrix3, fields, 2);
save Vectors3.mat -struct s3;
s4 = cell2struct(fMatrix4, fields, 2);
save Vectors4.mat -struct s4;
s5 = cell2struct(fMatrix5, fields, 2);
save Vectors5.mat -struct s5;
Популярное: Генезис конфликтологии как науки в древней Греции: Для уяснения предыстории конфликтологии существенное значение имеет обращение к античной... Организация как механизм и форма жизни коллектива: Организация не сможет достичь поставленных целей без соответствующей внутренней... Как построить свою речь (словесное оформление):
При подготовке публичного выступления перед оратором возникает вопрос, как лучше словесно оформить свою... Как распознать напряжение: Говоря о мышечном напряжении, мы в первую очередь имеем в виду мускулы, прикрепленные к костям ... ©2015-2024 megaobuchalka.ru Все материалы представленные на сайте исключительно с целью ознакомления читателями и не преследуют коммерческих целей или нарушение авторских прав. (180)
|
Почему 1285321 студент выбрали МегаОбучалку... Система поиска информации Мобильная версия сайта Удобная навигация Нет шокирующей рекламы |