function [pitch,voicing] = pitch_extract(signal,fshift,sample_freq)
% function [pitch,voicing] = pitch_extract(signal,fshift,sample_freq)
%
% GOAL:   Extract the pitch frequency and the voicing evidence of a given
%         speech signal. Both values are calculated every 'fshift' seconds.
%
% INPUT:  signal       residual signal (mono, i.e. a vector)
%         fshift       frame shift (in sec), the time between 2 pitch
%                      values (default: 0.01)
%         sample_freq  sampling frequency of 'signal' (in Hz)
%                      (default: 16000)
% OUTPUT: pitch        array with the pitch values of 'signal'
%         voicing      array with the corresponding voicing evidence
%
% The work is delegated to the external 'sigp' program: the signal and a
% processing script are written to temporary files, sigp is run on them,
% and its two-column text output is read back. The temporary files are
% removed on every exit path after they have been named.

% Without any argument: show the help text. The outputs are set to empty
% so that a call requesting outputs does not fail on unassigned values.
if(nargin < 1)
  pitch = [];
  voicing = [];
  help pitch_extract;
  return;
end
if(nargin < 2)
  fshift = [];
end
if(nargin < 3)
  sample_freq = [];
end
if(isempty(sample_freq))
  sample_freq = 16000;
end
if(isempty(fshift))
  fshift = 0.01;
end

% Frame shift expressed in samples at the input rate (n1) and at 4000 Hz (n2).
n1 = floor(fshift*sample_freq+0.5);
n2 = floor(fshift*4000+0.5);
% A frame shift that rounds to zero samples would make the target sample
% rate below 0, Inf or NaN, so reject it before anything is written.
if(n1 < 1 || n2 < 1)
  error('fshift (%g s) is too small for a sampling frequency of %g Hz',fshift,sample_freq);
end
% Increase n2 until the fraction n2/n1, reduced to lowest terms, has a
% numerator of at most 3.
while(n2/gcd(n1,n2) > 3)
  n2 = n2+1;
end
% Sample rate requested from the [time_filter] step in the script.
sample_freq2 = sample_freq*n2/n1;

% write the data in a known format
nsamples = numel(signal);
if(nsamples ~= max(size(signal)))
  error('can only handle mono signals');
end
% NOTE(review): presumably brings signals normalised to [-1,1] up to a
% 16-bit integer range -- confirm against what sigp expects.
if(max(abs(signal(:))) <= 1)
  signal(:) = signal(:).*2^15;
end

tmp_ifname = tempname;   % input data for sigp
tmp_ofname = tempname;   % output written by sigp
tmp_sfname = tempname;   % processing script for sigp

fd = fopen(tmp_ifname,'w');
if(fd == -1)
  cleanup(tmp_ifname,tmp_ofname,tmp_sfname);
  error('can''t open %s',tmp_ifname);
end
fprintf(fd,'.key\nDATATYPE SAMPLE\nDATAFORMAT A-FLOAT\nNCHAN 1\nCHANLEN %i\nSAMPLEFREQ %i\n#\n',nsamples,sample_freq);
fprintf(fd,'%i\n',signal(:));
fclose(fd);

% write the pre-processing script
fd = fopen(tmp_sfname,'w');
if(fd == -1)
  cleanup(tmp_ifname,tmp_ofname,tmp_sfname);
  error('can''t open %s',tmp_sfname);
end
fprintf(fd,'.fshift %g\n.convert to FLOAT\n\n',fshift);
fprintf(fd,'[time_filter]\n set samplefreq to %.4f\n',sample_freq2);
fprintf(fd,'[fun_eval]\n eval ovec=ivec^3\n');
fprintf(fd,'[sam2trk]\n flength %g\n preemp 0.00\n',3*fshift);
fprintf(fd,'[scale]\n window HAMMING no_norm\n');
fprintf(fd,'[fun_eval]\n eval ovec=[ivec,0:0:3*vlen]\n');
fprintf(fd,'[anafft]\n type POWER\n');
fprintf(fd,'[envelope]\n norm\n');
fprintf(fd,'[pitch]\n freq_band 2000\n window HAMMING\n voicing evidence\n track\n');
fprintf(fd,'.convert to A-FLOAT\n');
fclose(fd);

% process the data
[rv,e] = unix(sprintf('/users/spraak/spch/soft/ssp23/binlinux-i386/sigp -ssp %s -i %s -o %s -Fo NOHEADER',tmp_sfname,tmp_ifname,tmp_ofname));
% Any captured output is treated as a failure, as is a non-zero status.
if(rv ~= 0 || ~isempty(e))
  cleanup(tmp_ifname,tmp_ofname,tmp_sfname);
  % error() takes no action for an empty message, so make sure a failure
  % without captured output is still reported.
  if(isempty(e))
    e = sprintf('sigp failed with exit status %d',rv);
  end
  error('%s',e);
end

% read the result
try
  x = load(tmp_ofname);
catch
  % Do not leave the temporary files behind when the output is unreadable.
  cleanup(tmp_ifname,tmp_ofname,tmp_sfname);
  error('pitch_extract: can''t read sigp output: %s',lasterr);
end

% that's all folks
cleanup(tmp_ifname,tmp_ofname,tmp_sfname);

if(size(x,2) < 2)
  error('pitch_extract: sigp output has %d column(s), expected at least 2',size(x,2));
end
pitch = x(:,1);
voicing = x(:,2);
return;


function cleanup(tmp_ifname,tmp_ofname,tmp_sfname)
% Remove the three temporary files used by pitch_extract. '/bin/rm -f' is
% used, so files that were never created are ignored silently.
unix(sprintf('/bin/rm -f %s %s %s',tmp_ifname,tmp_ofname,tmp_sfname));
return;