/[zanavi_public1]/navit/navit/support/espeak/klatt.c
ZANavi

Contents of /navit/navit/support/espeak/klatt.c

Parent Directory | Revision Log


Revision 2 - (show annotations) (download)
Fri Oct 28 21:19:04 2011 UTC (12 years, 5 months ago) by zoff99
File MIME type: text/plain
File size: 32799 byte(s)
import files
1
2 /***************************************************************************
3 * Copyright (C) 2008 by Jonathan Duddington *
4 * email: jonsd@users.sourceforge.net *
5 * *
6 * Based on a re-implementation by: *
7 * (c) 1993,94 Jon Iles and Nick Ing-Simmons *
8 * of the Klatt cascade-parallel formant synthesizer *
9 * *
10 * This program is free software; you can redistribute it and/or modify *
11 * it under the terms of the GNU General Public License as published by *
12 * the Free Software Foundation; either version 3 of the License, or *
13 * (at your option) any later version. *
14 * *
15 * This program is distributed in the hope that it will be useful, *
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
18 * GNU General Public License for more details. *
19 * *
20 * You should have received a copy of the GNU General Public License *
21 * along with this program; if not, see: *
22 * <http://www.gnu.org/licenses/>. *
23 ***************************************************************************/
24
25 // See URL: ftp://svr-ftp.eng.cam.ac.uk/pub/comp.speech/synthesis/klatt.3.04.tar.gz
26
27 #include "StdAfx.h"
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <math.h>
32 #include <string.h>
33
34 #include "speak_lib.h"
35 #include "speech.h"
36 #include "klatt.h"
37 #include "phoneme.h"
38 #include "synthesize.h"
39 #include "voice.h"
40
41 #ifdef INCLUDE_KLATT // conditional compilation for the whole file
42
43 extern unsigned char *out_ptr; // **JSD
44 extern unsigned char *out_start;
45 extern unsigned char *out_end;
46 extern WGEN_DATA wdata;
47 static int nsamples;
48 static int sample_count;
49
50
51 #ifdef _MSC_VER
52 #define getrandom(min,max) ((rand()%(int)(((max)+1)-(min)))+(min))
53 #else
54 #define getrandom(min,max) ((rand()%(long)(((max)+1)-(min)))+(min))
55 #endif
56
57
58 /* function prototypes for functions private to this file */
59
60 static void flutter(klatt_frame_ptr);
61 static double sampled_source (void);
62 static double impulsive_source (void);
63 static double natural_source (void);
64 static void pitch_synch_par_reset (klatt_frame_ptr);
65 static double gen_noise (double);
66 static double DBtoLIN (long);
67 static void frame_init (klatt_frame_ptr);
68 static void setabc (long,long,resonator_ptr);
69 static void setzeroabc (long,long,resonator_ptr);
70
71 static klatt_frame_t kt_frame;
72 static klatt_global_t kt_globals;
73
74 /*
75 function RESONATOR
76
77 This is a generic resonator function. Internal memory for the resonator
78 is stored in the globals structure.
79 */
80
81 static double resonator(resonator_ptr r, double input)
82 {
83 double x;
84
85 x = (double) ((double)r->a * (double)input + (double)r->b * (double)r->p1 + (double)r->c * (double)r->p2);
86 r->p2 = (double)r->p1;
87 r->p1 = (double)x;
88
89 return (double)x;
90 }
91
92 static double resonator2(resonator_ptr r, double input)
93 {
94 double x;
95
96 x = (double) ((double)r->a * (double)input + (double)r->b * (double)r->p1 + (double)r->c * (double)r->p2);
97 r->p2 = (double)r->p1;
98 r->p1 = (double)x;
99
100 r->a += r->a_inc;
101 r->b += r->b_inc;
102 r->c += r->c_inc;
103 return (double)x;
104 }
105
106
107
/*
    function ANTIRESONATOR

    Generic anti-resonator. The arithmetic is the same as resonator(),
    except that a, b, c must be set with setzeroabc() and p1/p2 remember
    previous INPUTS rather than previous outputs:

        out = a*input + b*oldin1 + c*oldin2

    This fixed-coefficient variant is compiled out (guarded by "deleted").
*/

#ifdef deleted
static double antiresonator(resonator_ptr r, double input)
{
    const double oldin1 = (double)r->p1;
    const double oldin2 = (double)r->p2;
    const double out = (double)r->a * input + (double)r->b * oldin1 + (double)r->c * oldin2;

    r->p2 = oldin1;
    r->p1 = input;
    return out;
}
#endif
126
127 static double antiresonator2(resonator_ptr r, double input)
128 {
129 register double x = (double)r->a * (double)input + (double)r->b * (double)r->p1 + (double)r->c * (double)r->p2;
130 r->p2 = (double)r->p1;
131 r->p1 = (double)input;
132
133 r->a += r->a_inc;
134 r->b += r->b_inc;
135 r->c += r->c_inc;
136 return (double)x;
137 }
138
139
140
141 /*
142 function FLUTTER
143
144 This function adds F0 flutter, as specified in:
145
146 "Analysis, synthesis and perception of voice quality variations among
147 female and male talkers" D.H. Klatt and L.C. Klatt JASA 87(2) February 1990.
148
149 Flutter is added by applying a quasi-random element constructed from three
150 slowly varying sine waves.
151 */
152
153 static void flutter(klatt_frame_ptr frame)
154 {
155 static int time_count;
156 double delta_f0;
157 double fla,flb,flc,fld,fle;
158
159 fla = (double) kt_globals.f0_flutter / 50;
160 flb = (double) kt_globals.original_f0 / 100;
161 // flc = sin(2*PI*12.7*time_count);
162 // fld = sin(2*PI*7.1*time_count);
163 // fle = sin(2*PI*4.7*time_count);
164 flc = sin(PI*12.7*time_count); // because we are calling flutter() more frequently, every 2.9mS
165 fld = sin(PI*7.1*time_count);
166 fle = sin(PI*4.7*time_count);
167 delta_f0 = fla * flb * (flc + fld + fle) * 10;
168 frame->F0hz10 = frame->F0hz10 + (long) delta_f0;
169 time_count++;
170 }
171
172
173
174 /*
175 function SAMPLED_SOURCE
176
177 Allows the use of a glottal excitation waveform sampled from a real
178 voice.
179 */
180
181 static double sampled_source()
182 {
183 int itemp;
184 double ftemp;
185 double result;
186 double diff_value;
187 int current_value;
188 int next_value;
189 double temp_diff;
190
191 if(kt_globals.T0!=0)
192 {
193 ftemp = (double) kt_globals.nper;
194 ftemp = ftemp / kt_globals.T0;
195 ftemp = ftemp * kt_globals.num_samples;
196 itemp = (int) ftemp;
197
198 temp_diff = ftemp - (double) itemp;
199
200 current_value = kt_globals.natural_samples[itemp];
201 next_value = kt_globals.natural_samples[itemp+1];
202
203 diff_value = (double) next_value - (double) current_value;
204 diff_value = diff_value * temp_diff;
205
206 result = kt_globals.natural_samples[itemp] + diff_value;
207 result = result * kt_globals.sample_factor;
208 }
209 else
210 {
211 result = 0;
212 }
213 return(result);
214 }
215
216
217
218
219 /*
220 function PARWAVE
221
222 Converts synthesis parameters to a waveform.
223 */
224
225
226 static int parwave(klatt_frame_ptr frame)
227 {
228 double temp;
229 double outbypas;
230 double out;
231 long n4;
232 double frics;
233 double glotout;
234 double aspiration;
235 double casc_next_in;
236 double par_glotout;
237 static double noise;
238 static double voice;
239 static double vlast;
240 static double glotlast;
241 static double sourc;
242 int ix;
243
244 frame_init(frame); /* get parameters for next frame of speech */
245
246 flutter(frame); /* add f0 flutter */
247
248 #ifdef deleted
249 {
250 FILE *f;
251 f=fopen("klatt_log","a");
252 fprintf(f,"%4dhz %2dAV %4d %3d, %4d %3d, %4d %3d, %4d %3d, %4d, %3d, %4d %3d TLT=%2d\n",frame->F0hz10,frame->AVdb,
253 frame->F1hz,frame->B1hz,frame->F2hz,frame->B2hz,frame->F3hz,frame->B3hz,frame->F4hz,frame->B4hz,frame->F5hz,frame->B5hz,frame->F6hz,frame->B6hz,frame->TLTdb);
254 fclose(f);
255 }
256 #endif
257
258 /* MAIN LOOP, for each output sample of current frame: */
259
260 for (kt_globals.ns=0; kt_globals.ns<kt_globals.nspfr; kt_globals.ns++)
261 {
262 /* Get low-passed random number for aspiration and frication noise */
263 noise = gen_noise(noise);
264
265 /*
266 Amplitude modulate noise (reduce noise amplitude during
267 second half of glottal period) if voicing simultaneously present.
268 */
269
270 if (kt_globals.nper > kt_globals.nmod)
271 {
272 noise *= (double) 0.5;
273 }
274
275 /* Compute frication noise */
276 frics = kt_globals.amp_frica * noise;
277
278 /*
279 Compute voicing waveform. Run glottal source simulation at 4
280 times normal sample rate to minimize quantization noise in
281 period of female voice.
282 */
283
284 for (n4=0; n4<4; n4++)
285 {
286 switch(kt_globals.glsource)
287 {
288 case IMPULSIVE:
289 voice = impulsive_source();
290 break;
291 case NATURAL:
292 voice = natural_source();
293 break;
294 case SAMPLED:
295 voice = sampled_source();
296 break;
297 }
298
299 /* Reset period when counter 'nper' reaches T0 */
300 if (kt_globals.nper >= kt_globals.T0)
301 {
302 kt_globals.nper = 0;
303 pitch_synch_par_reset(frame);
304 }
305
306 /*
307 Low-pass filter voicing waveform before downsampling from 4*samrate
308 to samrate samples/sec. Resonator f=.09*samrate, bw=.06*samrate
309 */
310
311 voice = resonator(&(kt_globals.rsn[RLP]),voice);
312
313 /* Increment counter that keeps track of 4*samrate samples per sec */
314 kt_globals.nper++;
315 }
316
317 /*
318 Tilt spectrum of voicing source down by soft low-pass filtering, amount
319 of tilt determined by TLTdb
320 */
321
322 voice = (voice * kt_globals.onemd) + (vlast * kt_globals.decay);
323 vlast = voice;
324
325 /*
326 Add breathiness during glottal open phase. Amount of breathiness
327 determined by parameter Aturb Use nrand rather than noise because
328 noise is low-passed.
329 */
330
331
332 if (kt_globals.nper < kt_globals.nopen)
333 {
334 voice += kt_globals.amp_breth * kt_globals.nrand;
335 }
336
337 /* Set amplitude of voicing */
338 glotout = kt_globals.amp_voice * voice;
339 par_glotout = kt_globals.par_amp_voice * voice;
340
341 /* Compute aspiration amplitude and add to voicing source */
342 aspiration = kt_globals.amp_aspir * noise;
343 glotout += aspiration;
344
345 par_glotout += aspiration;
346
347 /*
348 Cascade vocal tract, excited by laryngeal sources.
349 Nasal antiresonator, then formants FNP, F5, F4, F3, F2, F1
350 */
351
352 out=0;
353 if(kt_globals.synthesis_model != ALL_PARALLEL)
354 {
355 casc_next_in = antiresonator2(&(kt_globals.rsn[Rnz]),glotout);
356 casc_next_in = resonator(&(kt_globals.rsn[Rnpc]),casc_next_in);
357 casc_next_in = resonator(&(kt_globals.rsn[R8c]),casc_next_in);
358 casc_next_in = resonator(&(kt_globals.rsn[R7c]),casc_next_in);
359 casc_next_in = resonator(&(kt_globals.rsn[R6c]),casc_next_in);
360 casc_next_in = resonator2(&(kt_globals.rsn[R5c]),casc_next_in);
361 casc_next_in = resonator2(&(kt_globals.rsn[R4c]),casc_next_in);
362 casc_next_in = resonator2(&(kt_globals.rsn[R3c]),casc_next_in);
363 casc_next_in = resonator2(&(kt_globals.rsn[R2c]),casc_next_in);
364 out = resonator2(&(kt_globals.rsn[R1c]),casc_next_in);
365 }
366
367 /* Excite parallel F1 and FNP by voicing waveform */
368 sourc = par_glotout; /* Source is voicing plus aspiration */
369
370 /*
371 Standard parallel vocal tract Formants F6,F5,F4,F3,F2,
372 outputs added with alternating sign. Sound source for other
373 parallel resonators is frication plus first difference of
374 voicing waveform.
375 */
376
377 out += resonator(&(kt_globals.rsn[R1p]),sourc);
378 out += resonator(&(kt_globals.rsn[Rnpp]),sourc);
379
380 sourc = frics + par_glotout - glotlast;
381 glotlast = par_glotout;
382
383 for(ix=R2p; ix<=R6p; ix++)
384 {
385 out = resonator(&(kt_globals.rsn[ix]),sourc) - out;
386 }
387
388 outbypas = kt_globals.amp_bypas * sourc;
389
390 out = outbypas - out;
391
392 #ifdef deleted
393 // for testing
394 if (kt_globals.outsl != 0)
395 {
396 switch(kt_globals.outsl)
397 {
398 case 1:
399 out = voice;
400 break;
401 case 2:
402 out = aspiration;
403 break;
404 case 3:
405 out = frics;
406 break;
407 case 4:
408 out = glotout;
409 break;
410 case 5:
411 out = par_glotout;
412 break;
413 case 6:
414 out = outbypas;
415 break;
416 case 7:
417 out = sourc;
418 break;
419 }
420 }
421 #endif
422
423 out = resonator(&(kt_globals.rsn[Rout]),out);
424 temp = (out * wdata.amplitude * kt_globals.amp_gain0) ; /* Convert back to integer */
425
426
427 // mix with a recorded WAV if required for this phoneme
428 {
429 int z2;
430 signed char c;
431 int sample;
432
433 z2 = 0;
434 if(wdata.mix_wavefile_ix < wdata.n_mix_wavefile)
435 {
436 if(wdata.mix_wave_scale == 0)
437 {
438 // a 16 bit sample
439 c = wdata.mix_wavefile[wdata.mix_wavefile_ix+1];
440 sample = wdata.mix_wavefile[wdata.mix_wavefile_ix] + (c * 256);
441 wdata.mix_wavefile_ix += 2;
442 }
443 else
444 {
445 // a 8 bit sample, scaled
446 sample = (signed char)wdata.mix_wavefile[wdata.mix_wavefile_ix++] * wdata.mix_wave_scale;
447 }
448 z2 = sample * wdata.amplitude_v / 1024;
449 z2 = (z2 * wdata.mix_wave_amp)/40;
450 temp += z2;
451 }
452 }
453
454 // if fadeout is set, fade to zero over 64 samples, to avoid clicks at end of synthesis
455 if(kt_globals.fadeout > 0)
456 {
457 kt_globals.fadeout--;
458 temp = (temp * kt_globals.fadeout) / 64;
459 }
460
461 if (temp < -32768.0)
462 {
463 temp = -32768.0;
464 }
465
466 if (temp > 32767.0)
467 {
468 temp = 32767.0;
469 }
470
471 *out_ptr++ = (int)(temp); // **JSD
472 *out_ptr++ = (int)(temp) >> 8;
473 sample_count++;
474 if(out_ptr >= out_end)
475 {
476 return(1);
477 }
478 }
479 return(0);
480 } // end of parwave
481
482
483
484
485 /*
486 function PARWAVE_INIT
487
488 Initialises all parameters used in parwave, sets resonator internal memory
489 to zero.
490 */
491
492 static void reset_resonators()
493 {
494 int r_ix;
495
496 for(r_ix=0; r_ix < N_RSN; r_ix++)
497 {
498 kt_globals.rsn[r_ix].p1 = 0;
499 kt_globals.rsn[r_ix].p2 = 0;
500 }
501 }
502
503 static void parwave_init()
504 {
505 kt_globals.FLPhz = (950 * kt_globals.samrate) / 10000;
506 kt_globals.BLPhz = (630 * kt_globals.samrate) / 10000;
507 kt_globals.minus_pi_t = -PI / kt_globals.samrate;
508 kt_globals.two_pi_t = -2.0 * kt_globals.minus_pi_t;
509 setabc(kt_globals.FLPhz,kt_globals.BLPhz,&(kt_globals.rsn[RLP]));
510 kt_globals.nper = 0;
511 kt_globals.T0 = 0;
512 kt_globals.nopen = 0;
513 kt_globals.nmod = 0;
514
515 reset_resonators();
516 }
517
518
519 /*
520 function FRAME_INIT
521
522 Use parameters from the input frame to set up resonator coefficients.
523 */
524
525 static void frame_init(klatt_frame_ptr frame)
526 {
527 double amp_par[7];
528 static double amp_par_factor[7] = {0.6, 0.4, 0.15, 0.06, 0.04, 0.022, 0.03};
529 long Gain0_tmp;
530 int ix;
531
532 kt_globals.original_f0 = frame->F0hz10 / 10;
533
534 frame->AVdb_tmp = frame->AVdb - 7;
535 if (frame->AVdb_tmp < 0)
536 {
537 frame->AVdb_tmp = 0;
538 }
539
540 kt_globals.amp_aspir = DBtoLIN(frame->ASP) * 0.05;
541 kt_globals.amp_frica = DBtoLIN(frame->AF) * 0.25;
542 kt_globals.par_amp_voice = DBtoLIN(frame->AVpdb);
543 kt_globals.amp_bypas = DBtoLIN(frame->AB) * 0.05;
544
545 for(ix=0; ix <= 6; ix++)
546 {
547 // parallel amplitudes F1 to F6, and parallel nasal pole
548 amp_par[ix] = DBtoLIN(frame->Ap[ix]) * amp_par_factor[ix];
549 }
550
551 Gain0_tmp = frame->Gain0 - 3;
552 if (Gain0_tmp <= 0)
553 {
554 Gain0_tmp = 57;
555 }
556 kt_globals.amp_gain0 = DBtoLIN(Gain0_tmp) / kt_globals.scale_wav;
557
558 /* Set coefficients of variable cascade resonators */
559 for(ix=0; ix<=8; ix++)
560 {
561 // formants 1 to 8, plus nasal pole
562 setabc(frame->Fhz[ix],frame->Bhz[ix],&(kt_globals.rsn[ix]));
563
564 if(ix <= 5)
565 {
566 setabc(frame->Fhz_next[ix],frame->Bhz_next[ix],&(kt_globals.rsn_next[ix]));
567
568 kt_globals.rsn[ix].a_inc = (kt_globals.rsn_next[ix].a - kt_globals.rsn[ix].a) / 64.0;
569 kt_globals.rsn[ix].b_inc = (kt_globals.rsn_next[ix].b - kt_globals.rsn[ix].b) / 64.0;
570 kt_globals.rsn[ix].c_inc = (kt_globals.rsn_next[ix].c - kt_globals.rsn[ix].c) / 64.0;
571 }
572 }
573
574 // nasal zero anti-resonator
575 setzeroabc(frame->Fhz[F_NZ],frame->Bhz[F_NZ],&(kt_globals.rsn[Rnz]));
576 setzeroabc(frame->Fhz_next[F_NZ],frame->Bhz_next[F_NZ],&(kt_globals.rsn_next[Rnz]));
577 kt_globals.rsn[F_NZ].a_inc = (kt_globals.rsn_next[F_NZ].a - kt_globals.rsn[F_NZ].a) / 64.0;
578 kt_globals.rsn[F_NZ].b_inc = (kt_globals.rsn_next[F_NZ].b - kt_globals.rsn[F_NZ].b) / 64.0;
579 kt_globals.rsn[F_NZ].c_inc = (kt_globals.rsn_next[F_NZ].c - kt_globals.rsn[F_NZ].c) / 64.0;
580
581
582 /* Set coefficients of parallel resonators, and amplitude of outputs */
583
584 for(ix=0; ix<=6; ix++)
585 {
586 setabc(frame->Fhz[ix],frame->Bphz[ix],&(kt_globals.rsn[Rparallel+ix]));
587 kt_globals.rsn[Rparallel+ix].a *= amp_par[ix];
588 }
589
590 /* output low-pass filter */
591
592 setabc((long)0.0,(long)(kt_globals.samrate/2),&(kt_globals.rsn[Rout]));
593
594 }
595
596
597
598 /*
599 function IMPULSIVE_SOURCE
600
601 Generate a low pass filtered train of impulses as an approximation of
602 a natural excitation waveform. Low-pass filter the differentiated impulse
603 with a critically-damped second-order filter, time constant proportional
604 to Kopen.
605 */
606
607
608 static double impulsive_source()
609 {
610 static double doublet[] = {0.0,13000000.0,-13000000.0};
611 static double vwave;
612
613 if (kt_globals.nper < 3)
614 {
615 vwave = doublet[kt_globals.nper];
616 }
617 else
618 {
619 vwave = 0.0;
620 }
621
622 return(resonator(&(kt_globals.rsn[RGL]),vwave));
623 }
624
625
626
627 /*
628 function NATURAL_SOURCE
629
630 Vwave is the differentiated glottal flow waveform, there is a weak
631 spectral zero around 800 Hz, magic constants a,b reset pitch synchronously.
632 */
633
634 static double natural_source()
635 {
636 double lgtemp;
637 static double vwave;
638
639 if (kt_globals.nper < kt_globals.nopen)
640 {
641 kt_globals.pulse_shape_a -= kt_globals.pulse_shape_b;
642 vwave += kt_globals.pulse_shape_a;
643 lgtemp=vwave * 0.028;
644
645 return(lgtemp);
646 }
647 else
648 {
649 vwave = 0.0;
650 return(0.0);
651 }
652 }
653
654
655
656
657
658 /*
659 function PITCH_SYNC_PAR_RESET
660
661 Reset selected parameters pitch-synchronously.
662
663
664 Constant B0 controls shape of glottal pulse as a function
665 of desired duration of open phase N0
666 (Note that N0 is specified in terms of 40,000 samples/sec of speech)
667
668 Assume voicing waveform V(t) has form: k1 t**2 - k2 t**3
669
670 If the radiation characterivative, a temporal derivative
671 is folded in, and we go from continuous time to discrete
672 integers n: dV/dt = vwave[n]
673 = sum over i=1,2,...,n of { a - (i * b) }
674 = a n - b/2 n**2
675
676 where the constants a and b control the detailed shape
677 and amplitude of the voicing waveform over the open
678 potion of the voicing cycle "nopen".
679
680 Let integral of dV/dt have no net dc flow --> a = (b * nopen) / 3
681
682 Let maximum of dUg(n)/dn be constant --> b = gain / (nopen * nopen)
683 meaning as nopen gets bigger, V has bigger peak proportional to n
684
685 Thus, to generate the table below for 40 <= nopen <= 263:
686
687 B0[nopen - 40] = 1920000 / (nopen * nopen)
688 */
689
690 static void pitch_synch_par_reset(klatt_frame_ptr frame)
691 {
692 long temp;
693 double temp1;
694 static long skew;
695 static short B0[224] =
696 {
697 1200,1142,1088,1038, 991, 948, 907, 869, 833, 799, 768, 738, 710, 683, 658,
698 634, 612, 590, 570, 551, 533, 515, 499, 483, 468, 454, 440, 427, 415, 403,
699 391, 380, 370, 360, 350, 341, 332, 323, 315, 307, 300, 292, 285, 278, 272,
700 265, 259, 253, 247, 242, 237, 231, 226, 221, 217, 212, 208, 204, 199, 195,
701 192, 188, 184, 180, 177, 174, 170, 167, 164, 161, 158, 155, 153, 150, 147,
702 145, 142, 140, 137, 135, 133, 131, 128, 126, 124, 122, 120, 119, 117, 115,
703 113,111, 110, 108, 106, 105, 103, 102, 100, 99, 97, 96, 95, 93, 92, 91, 90,
704 88, 87, 86, 85, 84, 83, 82, 80, 79, 78, 77, 76, 75, 75, 74, 73, 72, 71,
705 70, 69, 68, 68, 67, 66, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 57,
706 57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48,
707 47, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 40, 40,
708 39, 39, 38, 38, 38, 38, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 34, 34,33,
709 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29,
710 28, 28, 28, 28, 27, 27
711 };
712
713 if (frame->F0hz10 > 0)
714 {
715 /* T0 is 4* the number of samples in one pitch period */
716
717 kt_globals.T0 = (40 * kt_globals.samrate) / frame->F0hz10;
718
719
720 kt_globals.amp_voice = DBtoLIN(frame->AVdb_tmp);
721
722 /* Duration of period before amplitude modulation */
723
724 kt_globals.nmod = kt_globals.T0;
725 if (frame->AVdb_tmp > 0)
726 {
727 kt_globals.nmod >>= 1;
728 }
729
730 /* Breathiness of voicing waveform */
731
732 kt_globals.amp_breth = DBtoLIN(frame->Aturb) * 0.1;
733
734 /* Set open phase of glottal period where 40 <= open phase <= 263 */
735
736 kt_globals.nopen = 4 * frame->Kopen;
737
738 if ((kt_globals.glsource == IMPULSIVE) && (kt_globals.nopen > 263))
739 {
740 kt_globals.nopen = 263;
741 }
742
743 if (kt_globals.nopen >= (kt_globals.T0-1))
744 {
745 // printf("Warning: glottal open period cannot exceed T0, truncated\n");
746 kt_globals.nopen = kt_globals.T0 - 2;
747 }
748
749 if (kt_globals.nopen < 40)
750 {
751 /* F0 max = 1000 Hz */
752 // printf("Warning: minimum glottal open period is 10 samples.\n");
753 // printf("truncated, nopen = %d\n",kt_globals.nopen);
754 kt_globals.nopen = 40;
755 }
756
757
758 /* Reset a & b, which determine shape of "natural" glottal waveform */
759
760 kt_globals.pulse_shape_b = B0[kt_globals.nopen-40];
761 kt_globals.pulse_shape_a = (kt_globals.pulse_shape_b * kt_globals.nopen) * 0.333;
762
763 /* Reset width of "impulsive" glottal pulse */
764
765 temp = kt_globals.samrate / kt_globals.nopen;
766
767 setabc((long)0,temp,&(kt_globals.rsn[RGL]));
768
769 /* Make gain at F1 about constant */
770
771 temp1 = kt_globals.nopen *.00833;
772 kt_globals.rsn[RGL].a *= temp1 * temp1;
773
774 /*
775 Truncate skewness so as not to exceed duration of closed phase
776 of glottal period.
777 */
778
779
780 temp = kt_globals.T0 - kt_globals.nopen;
781 if (frame->Kskew > temp)
782 {
783 // printf("Kskew duration=%d > glottal closed period=%d, truncate\n", frame->Kskew, kt_globals.T0 - kt_globals.nopen);
784 frame->Kskew = temp;
785 }
786 if (skew >= 0)
787 {
788 skew = frame->Kskew;
789 }
790 else
791 {
792 skew = - frame->Kskew;
793 }
794
795 /* Add skewness to closed portion of voicing period */
796 kt_globals.T0 = kt_globals.T0 + skew;
797 skew = - skew;
798 }
799 else
800 {
801 kt_globals.T0 = 4; /* Default for f0 undefined */
802 kt_globals.amp_voice = 0.0;
803 kt_globals.nmod = kt_globals.T0;
804 kt_globals.amp_breth = 0.0;
805 kt_globals.pulse_shape_a = 0.0;
806 kt_globals.pulse_shape_b = 0.0;
807 }
808
809 /* Reset these pars pitch synchronously or at update rate if f0=0 */
810
811 if ((kt_globals.T0 != 4) || (kt_globals.ns == 0))
812 {
813 /* Set one-pole low-pass filter that tilts glottal source */
814
815 kt_globals.decay = (0.033 * frame->TLTdb);
816
817 if (kt_globals.decay > 0.0)
818 {
819 kt_globals.onemd = 1.0 - kt_globals.decay;
820 }
821 else
822 {
823 kt_globals.onemd = 1.0;
824 }
825 }
826 }
827
828
829
830 /*
831 function SETABC
832
833 Convert formant freqencies and bandwidth into resonator difference
834 equation constants.
835 */
836
837
838 static void setabc(long int f, long int bw, resonator_ptr rp)
839 {
840 double r;
841 double arg;
842
843 /* Let r = exp(-pi bw t) */
844 arg = kt_globals.minus_pi_t * bw;
845 r = exp(arg);
846
847 /* Let c = -r**2 */
848 rp->c = -(r * r);
849
850 /* Let b = r * 2*cos(2 pi f t) */
851 arg = kt_globals.two_pi_t * f;
852 rp->b = r * cos(arg) * 2.0;
853
854 /* Let a = 1.0 - b - c */
855 rp->a = 1.0 - rp->b - rp->c;
856 }
857
858
859 /*
860 function SETZEROABC
861
862 Convert formant freqencies and bandwidth into anti-resonator difference
863 equation constants.
864 */
865
866 static void setzeroabc(long int f, long int bw, resonator_ptr rp)
867 {
868 double r;
869 double arg;
870
871 f = -f;
872
873 if(f>=0)
874 {
875 f = -1;
876 }
877
878 /* First compute ordinary resonator coefficients */
879 /* Let r = exp(-pi bw t) */
880 arg = kt_globals.minus_pi_t * bw;
881 r = exp(arg);
882
883 /* Let c = -r**2 */
884 rp->c = -(r * r);
885
886 /* Let b = r * 2*cos(2 pi f t) */
887 arg = kt_globals.two_pi_t * f;
888 rp->b = r * cos(arg) * 2.;
889
890 /* Let a = 1.0 - b - c */
891 rp->a = 1.0 - rp->b - rp->c;
892
893 /* Now convert to antiresonator coefficients (a'=1/a, b'=b/a, c'=c/a) */
894 rp->a = 1.0 / rp->a;
895 rp->c *= -rp->a;
896 rp->b *= -rp->a;
897 }
898
899
900 /*
901 function GEN_NOISE
902
903 Random number generator (return a number between -8191 and +8191)
904 Noise spectrum is tilted down by soft low-pass filter having a pole near
905 the origin in the z-plane, i.e. output = input + (0.75 * lastoutput)
906 */
907
908
909 static double gen_noise(double noise)
910 {
911 long temp;
912 static double nlast;
913
914 temp = (long) getrandom(-8191,8191);
915 kt_globals.nrand = (long) temp;
916
917 noise = kt_globals.nrand + (0.75 * nlast);
918 nlast = noise;
919
920 return(noise);
921 }
922
923
/*
    function DBTOLIN

    Convert from decibels to a linear scale factor


    Conversion table, db to linear, 87 dB --> 32767
    86 dB --> 29491 (1 dB down = 0.5**1/6)
    ...
    81 dB --> 16384 (6 dB down = 0.5)
    ...
    0 dB --> 0

    The just noticeable difference for a change in intensity of a vowel
    is approximately 1 dB. Thus all amplitudes are quantized to 1 dB
    steps.

    The table value is multiplied by 0.001 before it is returned.
    Values of dB outside 0..87 return 0.

    The entry for 55 dB is 811: it sits between 719 and 911, is twice the
    49 dB entry (405) and half the 61 dB entry (1622). The previous value
    881 broke the 6 dB = factor 2 progression of the table.
*/


static double DBtoLIN(long dB)
{
    static const short amptable[88] =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7,
        8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 25, 28, 32,
        35, 40, 45, 51, 57, 64, 71, 80, 90, 101, 114, 128,
        142, 159, 179, 202, 227, 256, 284, 318, 359, 405,
        455, 512, 568, 638, 719, 811, 911, 1024, 1137, 1276,
        1438, 1622, 1823, 2048, 2273, 2552, 2875, 3244, 3645,
        4096, 4547, 5104, 5751, 6488, 7291, 8192, 9093, 10207,
        11502, 12976, 14582, 16384, 18350, 20644, 23429,
        26214, 29491, 32767 };

    if ((dB < 0) || (dB > 87)) {
        return(0);
    }

    return((double)(amptable[dB]) * 0.001);
}
964
965
966
967
968
969 extern voice_t *wvoice;
970 static klatt_peaks_t peaks[N_PEAKS];
971 static int end_wave;
972 static int klattp[N_KLATTP];
973 static double klattp1[N_KLATTP];
974 static double klattp_inc[N_KLATTP];
975
976 static int scale_wav_tab[] = {45,38,45,45}; // scale output from different voicing sources
977
978
979
980 int Wavegen_Klatt(int resume)
981 {//==========================
982 int pk;
983 int x;
984 int ix;
985
986 if(resume==0)
987 {
988 sample_count = 0;
989 }
990
991 while(sample_count < nsamples)
992 {
993 kt_frame.F0hz10 = (wdata.pitch * 10) / 4096;
994
995 // formants F6,F7,F8 are fixed values for cascade resonators, set in KlattInit()
996 // but F6 is used for parallel resonator
997 // F0 is used for the nasal zero
998 for(ix=0; ix < 6; ix++)
999 {
1000 kt_frame.Fhz[ix] = peaks[ix].freq;
1001 if(ix < 4)
1002 {
1003 kt_frame.Bhz[ix] = peaks[ix].bw;
1004 }
1005 }
1006 for(ix=1; ix < 7; ix++)
1007 {
1008 kt_frame.Ap[ix] = 0;
1009 }
1010
1011 kt_frame.AVdb = klattp[KLATT_AV];
1012 kt_frame.AVpdb = klattp[KLATT_AVp];
1013 kt_frame.AF = klattp[KLATT_Fric];
1014 kt_frame.AB = klattp[KLATT_FricBP];
1015 kt_frame.ASP = klattp[KLATT_Aspr];
1016 kt_frame.Aturb = klattp[KLATT_Turb];
1017 kt_frame.Kskew = klattp[KLATT_Skew];
1018 kt_frame.TLTdb = klattp[KLATT_Tilt];
1019 kt_frame.Kopen = klattp[KLATT_Kopen];
1020
1021 // advance formants
1022 for(pk=0; pk<N_PEAKS; pk++)
1023 {
1024 peaks[pk].freq1 += peaks[pk].freq_inc;
1025 peaks[pk].freq = (int)peaks[pk].freq1;
1026 peaks[pk].bw1 += peaks[pk].bw_inc;
1027 peaks[pk].bw = (int)peaks[pk].bw1;
1028 peaks[pk].bp1 += peaks[pk].bp_inc;
1029 peaks[pk].bp = (int)peaks[pk].bp1;
1030 peaks[pk].ap1 += peaks[pk].ap_inc;
1031 peaks[pk].ap = (int)peaks[pk].ap1;
1032 }
1033
1034 // advance other parameters
1035 for(ix=0; ix < N_KLATTP; ix++)
1036 {
1037 klattp1[ix] += klattp_inc[ix];
1038 klattp[ix] = (int)(klattp1[ix]);
1039 }
1040
1041 for(ix=0; ix<=6; ix++)
1042 {
1043 kt_frame.Fhz_next[ix] = peaks[ix].freq;
1044 if(ix < 4)
1045 {
1046 kt_frame.Bhz_next[ix] = peaks[ix].bw;
1047 }
1048 }
1049
1050 // advance the pitch
1051 wdata.pitch_ix += wdata.pitch_inc;
1052 if((ix = wdata.pitch_ix>>8) > 127) ix = 127;
1053 x = wdata.pitch_env[ix] * wdata.pitch_range;
1054 wdata.pitch = (x>>8) + wdata.pitch_base;
1055
1056 kt_globals.nspfr = (nsamples - sample_count);
1057 if(kt_globals.nspfr > STEPSIZE)
1058 kt_globals.nspfr = STEPSIZE;
1059
1060 if(parwave(&kt_frame) == 1)
1061 {
1062 return(1);
1063 }
1064 }
1065
1066 if(end_wave == 1)
1067 {
1068 // fade out to avoid a click
1069 kt_globals.fadeout = 64;
1070 end_wave = 0;
1071 sample_count -= 64;
1072 kt_globals.nspfr = 64;
1073 if(parwave(&kt_frame) == 1)
1074 {
1075 return(1);
1076 }
1077 }
1078
1079 return(0);
1080 }
1081
1082
1083 void SetSynth_Klatt(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v, int control)
1084 {//===========================================================================================
1085 int ix;
1086 DOUBLEX next;
1087 int qix;
1088 int cmd;
1089 static frame_t prev_fr;
1090
1091 if(wvoice != NULL)
1092 {
1093 if((wvoice->klattv[0] > 0) && (wvoice->klattv[0] <=3 ))
1094 {
1095 kt_globals.glsource = wvoice->klattv[0];
1096 kt_globals.scale_wav = scale_wav_tab[kt_globals.glsource];
1097 }
1098 kt_globals.f0_flutter = wvoice->flutter/32;
1099 }
1100
1101 end_wave = 0;
1102 if(control & 2)
1103 {
1104 end_wave = 1; // fadeout at the end
1105 }
1106 if(control & 1)
1107 {
1108 end_wave = 1;
1109 for(qix=wcmdq_head+1;;qix++)
1110 {
1111 if(qix >= N_WCMDQ) qix = 0;
1112 if(qix == wcmdq_tail) break;
1113
1114 cmd = wcmdq[qix][0];
1115 if(cmd==WCMD_KLATT)
1116 {
1117 end_wave = 0; // next wave generation is from another spectrum
1118 break;
1119 }
1120 if((cmd==WCMD_WAVE) || (cmd==WCMD_PAUSE))
1121 break; // next is not from spectrum, so continue until end of wave cycle
1122 }
1123 }
1124
1125 {
1126 //FILE *f;
1127 //f=fopen("klatt_log","a");
1128 //fprintf(f,"len %4d (%3d %4d %4d) (%3d %4d %4d)\n",length,fr1->ffreq[1],fr1->ffreq[2],fr1->ffreq[3],fr2->ffreq[1],fr2->ffreq[2],fr2->ffreq[3]);
1129 //fclose(f);
1130 }
1131
1132 if(control & 1)
1133 {
1134 if(wdata.prev_was_synth == 0)
1135 {
1136 // A break, not following on from another synthesized sound.
1137 // Reset the synthesizer
1138 //reset_resonators(&kt_globals);
1139 parwave_init();
1140 }
1141 else
1142 {
1143 if((prev_fr.ffreq[1] != fr1->ffreq[1]) || (prev_fr.ffreq[2] != fr1->ffreq[2]))
1144 {
1145
1146 // fade out to avoid a click, but only up to the end of output buffer
1147 ix = (out_end - out_ptr)/2;
1148 if(ix > 64)
1149 ix = 64;
1150 kt_globals.fadeout = ix;
1151 kt_globals.nspfr = ix;
1152 parwave(&kt_frame);
1153
1154 //reset_resonators(&kt_globals);
1155 parwave_init();
1156 }
1157 }
1158 wdata.prev_was_synth = 1;
1159 memcpy(&prev_fr,fr2,sizeof(prev_fr));
1160 }
1161 if(fr2->frflags & FRFLAG_BREAK)
1162 {
1163 // fr2 = fr1;
1164 // reset_resonators(&kt_globals);
1165 }
1166
1167 for(ix=0; ix<N_KLATTP; ix++)
1168 {
1169 if((ix >= 5) && ((fr1->frflags & FRFLAG_KLATT) == 0))
1170 {
1171 klattp1[ix] = klattp[ix] = 0;
1172 klattp_inc[ix] = 0;
1173 }
1174 else
1175 {
1176 klattp1[ix] = klattp[ix] = fr1->klattp[ix];
1177 klattp_inc[ix] = (double)((fr2->klattp[ix] - klattp[ix]) * STEPSIZE)/length;
1178 }
1179
1180 // get klatt parameter adjustments for the voice
1181 // if((ix>0) && (ix < KLATT_AVp))
1182 // klattp1[ix] = klattp[ix] = (klattp[ix] + wvoice->klattv[ix]);
1183 }
1184
1185 nsamples = length;
1186
1187 for(ix=1; ix < 6; ix++)
1188 {
1189 peaks[ix].freq1 = (fr1->ffreq[ix] * v->freq[ix] / 256.0) + v->freqadd[ix];
1190 peaks[ix].freq = (int)(peaks[ix].freq1);
1191 next = (fr2->ffreq[ix] * v->freq[ix] / 256.0) + v->freqadd[ix];
1192 peaks[ix].freq_inc = ((next - peaks[ix].freq1) * STEPSIZE) / length;
1193
1194 if(ix < 4)
1195 {
1196 // klatt bandwidth for f1, f2, f3 (others are fixed)
1197 peaks[ix].bw1 = fr1->bw[ix] * 2;
1198 peaks[ix].bw = (int)(peaks[ix].bw1);
1199 next = fr2->bw[ix] * 2;
1200 peaks[ix].bw_inc = ((next - peaks[ix].bw1) * STEPSIZE) / length;
1201 }
1202 }
1203
1204 // nasal zero frequency
1205 peaks[0].freq1 = fr1->klattp[KLATT_FNZ] * 2;
1206 peaks[0].freq = (int)(peaks[0].freq1);
1207 next = fr2->klattp[KLATT_FNZ] * 2;
1208 peaks[0].freq_inc = ((next - peaks[0].freq1) * STEPSIZE) / length;
1209
1210 peaks[0].bw1 = 89;
1211 peaks[0].bw = 89;
1212 peaks[0].bw_inc = 0;
1213
1214 if(fr1->frflags & FRFLAG_KLATT)
1215 {
1216 // the frame contains additional parameters for parallel resonators
1217 for(ix=1; ix < 7; ix++)
1218 {
1219 peaks[ix].bp1 = fr1->klatt_bp[ix] * 4; // parallel bandwidth
1220 peaks[ix].bp = (int)(peaks[ix].bp1);
1221 next = fr2->klatt_bp[ix] * 2;
1222 peaks[ix].bp_inc = ((next - peaks[ix].bp1) * STEPSIZE) / length;
1223
1224 peaks[ix].ap1 = fr1->klatt_ap[ix]; // parallal amplitude
1225 peaks[ix].ap = (int)(peaks[ix].ap1);
1226 next = fr2->klatt_ap[ix] * 2;
1227 peaks[ix].ap_inc = ((next - peaks[ix].ap1) * STEPSIZE) / length;
1228 }
1229 }
1230 } // end of SetSynth_Klatt
1231
1232
1233 int Wavegen_Klatt2(int length, int modulation, int resume, frame_t *fr1, frame_t *fr2)
1234 {//===================================================================================
1235 if(resume==0)
1236 SetSynth_Klatt(length, modulation, fr1, fr2, wvoice, 1);
1237
1238 return(Wavegen_Klatt(resume));
1239 }
1240
1241
1242
1243 void KlattInit()
1244 {
1245 #define NUMBER_OF_SAMPLES 100
1246
1247 static short natural_samples[NUMBER_OF_SAMPLES]=
1248 {
1249 -310,-400,530,356,224,89,23,-10,-58,-16,461,599,536,701,770,
1250 605,497,461,560,404,110,224,131,104,-97,155,278,-154,-1165,
1251 -598,737,125,-592,41,11,-247,-10,65,92,80,-304,71,167,-1,122,
1252 233,161,-43,278,479,485,407,266,650,134,80,236,68,260,269,179,
1253 53,140,275,293,296,104,257,152,311,182,263,245,125,314,140,44,
1254 203,230,-235,-286,23,107,92,-91,38,464,443,176,98,-784,-2449,
1255 -1891,-1045,-1600,-1462,-1384,-1261,-949,-730
1256 };
1257 static short formant_hz[10] = {280,688,1064,2806,3260,3700,6500,7000,8000,280};
1258 static short bandwidth[10] = {89,160,70,160,200,200,500,500,500,89};
1259 static short parallel_amp[10] = { 0,59,59,59,59,59,59,0,0,0};
1260 static short parallel_bw[10] = {59,59,89,149,200,200,500,0,0,0};
1261
1262 int ix;
1263
1264 sample_count=0;
1265
1266 kt_globals.synthesis_model = CASCADE_PARALLEL;
1267 kt_globals.samrate = 22050;
1268
1269 kt_globals.glsource = IMPULSIVE; // IMPULSIVE, NATURAL, SAMPLED
1270 kt_globals.scale_wav = scale_wav_tab[kt_globals.glsource];
1271 kt_globals.natural_samples = natural_samples;
1272 kt_globals.num_samples = NUMBER_OF_SAMPLES;
1273 kt_globals.sample_factor = 3.0;
1274 kt_globals.nspfr = (kt_globals.samrate * 10) / 1000;
1275 kt_globals.outsl = 0;
1276 kt_globals.f0_flutter = 20;
1277
1278 parwave_init();
1279
1280 // set default values for frame parameters
1281 for(ix=0; ix<=9; ix++)
1282 {
1283 kt_frame.Fhz[ix] = formant_hz[ix];
1284 kt_frame.Bhz[ix] = bandwidth[ix];
1285 kt_frame.Ap[ix] = parallel_amp[ix];
1286 kt_frame.Bphz[ix] = parallel_bw[ix];
1287 }
1288 kt_frame.Bhz_next[F_NZ] = bandwidth[F_NZ];
1289
1290 kt_frame.F0hz10 = 1000;
1291 kt_frame.AVdb = 59; // 59
1292 kt_frame.ASP = 0;
1293 kt_frame.Kopen = 40; // 40
1294 kt_frame.Aturb = 0;
1295 kt_frame.TLTdb = 0;
1296 kt_frame.AF =50;
1297 kt_frame.Kskew = 0;
1298 kt_frame.AB = 0;
1299 kt_frame.AVpdb = 0;
1300 kt_frame.Gain0 = 60; // 62
1301 } // end of KlattInit
1302
1303 #endif // INCLUDE_KLATT

   
Visit the ZANavi Wiki