You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
	
	
		
			276 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			C
		
	
		
		
			
		
	
	
			276 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			C
		
	
| 
											10 years ago
										 | /* Copyright (C) 2002 Jean-Marc Valin 
 | ||
|  |    File: vbr.c | ||
|  | 
 | ||
|  |    VBR-related routines | ||
|  | 
 | ||
|  |    Redistribution and use in source and binary forms, with or without | ||
|  |    modification, are permitted provided that the following conditions | ||
|  |    are met: | ||
|  |     | ||
|  |    - Redistributions of source code must retain the above copyright | ||
|  |    notice, this list of conditions and the following disclaimer. | ||
|  |     | ||
|  |    - Redistributions in binary form must reproduce the above copyright | ||
|  |    notice, this list of conditions and the following disclaimer in the | ||
|  |    documentation and/or other materials provided with the distribution. | ||
|  |     | ||
|  |    - Neither the name of the Xiph.org Foundation nor the names of its | ||
|  |    contributors may be used to endorse or promote products derived from | ||
|  |    this software without specific prior written permission. | ||
|  |     | ||
|  |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
|  |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
|  |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
|  |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR | ||
|  |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
|  |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
|  |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
|  |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
|  |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
|  |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
|  |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|  | 
 | ||
|  | */ | ||
|  | 
 | ||
|  | #ifdef HAVE_CONFIG_H
 | ||
|  | #include "config.h"
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #include "vbr.h"
 | ||
|  | #include <math.h>
 | ||
|  | 
 | ||
|  | 
 | ||
/* Square of x. The argument is expanded twice, so it must not have side
   effects (no sqr(i++)). */
#define sqr(x) ((x)*(x))

/* Energy constant used in this file: added to the frame energy before taking
   its log, used to seed the log-energy history and the noise accumulator, and
   used as the minimum energy a frame needs before it may update the noise
   estimate. */
#define MIN_ENERGY 6000
/* Exponent applied to energies (pow(ener, NOISE_POW)) when tracking the
   noise level. */
#define NOISE_POW .3
|  | 
 | ||
|  | #ifndef DISABLE_VBR
 | ||
|  | 
 | ||
/* Threshold table with 9 rows of 11 entries each. Each row is labelled with
   the bit-rate it corresponds to (row order is not sorted by bit-rate: the
   4 kbps row comes last).
   NOTE(review): the 11 columns presumably correspond to quality settings
   0..10 -- confirm against the code that reads this table. */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f}, /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f}, /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f}, /* 24 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
};
|  | 
 | ||
|  | 
 | ||
/* Threshold table with 5 rows of 11 entries each; each row is labelled with
   the bit-rate it corresponds to. The first two rows are all -1.
   NOTE(review): the 11 columns presumably correspond to quality settings
   0..10 -- confirm against the code that reads this table. */
const float vbr_hb_thresh[5][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*  2 kbps */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f}, /*  6 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f}, /* 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}  /* 18 kbps */
};
|  | 
 | ||
/* Threshold table with 2 rows of 11 entries each; each row is labelled with
   the bit-rate it corresponds to.
   NOTE(review): the 11 columns presumably correspond to quality settings
   0..10 -- confirm against the code that reads this table. */
const float vbr_uhb_thresh[2][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}  /*  2 kbps */
};
|  | 
 | ||
|  | void vbr_init(VBRState *vbr) | ||
|  | { | ||
|  |    int i; | ||
|  | 
 | ||
|  |    vbr->average_energy=0; | ||
|  |    vbr->last_energy=1; | ||
|  |    vbr->accum_sum=0; | ||
|  |    vbr->energy_alpha=.1; | ||
|  |    vbr->soft_pitch=0; | ||
|  |    vbr->last_pitch_coef=0; | ||
|  |    vbr->last_quality=0; | ||
|  | 
 | ||
|  |    vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW); | ||
|  |    vbr->noise_accum_count=.05; | ||
|  |    vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; | ||
|  |    vbr->consec_noise=0; | ||
|  | 
 | ||
|  | 
 | ||
|  |    for (i=0;i<VBR_MEMORY_SIZE;i++) | ||
|  |       vbr->last_log_energy[i] = log(MIN_ENERGY); | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | /*
 | ||
|  |   This function should analyse the signal and decide how critical the | ||
|  |   coding error will be perceptually. The following factors should be | ||
|  |   taken into account: | ||
|  | 
 | ||
|  |   -Attacks (positive energy derivative) should be coded with more bits | ||
|  | 
 | ||
|  |   -Stationary voiced segments should receive more bits | ||
|  | 
 | ||
|  |   -Segments with (very) low absolute energy should receive less bits (maybe | ||
|  |   only shaped noise?) | ||
|  | 
 | ||
|  |   -DTX for near-zero energy? | ||
|  | 
 | ||
|  |   -Stationary fricative segments should have less bits | ||
|  | 
 | ||
|  |   -Temporal masking: when energy slope is decreasing, decrease the bit-rate | ||
|  | 
 | ||
|  |   -Decrease bit-rate for males (low pitch)? | ||
|  | 
 | ||
|  |   -(wideband only) less bits in the high-band when signal is very  | ||
|  |   non-stationary (harder to notice high-frequency noise)??? | ||
|  | 
 | ||
|  | */ | ||
|  | 
 | ||
|  | float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef) | ||
|  | { | ||
|  |    int i; | ||
|  |    float ener=0, ener1=0, ener2=0; | ||
|  |    float qual=7; | ||
|  |    int va; | ||
|  |    float log_energy; | ||
|  |    float non_st=0; | ||
|  |    float voicing; | ||
|  |    float pow_ener; | ||
|  | 
 | ||
|  |    for (i=0;i<len>>1;i++) | ||
|  |       ener1 += ((float)sig[i])*sig[i]; | ||
|  | 
 | ||
|  |    for (i=len>>1;i<len;i++) | ||
|  |       ener2 += ((float)sig[i])*sig[i]; | ||
|  |    ener=ener1+ener2; | ||
|  | 
 | ||
|  |    log_energy = log(ener+MIN_ENERGY); | ||
|  |    for (i=0;i<VBR_MEMORY_SIZE;i++) | ||
|  |       non_st += sqr(log_energy-vbr->last_log_energy[i]); | ||
|  |    non_st =  non_st/(30*VBR_MEMORY_SIZE); | ||
|  |    if (non_st>1) | ||
|  |       non_st=1; | ||
|  | 
 | ||
|  |    voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4); | ||
|  |    vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener; | ||
|  |    vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; | ||
|  |    pow_ener = pow(ener,NOISE_POW); | ||
|  |    if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY) | ||
|  |       vbr->noise_accum = .05*pow_ener; | ||
|  | 
 | ||
|  |    if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level) | ||
|  |        || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level) | ||
|  |        || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level) | ||
|  |        || (voicing<0 && non_st < .05)) | ||
|  |    { | ||
|  |       float tmp; | ||
|  |       va = 0; | ||
|  |       vbr->consec_noise++; | ||
|  |       if (pow_ener > 3*vbr->noise_level) | ||
|  |          tmp = 3*vbr->noise_level; | ||
|  |       else  | ||
|  |          tmp = pow_ener; | ||
|  |       if (vbr->consec_noise>=4) | ||
|  |       { | ||
|  |          vbr->noise_accum = .95*vbr->noise_accum + .05*tmp; | ||
|  |          vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; | ||
|  |       } | ||
|  |    } else { | ||
|  |       va = 1; | ||
|  |       vbr->consec_noise=0; | ||
|  |    } | ||
|  | 
 | ||
|  |    if (pow_ener < vbr->noise_level && ener>MIN_ENERGY) | ||
|  |    { | ||
|  |       vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener; | ||
|  |       vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;       | ||
|  |    } | ||
|  | 
 | ||
|  |    /* Checking for very low absolute energy */ | ||
|  |    if (ener < 30000) | ||
|  |    { | ||
|  |       qual -= .7; | ||
|  |       if (ener < 10000) | ||
|  |          qual-=.7; | ||
|  |       if (ener < 3000) | ||
|  |          qual-=.7; | ||
|  |    } else { | ||
|  |       float short_diff, long_diff; | ||
|  |       short_diff = log((ener+1)/(1+vbr->last_energy)); | ||
|  |       long_diff = log((ener+1)/(1+vbr->average_energy)); | ||
|  |       /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/ | ||
|  | 
 | ||
|  |       if (long_diff<-5) | ||
|  |          long_diff=-5; | ||
|  |       if (long_diff>2) | ||
|  |          long_diff=2; | ||
|  | 
 | ||
|  |       if (long_diff>0) | ||
|  |          qual += .6*long_diff; | ||
|  |       if (long_diff<0) | ||
|  |          qual += .5*long_diff; | ||
|  |       if (short_diff>0) | ||
|  |       { | ||
|  |          if (short_diff>5) | ||
|  |             short_diff=5; | ||
|  |          qual += .5*short_diff; | ||
|  |       } | ||
|  |       /* Checking for energy increases */ | ||
|  |       if (ener2 > 1.6*ener1) | ||
|  |          qual += .5; | ||
|  |    } | ||
|  |    vbr->last_energy = ener; | ||
|  |    vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef; | ||
|  |    qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4)); | ||
|  | 
 | ||
|  |    if (qual < vbr->last_quality) | ||
|  |       qual = .5*qual + .5*vbr->last_quality; | ||
|  |    if (qual<4) | ||
|  |       qual=4; | ||
|  |    if (qual>10) | ||
|  |       qual=10; | ||
|  |     | ||
|  |    /*
 | ||
|  |    if (vbr->consec_noise>=2) | ||
|  |       qual-=1.3; | ||
|  |    if (vbr->consec_noise>=5) | ||
|  |       qual-=1.3; | ||
|  |    if (vbr->consec_noise>=12) | ||
|  |       qual-=1.3; | ||
|  |    */ | ||
|  |    if (vbr->consec_noise>=3) | ||
|  |       qual=4; | ||
|  | 
 | ||
|  |    if (vbr->consec_noise) | ||
|  |       qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3)); | ||
|  |    if (qual<0) | ||
|  |       qual=0; | ||
|  |     | ||
|  |    if (ener<60000) | ||
|  |    { | ||
|  |       if (vbr->consec_noise>2) | ||
|  |          qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); | ||
|  |       if (ener<10000&&vbr->consec_noise>2) | ||
|  |          qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); | ||
|  |       if (qual<0) | ||
|  |          qual=0; | ||
|  |       qual += .3*log(.0001+ener/60000.0); | ||
|  |    } | ||
|  |    if (qual<-1) | ||
|  |       qual=-1; | ||
|  | 
 | ||
|  |    /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/ | ||
|  | 
 | ||
|  |    vbr->last_pitch_coef = pitch_coef; | ||
|  |    vbr->last_quality = qual; | ||
|  | 
 | ||
|  |    for (i=VBR_MEMORY_SIZE-1;i>0;i--) | ||
|  |       vbr->last_log_energy[i] = vbr->last_log_energy[i-1]; | ||
|  |    vbr->last_log_energy[0] = log_energy; | ||
|  | 
 | ||
|  |    /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/ | ||
|  | 
 | ||
|  |    return qual; | ||
|  | } | ||
|  | 
 | ||
|  | void vbr_destroy(VBRState *vbr) | ||
|  | { | ||
|  | } | ||
|  | 
 | ||
|  | #endif /* #ifndef DISABLE_VBR */
 |