You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
	
	
		
			93 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C
		
	
		
		
			
		
	
	
			93 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C
		
	
| 
											10 years ago
										 | /* Copyright (C) 2002 Jean-Marc Valin */ | ||
|  | /**
 | ||
|  |    @file ltp_sse.h | ||
|  |    @brief Long-Term Prediction functions (SSE version) | ||
|  | */ | ||
|  | /*
 | ||
|  |    Redistribution and use in source and binary forms, with or without | ||
|  |    modification, are permitted provided that the following conditions | ||
|  |    are met: | ||
|  |     | ||
|  |    - Redistributions of source code must retain the above copyright | ||
|  |    notice, this list of conditions and the following disclaimer. | ||
|  |     | ||
|  |    - Redistributions in binary form must reproduce the above copyright | ||
|  |    notice, this list of conditions and the following disclaimer in the | ||
|  |    documentation and/or other materials provided with the distribution. | ||
|  |     | ||
|  |    - Neither the name of the Xiph.org Foundation nor the names of its | ||
|  |    contributors may be used to endorse or promote products derived from | ||
|  |    this software without specific prior written permission. | ||
|  |     | ||
|  |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
|  |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
|  |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
|  |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR | ||
|  |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
|  |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
|  |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
|  |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
|  |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
|  |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
|  |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|  | */ | ||
|  | 
 | ||
|  | #include <xmmintrin.h>
 | ||
|  | 
 | ||
|  | #define OVERRIDE_INNER_PROD
 | ||
/**
 * Dot product of two float arrays (SSE version).
 *
 * @param a    First input array, at least len floats. No alignment is
 *             required (unaligned loads are used).
 * @param b    Second input array, at least len floats. No alignment is
 *             required.
 * @param len  Number of samples to multiply-accumulate. Any value is
 *             accepted; len <= 0 yields 0.
 * @return     Sum of a[i]*b[i] for i in [0, len).
 *
 * Samples are consumed 8 at a time, then at most one group of 4, then at
 * most 3 scalar leftovers. Exactly len elements of each array are read, so
 * a length that is not a multiple of 8 neither reads past the end of the
 * inputs nor drops trailing samples. For lengths that are a multiple of 8
 * the operation order is the same as a plain 8-wide unrolled loop.
 */
float inner_prod(const float *a, const float *b, int len)
{
   int i;
   float ret;
   __m128 sum = _mm_setzero_ps();

   /* Main loop: two 4-float vectors (8 samples) per iteration.
      "len-i" is used rather than "i+8" so the bound cannot overflow. */
   for (i=0;len-i>=8;i+=8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
   }
   /* At most one complete 4-float vector can remain */
   if (len-i>=4)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      i += 4;
   }

   /* Horizontal sum: fold the upper two lanes onto the lower two, then add
      lane 1 (broadcast by the 0x55 shuffle) into lane 0 */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);

   /* Scalar tail: the last len%4 samples */
   for (;i<len;i++)
      ret += a[i]*b[i];
   return ret;
}
|  | 
 | ||
|  | #define OVERRIDE_PITCH_XCORR
 | ||
|  | void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack) | ||
|  | { | ||
|  |    int i, offset; | ||
|  |    VARDECL(__m128 *x); | ||
|  |    VARDECL(__m128 *y); | ||
|  |    int N, L; | ||
|  |    N = len>>2; | ||
|  |    L = nb_pitch>>2; | ||
|  |    ALLOC(x, N, __m128); | ||
|  |    ALLOC(y, N+L, __m128); | ||
|  |    for (i=0;i<N;i++) | ||
|  |       x[i] = _mm_loadu_ps(_x+(i<<2)); | ||
|  |    for (offset=0;offset<4;offset++) | ||
|  |    { | ||
|  |       for (i=0;i<N+L;i++) | ||
|  |          y[i] = _mm_loadu_ps(_y+(i<<2)+offset); | ||
|  |       for (i=0;i<L;i++) | ||
|  |       { | ||
|  |          int j; | ||
|  |          __m128 sum, *xx, *yy; | ||
|  |          sum = _mm_setzero_ps(); | ||
|  |          yy = y+i; | ||
|  |          xx = x; | ||
|  |          for (j=0;j<N;j+=2) | ||
|  |          { | ||
|  |             sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0])); | ||
|  |             sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1])); | ||
|  |             xx += 2; | ||
|  |             yy += 2; | ||
|  |          } | ||
|  |          sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); | ||
|  |          sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); | ||
|  |          _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum); | ||
|  |       } | ||
|  |    } | ||
|  | } |