You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			93 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C
		
	
			
		
		
	
	
			93 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C
		
	
| /* Copyright (C) 2002 Jean-Marc Valin */
 | |
| /**
 | |
|    @file ltp_sse.h
 | |
|    @brief Long-Term Prediction functions (SSE version)
 | |
| */
 | |
| /*
 | |
|    Redistribution and use in source and binary forms, with or without
 | |
|    modification, are permitted provided that the following conditions
 | |
|    are met:
 | |
|    
 | |
|    - Redistributions of source code must retain the above copyright
 | |
|    notice, this list of conditions and the following disclaimer.
 | |
|    
 | |
|    - Redistributions in binary form must reproduce the above copyright
 | |
|    notice, this list of conditions and the following disclaimer in the
 | |
|    documentation and/or other materials provided with the distribution.
 | |
|    
 | |
|    - Neither the name of the Xiph.org Foundation nor the names of its
 | |
|    contributors may be used to endorse or promote products derived from
 | |
|    this software without specific prior written permission.
 | |
|    
 | |
|    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | |
|    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | |
|    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | |
|    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
 | |
|    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | |
|    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | |
|    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | |
|    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 | |
|    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 | |
|    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 | |
|    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| */
 | |
| 
 | |
| #include <xmmintrin.h>
 | |
| 
 | |
#define OVERRIDE_INNER_PROD
/**
   Inner product of two float vectors using SSE.

   Both inputs are read with unaligned loads, so no particular alignment
   is required of a or b.

   @param a   First input vector (at least len floats)
   @param b   Second input vector (at least len floats)
   @param len Number of elements to multiply and accumulate; any
              non-negative value is accepted, values <= 0 yield 0
   @return    Sum of a[i]*b[i] for i in [0, len)
*/
float inner_prod(const float *a, const float *b, int len)
{
   int i = 0;
   float ret;
   __m128 sum = _mm_setzero_ps();

   /* Main loop: two 4-wide multiply/accumulates (8 floats) per pass.
      Only runs while a full group of 8 remains, so it never reads
      beyond len. */
   while (len-i >= 8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
      i += 8;
   }

   /* At most one remaining group of 4 */
   if (len-i >= 4)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      i += 4;
   }

   /* Horizontal add: fold the upper pair onto the lower pair, then add
      lane 1 to lane 0 so that lane 0 holds the total of all four lanes. */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);

   /* Scalar tail for the last 0..3 elements */
   for (;i<len;i++)
      ret += a[i]*b[i];

   return ret;
}
 | |
| 
 | |
| #define OVERRIDE_PITCH_XCORR
 | |
| void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
 | |
| {
 | |
|    int i, offset;
 | |
|    VARDECL(__m128 *x);
 | |
|    VARDECL(__m128 *y);
 | |
|    int N, L;
 | |
|    N = len>>2;
 | |
|    L = nb_pitch>>2;
 | |
|    ALLOC(x, N, __m128);
 | |
|    ALLOC(y, N+L, __m128);
 | |
|    for (i=0;i<N;i++)
 | |
|       x[i] = _mm_loadu_ps(_x+(i<<2));
 | |
|    for (offset=0;offset<4;offset++)
 | |
|    {
 | |
|       for (i=0;i<N+L;i++)
 | |
|          y[i] = _mm_loadu_ps(_y+(i<<2)+offset);
 | |
|       for (i=0;i<L;i++)
 | |
|       {
 | |
|          int j;
 | |
|          __m128 sum, *xx, *yy;
 | |
|          sum = _mm_setzero_ps();
 | |
|          yy = y+i;
 | |
|          xx = x;
 | |
|          for (j=0;j<N;j+=2)
 | |
|          {
 | |
|             sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0]));
 | |
|             sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1]));
 | |
|             xx += 2;
 | |
|             yy += 2;
 | |
|          }
 | |
|          sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
 | |
|          sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
 | |
|          _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum);
 | |
|       }
 | |
|    }
 | |
| }
 |