WebRTC AEC正規化誤差信号


誤差信号の大きさの変化が大きく、閾値の設定が困難であるため、誤差信号eを遠隔信号xに正規化し、閾値との比較が容易である.
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
	// extern __m128 _mm_set_ps1(float _W);
	//    __m128    ,Sets the four single-precision, floating-point values to w  
	//r0=r1=r2=r3=_W ,1e-10         0   
  const __m128 k1e_10f = _mm_set1_ps(1e-10f);
  const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
                                                  : _mm_set1_ps(aec->normal_mu);
  const __m128 kThresh = aec->extended_filter_enabled
                             ? _mm_set1_ps(kExtendedErrorThreshold)
                             : _mm_set1_ps(aec->normal_error_threshold);

  int i;
  // vectorized code (four at once),    
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
	  //         、      、      
    const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]);
    const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
    const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);

	//      1e-10,    0   
    const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f);
	//        ,      、            
    __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);
    __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);

	//           
    const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
    const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im);
    const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2);

	//           
    const __m128 absEf = _mm_sqrt_ps(ef_sum2);
	//  absEf      kThresh
    const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh);

	//      +1e-10,  0   
    __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f);

	//      /  (         ,      absEf    ,  >1 <1         )
    const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus);
    __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv);
    __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv);
    ef_re_if = _mm_and_ps(bigger, ef_re_if);
    ef_im_if = _mm_and_ps(bigger, ef_im_if);
	//       
    ef_re = _mm_andnot_ps(bigger, ef_re);
    ef_im = _mm_andnot_ps(bigger, ef_im);
	//      
    ef_re = _mm_or_ps(ef_re, ef_re_if);
    ef_im = _mm_or_ps(ef_im, ef_im_if);

	//          kMu   
    ef_re = _mm_mul_ps(ef_re, kMu);
    ef_im = _mm_mul_ps(ef_im, kMu);

	//      、           ef 
    _mm_storeu_ps(&ef[0][i], ef_re);
    _mm_storeu_ps(&ef[1][i], ef_im);
  }
  // scalar code for the remaining items.
  {
    const float mu =
        aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
    const float error_threshold = aec->extended_filter_enabled
                                      ? kExtendedErrorThreshold
                                      : aec->normal_error_threshold;
    for (; i < (PART_LEN1); i++) {
      float abs_ef;
	  //           ,    
      ef[0][i] /= (aec->xPow[i] + 1e-10f);
      ef[1][i] /= (aec->xPow[i] + 1e-10f);
      abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);

	  //            
      if (abs_ef > error_threshold) {
		  //      =   /  ;
        abs_ef = error_threshold / (abs_ef + 1e-10f);
        ef[0][i] *= abs_ef;
        ef[1][i] *= abs_ef;
      }

      //         
      ef[0][i] *= mu;
      ef[1][i] *= mu;
    }
  }
}