mirror of
https://github.com/azahar-emu/soundtouch
synced 2025-11-07 15:40:04 +01:00
Improvements to help compiler autovectorization
Refactored FIRFilter and TDStretch hot-spot routines to help the compiler perform more efficient autovectorization.
Benchmarked:
- 2x/3x improvement in gcc-generated x86 SIMD code execution times for the SSE2/AVX instruction extensions respectively, when hand-tuned SSE intrinsics were disabled. Hand-tuned SSE code is still slightly faster than gcc-produced AVX.
- 2.4x improvement for cumulative ARM NEON tunings when compared to the previous SoundTouch release.
Signed-off-by: Olli Parviainen <oparviai'at'iki.fi>
This commit is contained in:
parent
a911a1e986
commit
bf3cec0244
@ -149,8 +149,9 @@ namespace soundtouch
|
|||||||
|
|
||||||
// floating point samples
|
// floating point samples
|
||||||
typedef float SAMPLETYPE;
|
typedef float SAMPLETYPE;
|
||||||
// data type for sample accumulation: Use double to utilize full precision.
|
// data type for sample accumulation: Use float also here to enable
|
||||||
typedef double LONG_SAMPLETYPE;
|
// efficient autovectorization
|
||||||
|
typedef float LONG_SAMPLETYPE;
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
|
#ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
|
||||||
// Allow SSE optimizations
|
// Allow SSE optimizations
|
||||||
@ -159,6 +160,12 @@ namespace soundtouch
|
|||||||
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
|
|
||||||
|
#if ((SOUNDTOUCH_ALLOW_SSE) || (__SSE__) || (SOUNDTOUCH_USE_NEON))
|
||||||
|
#if SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
||||||
|
#define ST_SIMD_AVOID_UNALIGNED
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// define ST_NO_EXCEPTION_HANDLING switch to disable throwing std exceptions:
|
// define ST_NO_EXCEPTION_HANDLING switch to disable throwing std exceptions:
|
||||||
|
|||||||
@ -60,12 +60,14 @@ FIRFilter::FIRFilter()
|
|||||||
length = 0;
|
length = 0;
|
||||||
lengthDiv8 = 0;
|
lengthDiv8 = 0;
|
||||||
filterCoeffs = NULL;
|
filterCoeffs = NULL;
|
||||||
|
filterCoeffsStereo = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
FIRFilter::~FIRFilter()
|
FIRFilter::~FIRFilter()
|
||||||
{
|
{
|
||||||
delete[] filterCoeffs;
|
delete[] filterCoeffs;
|
||||||
|
delete[] filterCoeffsStereo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -78,28 +80,26 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
|
|||||||
// because division is much slower operation than multiplying.
|
// because division is much slower operation than multiplying.
|
||||||
double dScaler = 1.0 / (double)resultDivider;
|
double dScaler = 1.0 / (double)resultDivider;
|
||||||
#endif
|
#endif
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = length & -8;
|
||||||
|
|
||||||
assert(length != 0);
|
assert((length != 0) && (length == ilength) && (src != NULL) && (dest != NULL) && (filterCoeffs != NULL));
|
||||||
assert(src != NULL);
|
|
||||||
assert(dest != NULL);
|
|
||||||
assert(filterCoeffs != NULL);
|
|
||||||
|
|
||||||
end = 2 * (numSamples - length);
|
end = 2 * (numSamples - ilength);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (j = 0; j < end; j += 2)
|
for (j = 0; j < end; j += 2)
|
||||||
{
|
{
|
||||||
const SAMPLETYPE *ptr;
|
const SAMPLETYPE *ptr;
|
||||||
LONG_SAMPLETYPE suml, sumr;
|
LONG_SAMPLETYPE suml, sumr;
|
||||||
uint i;
|
|
||||||
|
|
||||||
suml = sumr = 0;
|
suml = sumr = 0;
|
||||||
ptr = src + j;
|
ptr = src + j;
|
||||||
|
|
||||||
for (i = 0; i < length; i ++)
|
for (int i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
suml += ptr[2 * i] * filterCoeffs[i];
|
suml += ptr[2 * i] * filterCoeffsStereo[2 * i];
|
||||||
sumr += ptr[2 * i + 1] * filterCoeffs[i];
|
sumr += ptr[2 * i + 1] * filterCoeffsStereo[2 * i + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
@ -109,14 +109,11 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
|
|||||||
suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
|
suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
|
||||||
// saturate to 16 bit integer limits
|
// saturate to 16 bit integer limits
|
||||||
sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
|
sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
|
||||||
#else
|
|
||||||
suml *= dScaler;
|
|
||||||
sumr *= dScaler;
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
dest[j] = (SAMPLETYPE)suml;
|
dest[j] = (SAMPLETYPE)suml;
|
||||||
dest[j + 1] = (SAMPLETYPE)sumr;
|
dest[j + 1] = (SAMPLETYPE)sumr;
|
||||||
}
|
}
|
||||||
return numSamples - length;
|
return numSamples - ilength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -130,18 +127,21 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
|
|||||||
double dScaler = 1.0 / (double)resultDivider;
|
double dScaler = 1.0 / (double)resultDivider;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(length != 0);
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = length & -8;
|
||||||
|
|
||||||
end = numSamples - length;
|
assert(ilength != 0);
|
||||||
|
|
||||||
|
end = numSamples - ilength;
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (j = 0; j < end; j ++)
|
for (j = 0; j < end; j ++)
|
||||||
{
|
{
|
||||||
const SAMPLETYPE *pSrc = src + j;
|
const SAMPLETYPE *pSrc = src + j;
|
||||||
LONG_SAMPLETYPE sum;
|
LONG_SAMPLETYPE sum;
|
||||||
uint i;
|
int i;
|
||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
for (i = 0; i < length; i ++)
|
for (i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
sum += pSrc[i] * filterCoeffs[i];
|
sum += pSrc[i] * filterCoeffs[i];
|
||||||
}
|
}
|
||||||
@ -149,8 +149,6 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
|
|||||||
sum >>= resultDivFactor;
|
sum >>= resultDivFactor;
|
||||||
// saturate to 16 bit integer limits
|
// saturate to 16 bit integer limits
|
||||||
sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
|
sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
|
||||||
#else
|
|
||||||
sum *= dScaler;
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
dest[j] = (SAMPLETYPE)sum;
|
dest[j] = (SAMPLETYPE)sum;
|
||||||
}
|
}
|
||||||
@ -174,14 +172,18 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
|||||||
assert(filterCoeffs != NULL);
|
assert(filterCoeffs != NULL);
|
||||||
assert(numChannels < 16);
|
assert(numChannels < 16);
|
||||||
|
|
||||||
end = numChannels * (numSamples - length);
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = length & -8;
|
||||||
|
|
||||||
|
end = numChannels * (numSamples - ilength);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (j = 0; j < end; j += numChannels)
|
for (j = 0; j < end; j += numChannels)
|
||||||
{
|
{
|
||||||
const SAMPLETYPE *ptr;
|
const SAMPLETYPE *ptr;
|
||||||
LONG_SAMPLETYPE sums[16];
|
LONG_SAMPLETYPE sums[16];
|
||||||
uint c, i;
|
uint c;
|
||||||
|
int i;
|
||||||
|
|
||||||
for (c = 0; c < numChannels; c ++)
|
for (c = 0; c < numChannels; c ++)
|
||||||
{
|
{
|
||||||
@ -190,7 +192,7 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
|||||||
|
|
||||||
ptr = src + j;
|
ptr = src + j;
|
||||||
|
|
||||||
for (i = 0; i < length; i ++)
|
for (i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
SAMPLETYPE coef=filterCoeffs[i];
|
SAMPLETYPE coef=filterCoeffs[i];
|
||||||
for (c = 0; c < numChannels; c ++)
|
for (c = 0; c < numChannels; c ++)
|
||||||
@ -204,13 +206,11 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
|||||||
{
|
{
|
||||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
sums[c] >>= resultDivFactor;
|
sums[c] >>= resultDivFactor;
|
||||||
#else
|
|
||||||
sums[c] *= dScaler;
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
dest[j+c] = (SAMPLETYPE)sums[c];
|
dest[j+c] = (SAMPLETYPE)sums[c];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return numSamples - length;
|
return numSamples - ilength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -222,6 +222,13 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
|
|||||||
assert(newLength > 0);
|
assert(newLength > 0);
|
||||||
if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
|
if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
|
||||||
|
|
||||||
|
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
|
||||||
|
// scale coefficients already here if using floating samples
|
||||||
|
double scale = 1.0 / resultDivider;
|
||||||
|
#else
|
||||||
|
short scale = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
lengthDiv8 = newLength / 8;
|
lengthDiv8 = newLength / 8;
|
||||||
length = lengthDiv8 * 8;
|
length = lengthDiv8 * 8;
|
||||||
assert(length == newLength);
|
assert(length == newLength);
|
||||||
@ -231,7 +238,16 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
|
|||||||
|
|
||||||
delete[] filterCoeffs;
|
delete[] filterCoeffs;
|
||||||
filterCoeffs = new SAMPLETYPE[length];
|
filterCoeffs = new SAMPLETYPE[length];
|
||||||
memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE));
|
delete[] filterCoeffsStereo;
|
||||||
|
filterCoeffsStereo = new SAMPLETYPE[length*2];
|
||||||
|
for (uint i = 0; i < length; i ++)
|
||||||
|
{
|
||||||
|
filterCoeffs[i] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||||
|
// create also stereo set of filter coefficients: this allows compiler
|
||||||
|
// to autovectorize filter evaluation much more efficiently
|
||||||
|
filterCoeffsStereo[2 * i] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||||
|
filterCoeffsStereo[2 * i + 1] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -57,6 +57,7 @@ protected:
|
|||||||
|
|
||||||
// Memory for filter coefficients
|
// Memory for filter coefficients
|
||||||
SAMPLETYPE *filterCoeffs;
|
SAMPLETYPE *filterCoeffs;
|
||||||
|
SAMPLETYPE *filterCoeffsStereo;
|
||||||
|
|
||||||
virtual uint evaluateFilterStereo(SAMPLETYPE *dest,
|
virtual uint evaluateFilterStereo(SAMPLETYPE *dest,
|
||||||
const SAMPLETYPE *src,
|
const SAMPLETYPE *src,
|
||||||
|
|||||||
@ -142,7 +142,7 @@ int InterpolateLinearInteger::transposeMulti(SAMPLETYPE *dest, const SAMPLETYPE
|
|||||||
LONG_SAMPLETYPE temp, vol1;
|
LONG_SAMPLETYPE temp, vol1;
|
||||||
|
|
||||||
assert(iFract < SCALE);
|
assert(iFract < SCALE);
|
||||||
vol1 = (SCALE - iFract);
|
vol1 = (LONG_SAMPLETYPE)(SCALE - iFract);
|
||||||
for (int c = 0; c < numChannels; c ++)
|
for (int c = 0; c < numChannels; c ++)
|
||||||
{
|
{
|
||||||
temp = vol1 * src[c] + iFract * src[c + numChannels];
|
temp = vol1 * src[c] + iFract * src[c + numChannels];
|
||||||
|
|||||||
@ -54,11 +54,6 @@ using namespace soundtouch;
|
|||||||
|
|
||||||
#define max(x, y) (((x) > (y)) ? (x) : (y))
|
#define max(x, y) (((x) > (y)) ? (x) : (y))
|
||||||
|
|
||||||
#if defined(SOUNDTOUCH_USE_NEON) && defined(SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION)
|
|
||||||
// SIMD mode, allow shortcuts to avoid operations that aren't aligned to 16-byte boundary
|
|
||||||
#define ST_SIMD_AVOID_UNALIGNED
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
*
|
*
|
||||||
* Constant definitions
|
* Constant definitions
|
||||||
@ -207,7 +202,7 @@ void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
|
|||||||
m1 = (SAMPLETYPE)0;
|
m1 = (SAMPLETYPE)0;
|
||||||
m2 = (SAMPLETYPE)overlapLength;
|
m2 = (SAMPLETYPE)overlapLength;
|
||||||
|
|
||||||
for (i = 0; i < overlapLength ; i ++)
|
for (i = 0; i < overlapLength ; i ++)
|
||||||
{
|
{
|
||||||
pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
|
pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
|
||||||
m1 += 1;
|
m1 += 1;
|
||||||
@ -315,7 +310,7 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
|
|||||||
bestCorr = (bestCorr + 0.1) * 0.75;
|
bestCorr = (bestCorr + 0.1) * 0.75;
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (i = 1; i < seekLength; i ++)
|
for (i = 1; i < seekLength; i ++)
|
||||||
{
|
{
|
||||||
double corr;
|
double corr;
|
||||||
// Calculates correlation value for the mixing position corresponding to 'i'
|
// Calculates correlation value for the mixing position corresponding to 'i'
|
||||||
@ -682,18 +677,16 @@ void TDStretch::processSamples()
|
|||||||
isBeginning = false;
|
isBeginning = false;
|
||||||
int skip = (int)(tempo * overlapLength + 0.5);
|
int skip = (int)(tempo * overlapLength + 0.5);
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||||
#ifdef SOUNDTOUCH_ALLOW_SSE
|
// in SIMD mode, round the skip amount to value corresponding to aligned memory address
|
||||||
// if SSE mode, round the skip amount to value corresponding to aligned memory address
|
if (channels == 1)
|
||||||
if (channels == 1)
|
{
|
||||||
{
|
skip &= -4;
|
||||||
skip &= -4;
|
}
|
||||||
}
|
else if (channels == 2)
|
||||||
else if (channels == 2)
|
{
|
||||||
{
|
skip &= -2;
|
||||||
skip &= -2;
|
}
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
skipFract -= skip;
|
skipFract -= skip;
|
||||||
assert(nominalSkip >= -skipFract);
|
assert(nominalSkip >= -skipFract);
|
||||||
@ -823,7 +816,7 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const
|
|||||||
short temp;
|
short temp;
|
||||||
int cnt2;
|
int cnt2;
|
||||||
|
|
||||||
for (i = 0; i < overlapLength ; i ++)
|
for (i = 0; i < overlapLength ; i ++)
|
||||||
{
|
{
|
||||||
temp = (short)(overlapLength - i);
|
temp = (short)(overlapLength - i);
|
||||||
cnt2 = 2 * i;
|
cnt2 = 2 * i;
|
||||||
@ -897,9 +890,12 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
|
|||||||
if (((ulongptr)mixingPos) & 15) return -1e50;
|
if (((ulongptr)mixingPos) & 15) return -1e50;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = (channels * overlapLength) & -8;
|
||||||
|
|
||||||
corr = lnorm = 0;
|
corr = lnorm = 0;
|
||||||
// Same routine for stereo and mono
|
// Same routine for stereo and mono
|
||||||
for (i = 0; i < channels * overlapLength; i += 2)
|
for (i = 0; i < ilength; i += 2)
|
||||||
{
|
{
|
||||||
corr += (mixingPos[i] * compare[i] +
|
corr += (mixingPos[i] * compare[i] +
|
||||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
||||||
@ -931,6 +927,9 @@ double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *c
|
|||||||
long lnorm;
|
long lnorm;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = (channels * overlapLength) & -8;
|
||||||
|
|
||||||
// cancel first normalizer tap from previous round
|
// cancel first normalizer tap from previous round
|
||||||
lnorm = 0;
|
lnorm = 0;
|
||||||
for (i = 1; i <= channels; i ++)
|
for (i = 1; i <= channels; i ++)
|
||||||
@ -940,7 +939,7 @@ double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *c
|
|||||||
|
|
||||||
corr = 0;
|
corr = 0;
|
||||||
// Same routine for stereo and mono.
|
// Same routine for stereo and mono.
|
||||||
for (i = 0; i < channels * overlapLength; i += 2)
|
for (i = 0; i < ilength; i += 2)
|
||||||
{
|
{
|
||||||
corr += (mixingPos[i] * compare[i] +
|
corr += (mixingPos[i] * compare[i] +
|
||||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
||||||
@ -1053,9 +1052,12 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do
|
|||||||
if (((ulongptr)mixingPos) & 15) return -1e50;
|
if (((ulongptr)mixingPos) & 15) return -1e50;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = (channels * overlapLength) & -8;
|
||||||
|
|
||||||
corr = norm = 0;
|
corr = norm = 0;
|
||||||
// Same routine for stereo and mono
|
// Same routine for stereo and mono
|
||||||
for (i = 0; i < channels * overlapLength; i ++)
|
for (i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
corr += mixingPos[i] * compare[i];
|
corr += mixingPos[i] * compare[i];
|
||||||
norm += mixingPos[i] * mixingPos[i];
|
norm += mixingPos[i] * mixingPos[i];
|
||||||
@ -1080,8 +1082,11 @@ double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *c
|
|||||||
norm -= mixingPos[-i] * mixingPos[-i];
|
norm -= mixingPos[-i] * mixingPos[-i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = (channels * overlapLength) & -8;
|
||||||
|
|
||||||
// Same routine for stereo and mono
|
// Same routine for stereo and mono
|
||||||
for (i = 0; i < channels * overlapLength; i ++)
|
for (i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
corr += mixingPos[i] * compare[i];
|
corr += mixingPos[i] * compare[i];
|
||||||
}
|
}
|
||||||
|
|||||||
@ -80,7 +80,7 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &a
|
|||||||
// Compile-time define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
|
// Compile-time define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
|
||||||
// for choosing if this little cheating is allowed.
|
// for choosing if this little cheating is allowed.
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||||
// Little cheating allowed, return valid correlation only for
|
// Little cheating allowed, return valid correlation only for
|
||||||
// aligned locations, meaning every second round for stereo sound.
|
// aligned locations, meaning every second round for stereo sound.
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user