Fixed integer overflow bug in integer versions of cross-correlation routines.

This commit is contained in:
oparviai 2014-01-06 19:40:40 +00:00
parent a61c28e36a
commit 746a90d610
3 changed files with 17 additions and 16 deletions

View File

@ -747,6 +747,7 @@ submitted bugfixes since SoundTouch v1.3.1: </p>
<li> Sandro Cumerlato</li> <li> Sandro Cumerlato</li>
<li> Justin Frankel </li> <li> Justin Frankel </li>
<li> Jason Garland </li> <li> Jason Garland </li>
<li> Masa H. </li>
<li> Takashi Iwai </li> <li> Takashi Iwai </li>
<li> Mathias Möhl</li> <li> Mathias Möhl</li>
<li> Yuval Naveh </li> <li> Yuval Naveh </li>

View File

@ -742,12 +742,12 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare) co
for (i = 0; i < channels * overlapLength; i += 4) for (i = 0; i < channels * overlapLength; i += 4)
{ {
corr += (mixingPos[i] * compare[i] + corr += (mixingPos[i] * compare[i] +
mixingPos[i + 1] * compare[i + 1] + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
mixingPos[i + 2] * compare[i + 2] + corr += (mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
norm += (mixingPos[i] * mixingPos[i] + norm += (mixingPos[i] * mixingPos[i] +
mixingPos[i + 1] * mixingPos[i + 1] + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
mixingPos[i + 2] * mixingPos[i + 2] + norm += (mixingPos[i + 2] * mixingPos[i + 2] +
mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits; mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
} }

View File

@ -93,19 +93,19 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
// _mm_add_pi32 : 2*32bit add // _mm_add_pi32 : 2*32bit add
// _m_psrad : 32bit right-shift // _m_psrad : 32bit right-shift
temp = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]), temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]), shifter),
_mm_madd_pi16(pVec1[1], pVec2[1])); _mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec2[1]), shifter));
temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]), temp2 = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]), shifter),
_mm_madd_pi16(pVec1[1], pVec1[1])); _mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec1[1]), shifter));
accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter)); accu = _mm_add_pi32(accu, temp);
normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter)); normaccu = _mm_add_pi32(normaccu, temp2);
temp = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]), temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]), shifter),
_mm_madd_pi16(pVec1[3], pVec2[3])); _mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec2[3]), shifter));
temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]), temp2 = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]), shifter),
_mm_madd_pi16(pVec1[3], pVec1[3])); _mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec1[3]), shifter));
accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter)); accu = _mm_add_pi32(accu, temp);
normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter)); normaccu = _mm_add_pi32(normaccu, temp2);
pVec1 += 4; pVec1 += 4;
pVec2 += 4; pVec2 += 4;