mirror of
https://github.com/azahar-emu/soundtouch
synced 2025-11-07 07:30:02 +01:00
Time stretch routine improvements:
- improved sound quality - streamlined code
This commit is contained in:
parent
1f6a68a6a3
commit
557bf9d6e4
@ -39,6 +39,7 @@
|
|||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
#include "RunParameters.h"
|
#include "RunParameters.h"
|
||||||
#include "WavFile.h"
|
#include "WavFile.h"
|
||||||
#include "SoundTouch.h"
|
#include "SoundTouch.h"
|
||||||
@ -172,7 +173,6 @@ static void setup(SoundTouch *pSoundTouch, const WavInFile *inFile, const RunPar
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Processes the sound
|
// Processes the sound
|
||||||
static void process(SoundTouch *pSoundTouch, WavInFile *inFile, WavOutFile *outFile)
|
static void process(SoundTouch *pSoundTouch, WavInFile *inFile, WavOutFile *outFile)
|
||||||
{
|
{
|
||||||
@ -309,8 +309,11 @@ int main(const int nParams, const char * const paramStr[])
|
|||||||
// Setup the 'SoundTouch' object for processing the sound
|
// Setup the 'SoundTouch' object for processing the sound
|
||||||
setup(&soundTouch, inFile, params);
|
setup(&soundTouch, inFile, params);
|
||||||
|
|
||||||
|
// clock_t cs = clock(); // for benchmarking processing duration
|
||||||
// Process the sound
|
// Process the sound
|
||||||
process(&soundTouch, inFile, outFile);
|
process(&soundTouch, inFile, outFile);
|
||||||
|
// clock_t ce = clock(); // for benchmarking processing duration
|
||||||
|
// printf("duration: %lf\n", (double)(ce-cs)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
// Close WAV file handles & dispose of the objects
|
// Close WAV file handles & dispose of the objects
|
||||||
delete inFile;
|
delete inFile;
|
||||||
|
|||||||
@ -90,7 +90,7 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
|
|||||||
channels = 2;
|
channels = 2;
|
||||||
|
|
||||||
pMidBuffer = NULL;
|
pMidBuffer = NULL;
|
||||||
pRefMidBufferUnaligned = NULL;
|
pMidBufferUnaligned = NULL;
|
||||||
overlapLength = 0;
|
overlapLength = 0;
|
||||||
|
|
||||||
bAutoSeqSetting = TRUE;
|
bAutoSeqSetting = TRUE;
|
||||||
@ -110,8 +110,7 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
|
|||||||
|
|
||||||
TDStretch::~TDStretch()
|
TDStretch::~TDStretch()
|
||||||
{
|
{
|
||||||
delete[] pMidBuffer;
|
delete[] pMidBufferUnaligned;
|
||||||
delete[] pRefMidBufferUnaligned;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -195,12 +194,17 @@ void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWind
|
|||||||
// Overlaps samples in 'midBuffer' with the samples in 'pInput'
|
// Overlaps samples in 'midBuffer' with the samples in 'pInput'
|
||||||
void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
|
void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
|
||||||
{
|
{
|
||||||
int i, itemp;
|
int i;
|
||||||
|
SAMPLETYPE m1, m2;
|
||||||
|
|
||||||
|
m1 = (SAMPLETYPE)0;
|
||||||
|
m2 = (SAMPLETYPE)overlapLength;
|
||||||
|
|
||||||
for (i = 0; i < overlapLength ; i ++)
|
for (i = 0; i < overlapLength ; i ++)
|
||||||
{
|
{
|
||||||
itemp = overlapLength - i;
|
pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
|
||||||
pOutput[i] = (pInput[i] * i + pMidBuffer[i] * itemp ) / overlapLength; // >> overlapDividerBits;
|
m1 += 1;
|
||||||
|
m2 -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -246,35 +250,17 @@ BOOL TDStretch::isQuickSeekEnabled() const
|
|||||||
// Seeks for the optimal overlap-mixing position.
|
// Seeks for the optimal overlap-mixing position.
|
||||||
int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
|
int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
|
||||||
{
|
{
|
||||||
if (channels == 2)
|
|
||||||
{
|
|
||||||
// stereo sound
|
|
||||||
if (bQuickSeek)
|
if (bQuickSeek)
|
||||||
{
|
{
|
||||||
return seekBestOverlapPositionStereoQuick(refPos);
|
return seekBestOverlapPositionQuick(refPos);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return seekBestOverlapPositionStereo(refPos);
|
return seekBestOverlapPositionFull(refPos);
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// mono sound
|
|
||||||
if (bQuickSeek)
|
|
||||||
{
|
|
||||||
return seekBestOverlapPositionMonoQuick(refPos);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return seekBestOverlapPositionMono(refPos);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
|
// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
|
||||||
// of 'ovlPos'.
|
// of 'ovlPos'.
|
||||||
inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
|
inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
|
||||||
@ -291,22 +277,18 @@ inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, ui
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Seeks for the optimal overlap-mixing position. The 'stereo' version of the
|
// Seeks for the optimal overlap-mixing position. The 'stereo' version of the
|
||||||
// routine
|
// routine
|
||||||
//
|
//
|
||||||
// The best position is determined as the position where the two overlapped
|
// The best position is determined as the position where the two overlapped
|
||||||
// sample sequences are 'most alike', in terms of the highest cross-correlation
|
// sample sequences are 'most alike', in terms of the highest cross-correlation
|
||||||
// value over the overlapping period
|
// value over the overlapping period
|
||||||
int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
|
int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
|
||||||
{
|
{
|
||||||
int bestOffs;
|
int bestOffs;
|
||||||
double bestCorr, corr;
|
double bestCorr, corr;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// Slopes the amplitudes of the 'midBuffer' samples
|
|
||||||
precalcCorrReferenceStereo();
|
|
||||||
|
|
||||||
bestCorr = FLT_MIN;
|
bestCorr = FLT_MIN;
|
||||||
bestOffs = 0;
|
bestOffs = 0;
|
||||||
|
|
||||||
@ -316,7 +298,7 @@ int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
|
|||||||
{
|
{
|
||||||
// Calculates correlation value for the mixing position corresponding
|
// Calculates correlation value for the mixing position corresponding
|
||||||
// to 'i'
|
// to 'i'
|
||||||
corr = (double)calcCrossCorrStereo(refPos + 2 * i, pRefMidBuffer);
|
corr = calcCrossCorr(refPos + channels * i, pMidBuffer);
|
||||||
// heuristic rule to slightly favour values close to mid of the range
|
// heuristic rule to slightly favour values close to mid of the range
|
||||||
double tmp = (double)(2 * i - seekLength) / (double)seekLength;
|
double tmp = (double)(2 * i - seekLength) / (double)seekLength;
|
||||||
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
|
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
|
||||||
@ -341,16 +323,13 @@ int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
|
|||||||
// The best position is determined as the position where the two overlapped
|
// The best position is determined as the position where the two overlapped
|
||||||
// sample sequences are 'most alike', in terms of the highest cross-correlation
|
// sample sequences are 'most alike', in terms of the highest cross-correlation
|
||||||
// value over the overlapping period
|
// value over the overlapping period
|
||||||
int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
|
int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
int bestOffs;
|
int bestOffs;
|
||||||
double bestCorr, corr;
|
double bestCorr, corr;
|
||||||
int scanCount, corrOffset, tempOffset;
|
int scanCount, corrOffset, tempOffset;
|
||||||
|
|
||||||
// Slopes the amplitude of the 'midBuffer' samples
|
|
||||||
precalcCorrReferenceStereo();
|
|
||||||
|
|
||||||
bestCorr = FLT_MIN;
|
bestCorr = FLT_MIN;
|
||||||
bestOffs = _scanOffsets[0][0];
|
bestOffs = _scanOffsets[0][0];
|
||||||
corrOffset = 0;
|
corrOffset = 0;
|
||||||
@ -372,7 +351,7 @@ int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
|
|||||||
|
|
||||||
// Calculates correlation value for the mixing position corresponding
|
// Calculates correlation value for the mixing position corresponding
|
||||||
// to 'tempOffset'
|
// to 'tempOffset'
|
||||||
corr = (double)calcCrossCorrStereo(refPos + 2 * tempOffset, pRefMidBuffer);
|
corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer);
|
||||||
// heuristic rule to slightly favour values close to mid of the range
|
// heuristic rule to slightly favour values close to mid of the range
|
||||||
double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
|
double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
|
||||||
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
|
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
|
||||||
@ -395,111 +374,6 @@ int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Seeks for the optimal overlap-mixing position. The 'mono' version of the
|
|
||||||
// routine
|
|
||||||
//
|
|
||||||
// The best position is determined as the position where the two overlapped
|
|
||||||
// sample sequences are 'most alike', in terms of the highest cross-correlation
|
|
||||||
// value over the overlapping period
|
|
||||||
int TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE *refPos)
|
|
||||||
{
|
|
||||||
int bestOffs;
|
|
||||||
double bestCorr, corr;
|
|
||||||
int tempOffset;
|
|
||||||
const SAMPLETYPE *compare;
|
|
||||||
|
|
||||||
// Slopes the amplitude of the 'midBuffer' samples
|
|
||||||
precalcCorrReferenceMono();
|
|
||||||
|
|
||||||
bestCorr = FLT_MIN;
|
|
||||||
bestOffs = 0;
|
|
||||||
|
|
||||||
// Scans for the best correlation value by testing each possible position
|
|
||||||
// over the permitted range.
|
|
||||||
for (tempOffset = 0; tempOffset < seekLength; tempOffset ++)
|
|
||||||
{
|
|
||||||
compare = refPos + tempOffset;
|
|
||||||
|
|
||||||
// Calculates correlation value for the mixing position corresponding
|
|
||||||
// to 'tempOffset'
|
|
||||||
corr = (double)calcCrossCorrMono(pRefMidBuffer, compare);
|
|
||||||
// heuristic rule to slightly favour values close to mid of the range
|
|
||||||
double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
|
|
||||||
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
|
|
||||||
|
|
||||||
// Checks for the highest correlation value
|
|
||||||
if (corr > bestCorr)
|
|
||||||
{
|
|
||||||
bestCorr = corr;
|
|
||||||
bestOffs = tempOffset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// clear cross correlation routine state if necessary (is so e.g. in MMX routines).
|
|
||||||
clearCrossCorrState();
|
|
||||||
|
|
||||||
return bestOffs;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Seeks for the optimal overlap-mixing position. The 'mono' version of the
|
|
||||||
// routine
|
|
||||||
//
|
|
||||||
// The best position is determined as the position where the two overlapped
|
|
||||||
// sample sequences are 'most alike', in terms of the highest cross-correlation
|
|
||||||
// value over the overlapping period
|
|
||||||
int TDStretch::seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos)
|
|
||||||
{
|
|
||||||
int j;
|
|
||||||
int bestOffs;
|
|
||||||
double bestCorr, corr;
|
|
||||||
int scanCount, corrOffset, tempOffset;
|
|
||||||
|
|
||||||
// Slopes the amplitude of the 'midBuffer' samples
|
|
||||||
precalcCorrReferenceMono();
|
|
||||||
|
|
||||||
bestCorr = FLT_MIN;
|
|
||||||
bestOffs = _scanOffsets[0][0];
|
|
||||||
corrOffset = 0;
|
|
||||||
tempOffset = 0;
|
|
||||||
|
|
||||||
// Scans for the best correlation value using four-pass hierarchical search.
|
|
||||||
//
|
|
||||||
// The look-up table 'scans' has hierarchical position adjusting steps.
|
|
||||||
// In the first pass the routine searches for the highest correlation with
|
|
||||||
// relatively coarse steps, then rescans the neighbourhood of the highest
|
|
||||||
// correlation with better resolution and so on.
|
|
||||||
for (scanCount = 0;scanCount < 4; scanCount ++)
|
|
||||||
{
|
|
||||||
j = 0;
|
|
||||||
while (_scanOffsets[scanCount][j])
|
|
||||||
{
|
|
||||||
tempOffset = corrOffset + _scanOffsets[scanCount][j];
|
|
||||||
if (tempOffset >= seekLength) break;
|
|
||||||
|
|
||||||
// Calculates correlation value for the mixing position corresponding
|
|
||||||
// to 'tempOffset'
|
|
||||||
corr = (double)calcCrossCorrMono(refPos + tempOffset, pRefMidBuffer);
|
|
||||||
// heuristic rule to slightly favour values close to mid of the range
|
|
||||||
double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
|
|
||||||
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
|
|
||||||
|
|
||||||
// Checks for the highest correlation value
|
|
||||||
if (corr > bestCorr)
|
|
||||||
{
|
|
||||||
bestCorr = corr;
|
|
||||||
bestOffs = tempOffset;
|
|
||||||
}
|
|
||||||
j ++;
|
|
||||||
}
|
|
||||||
corrOffset = bestOffs;
|
|
||||||
}
|
|
||||||
// clear cross correlation routine state if necessary (is so e.g. in MMX routines).
|
|
||||||
clearCrossCorrState();
|
|
||||||
|
|
||||||
return bestOffs;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// clear cross correlation routine state if necessary
|
/// clear cross correlation routine state if necessary
|
||||||
void TDStretch::clearCrossCorrState()
|
void TDStretch::clearCrossCorrState()
|
||||||
{
|
{
|
||||||
@ -712,15 +586,13 @@ void TDStretch::acceptNewOverlapLength(int newOverlapLength)
|
|||||||
|
|
||||||
if (overlapLength > prevOvl)
|
if (overlapLength > prevOvl)
|
||||||
{
|
{
|
||||||
delete[] pMidBuffer;
|
delete[] pMidBufferUnaligned;
|
||||||
delete[] pRefMidBufferUnaligned;
|
|
||||||
|
pMidBufferUnaligned = new SAMPLETYPE[overlapLength * 2 + 16 / sizeof(SAMPLETYPE)];
|
||||||
|
// ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
|
||||||
|
pMidBuffer = (SAMPLETYPE *)((((ulong)pMidBufferUnaligned) + 15) & (ulong)-16);
|
||||||
|
|
||||||
pMidBuffer = new SAMPLETYPE[overlapLength * 2];
|
|
||||||
clearMidBuffer();
|
clearMidBuffer();
|
||||||
|
|
||||||
pRefMidBufferUnaligned = new SAMPLETYPE[2 * overlapLength + 16 / sizeof(SAMPLETYPE)];
|
|
||||||
// ensure that 'pRefMidBuffer' is aligned to 16 byte boundary for efficiency
|
|
||||||
pRefMidBuffer = (SAMPLETYPE *)((((ulong)pRefMidBufferUnaligned) + 15) & (ulong)-16);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -777,43 +649,6 @@ TDStretch * TDStretch::newInstance()
|
|||||||
|
|
||||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
|
|
||||||
// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
|
|
||||||
// is faster to calculate
|
|
||||||
void TDStretch::precalcCorrReferenceStereo()
|
|
||||||
{
|
|
||||||
int i, cnt2;
|
|
||||||
int temp, temp2;
|
|
||||||
|
|
||||||
for (i=0 ; i < (int)overlapLength ;i ++)
|
|
||||||
{
|
|
||||||
temp = i * (overlapLength - i);
|
|
||||||
cnt2 = i * 2;
|
|
||||||
|
|
||||||
temp2 = (pMidBuffer[cnt2] * temp) / slopingDivider;
|
|
||||||
pRefMidBuffer[cnt2] = (short)(temp2);
|
|
||||||
temp2 = (pMidBuffer[cnt2 + 1] * temp) / slopingDivider;
|
|
||||||
pRefMidBuffer[cnt2 + 1] = (short)(temp2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
|
|
||||||
// is faster to calculate
|
|
||||||
void TDStretch::precalcCorrReferenceMono()
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
long temp;
|
|
||||||
long temp2;
|
|
||||||
|
|
||||||
for (i=0 ; i < (int)overlapLength ;i ++)
|
|
||||||
{
|
|
||||||
temp = i * (overlapLength - i);
|
|
||||||
temp2 = (pMidBuffer[i] * temp) / slopingDivider;
|
|
||||||
pRefMidBuffer[i] = (short)temp2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
|
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
|
||||||
// version of the routine.
|
// version of the routine.
|
||||||
void TDStretch::overlapStereo(short *poutput, const short *input) const
|
void TDStretch::overlapStereo(short *poutput, const short *input) const
|
||||||
@ -864,44 +699,32 @@ void TDStretch::calculateOverlapLength(int aoverlapMs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
long TDStretch::calcCrossCorrMono(const short *mixingPos, const short *compare) const
|
double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare) const
|
||||||
{
|
{
|
||||||
long corr;
|
long corr;
|
||||||
long norm;
|
long norm;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
corr = norm = 0;
|
corr = norm = 0;
|
||||||
for (i = 1; i < overlapLength; i ++)
|
// Same routine for stereo and mono. For stereo, unroll loop for better
|
||||||
{
|
// efficiency; this also gives slightly better resolution against rounding.
|
||||||
corr += (mixingPos[i] * compare[i]) >> overlapDividerBits;
|
// For mono it is the same routine, which just unrolls the loop by a factor of 4
|
||||||
norm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
|
for (i = 0; i < channels * overlapLength; i += 4)
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize result by dividing by sqrt(norm) - this step is easiest
|
|
||||||
// done using floating point operation
|
|
||||||
if (norm == 0) norm = 1; // to avoid div by zero
|
|
||||||
return (long)((double)corr * SHRT_MAX / sqrt((double)norm));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
long TDStretch::calcCrossCorrStereo(const short *mixingPos, const short *compare) const
|
|
||||||
{
|
|
||||||
long corr;
|
|
||||||
long norm;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
corr = norm = 0;
|
|
||||||
for (i = 2; i < 2 * overlapLength; i += 2)
|
|
||||||
{
|
{
|
||||||
corr += (mixingPos[i] * compare[i] +
|
corr += (mixingPos[i] * compare[i] +
|
||||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;
|
mixingPos[i + 1] * compare[i + 1] +
|
||||||
norm += (mixingPos[i] * mixingPos[i] + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits;
|
mixingPos[i + 2] * compare[i + 2] +
|
||||||
|
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
|
||||||
|
norm += (mixingPos[i] * mixingPos[i] +
|
||||||
|
mixingPos[i + 1] * mixingPos[i + 1] +
|
||||||
|
mixingPos[i + 2] * mixingPos[i + 2] +
|
||||||
|
mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize result by dividing by sqrt(norm) - this step is easiest
|
// Normalize result by dividing by sqrt(norm) - this step is easiest
|
||||||
// done using floating point operation
|
// done using floating point operation
|
||||||
if (norm == 0) norm = 1; // to avoid div by zero
|
if (norm == 0) norm = 1; // to avoid div by zero
|
||||||
return (long)((double)corr * SHRT_MAX / sqrt((double)norm));
|
return (double)corr / sqrt((double)norm);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
@ -913,57 +736,26 @@ long TDStretch::calcCrossCorrStereo(const short *mixingPos, const short *compare
|
|||||||
|
|
||||||
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
|
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
|
||||||
|
|
||||||
|
|
||||||
// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
|
|
||||||
// is faster to calculate
|
|
||||||
void TDStretch::precalcCorrReferenceStereo()
|
|
||||||
{
|
|
||||||
int i, cnt2;
|
|
||||||
float temp;
|
|
||||||
|
|
||||||
for (i=0 ; i < (int)overlapLength ;i ++)
|
|
||||||
{
|
|
||||||
temp = (float)i * (float)(overlapLength - i);
|
|
||||||
cnt2 = i * 2;
|
|
||||||
pRefMidBuffer[cnt2] = (float)(pMidBuffer[cnt2] * temp);
|
|
||||||
pRefMidBuffer[cnt2 + 1] = (float)(pMidBuffer[cnt2 + 1] * temp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
|
|
||||||
// is faster to calculate
|
|
||||||
void TDStretch::precalcCorrReferenceMono()
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
float temp;
|
|
||||||
|
|
||||||
for (i=0 ; i < (int)overlapLength ;i ++)
|
|
||||||
{
|
|
||||||
temp = (float)i * (float)(overlapLength - i);
|
|
||||||
pRefMidBuffer[i] = (float)(pMidBuffer[i] * temp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Overlaps samples in 'midBuffer' with the samples in 'pInput'
|
// Overlaps samples in 'midBuffer' with the samples in 'pInput'
|
||||||
void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
|
void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int cnt2;
|
|
||||||
float fTemp;
|
|
||||||
float fScale;
|
float fScale;
|
||||||
float fi;
|
float f1;
|
||||||
|
float f2;
|
||||||
|
|
||||||
fScale = 1.0f / (float)overlapLength;
|
fScale = 1.0f / (float)overlapLength;
|
||||||
|
|
||||||
for (i = 0; i < (int)overlapLength ; i ++)
|
f1 = 0;
|
||||||
|
f2 = 1.0f;
|
||||||
|
|
||||||
|
for (i = 0; i < 2 * (int)overlapLength ; i += 2)
|
||||||
{
|
{
|
||||||
fTemp = (float)(overlapLength - i) * fScale;
|
pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
|
||||||
fi = (float)i * fScale;
|
pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
|
||||||
cnt2 = 2 * i;
|
|
||||||
pOutput[cnt2 + 0] = pInput[cnt2 + 0] * fi + pMidBuffer[cnt2 + 0] * fTemp;
|
f1 += fScale;
|
||||||
pOutput[cnt2 + 1] = pInput[cnt2 + 1] * fi + pMidBuffer[cnt2 + 1] * fTemp;
|
f2 -= fScale;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -984,38 +776,29 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare) const
|
||||||
double TDStretch::calcCrossCorrMono(const float *mixingPos, const float *compare) const
|
|
||||||
{
|
{
|
||||||
double corr;
|
double corr;
|
||||||
double norm;
|
double norm;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
corr = norm = 0;
|
corr = norm = 0;
|
||||||
for (i = 1; i < overlapLength; i ++)
|
// Same routine for stereo and mono. For Stereo, unroll by factor of 2.
|
||||||
{
|
// For mono it's the same routine, yet unrolled by a factor of 4.
|
||||||
corr += mixingPos[i] * compare[i];
|
for (i = 0; i < channels * overlapLength; i += 4)
|
||||||
norm += mixingPos[i] * mixingPos[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (norm < 1e-9) norm = 1.0; // to avoid div by zero
|
|
||||||
return corr / sqrt(norm);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
double TDStretch::calcCrossCorrStereo(const float *mixingPos, const float *compare) const
|
|
||||||
{
|
|
||||||
double corr;
|
|
||||||
double norm;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
corr = norm = 0;
|
|
||||||
for (i = 2; i < 2 * overlapLength; i += 2)
|
|
||||||
{
|
{
|
||||||
corr += mixingPos[i] * compare[i] +
|
corr += mixingPos[i] * compare[i] +
|
||||||
mixingPos[i + 1] * compare[i + 1];
|
mixingPos[i + 1] * compare[i + 1];
|
||||||
|
|
||||||
norm += mixingPos[i] * mixingPos[i] +
|
norm += mixingPos[i] * mixingPos[i] +
|
||||||
mixingPos[i + 1] * mixingPos[i + 1];
|
mixingPos[i + 1] * mixingPos[i + 1];
|
||||||
|
|
||||||
|
// unroll the loop for better CPU efficiency:
|
||||||
|
corr += mixingPos[i + 2] * compare[i + 2] +
|
||||||
|
mixingPos[i + 3] * compare[i + 3];
|
||||||
|
|
||||||
|
norm += mixingPos[i + 2] * mixingPos[i + 2] +
|
||||||
|
mixingPos[i + 3] * mixingPos[i + 3];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (norm < 1e-9) norm = 1.0; // to avoid div by zero
|
if (norm < 1e-9) norm = 1.0; // to avoid div by zero
|
||||||
|
|||||||
@ -115,8 +115,7 @@ protected:
|
|||||||
float tempo;
|
float tempo;
|
||||||
|
|
||||||
SAMPLETYPE *pMidBuffer;
|
SAMPLETYPE *pMidBuffer;
|
||||||
SAMPLETYPE *pRefMidBuffer;
|
SAMPLETYPE *pMidBufferUnaligned;
|
||||||
SAMPLETYPE *pRefMidBufferUnaligned;
|
|
||||||
int overlapLength;
|
int overlapLength;
|
||||||
int seekLength;
|
int seekLength;
|
||||||
int seekWindowLength;
|
int seekWindowLength;
|
||||||
@ -140,13 +139,10 @@ protected:
|
|||||||
virtual void clearCrossCorrState();
|
virtual void clearCrossCorrState();
|
||||||
void calculateOverlapLength(int overlapMs);
|
void calculateOverlapLength(int overlapMs);
|
||||||
|
|
||||||
virtual LONG_SAMPLETYPE calcCrossCorrStereo(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
|
virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
|
||||||
virtual LONG_SAMPLETYPE calcCrossCorrMono(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
|
|
||||||
|
|
||||||
virtual int seekBestOverlapPositionStereo(const SAMPLETYPE *refPos);
|
virtual int seekBestOverlapPositionFull(const SAMPLETYPE *refPos);
|
||||||
virtual int seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos);
|
virtual int seekBestOverlapPositionQuick(const SAMPLETYPE *refPos);
|
||||||
virtual int seekBestOverlapPositionMono(const SAMPLETYPE *refPos);
|
|
||||||
virtual int seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos);
|
|
||||||
int seekBestOverlapPosition(const SAMPLETYPE *refPos);
|
int seekBestOverlapPosition(const SAMPLETYPE *refPos);
|
||||||
|
|
||||||
virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const;
|
virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const;
|
||||||
@ -155,9 +151,6 @@ protected:
|
|||||||
void clearMidBuffer();
|
void clearMidBuffer();
|
||||||
void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const;
|
void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const;
|
||||||
|
|
||||||
void precalcCorrReferenceMono();
|
|
||||||
void precalcCorrReferenceStereo();
|
|
||||||
|
|
||||||
void calcSeqParameters();
|
void calcSeqParameters();
|
||||||
|
|
||||||
/// Changes the tempo of the given sound samples.
|
/// Changes the tempo of the given sound samples.
|
||||||
@ -254,7 +247,7 @@ public:
|
|||||||
class TDStretchMMX : public TDStretch
|
class TDStretchMMX : public TDStretch
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
long calcCrossCorrStereo(const short *mixingPos, const short *compare) const;
|
double calcCrossCorr(const short *mixingPos, const short *compare) const;
|
||||||
virtual void overlapStereo(short *output, const short *input) const;
|
virtual void overlapStereo(short *output, const short *input) const;
|
||||||
virtual void clearCrossCorrState();
|
virtual void clearCrossCorrState();
|
||||||
};
|
};
|
||||||
@ -266,7 +259,7 @@ public:
|
|||||||
class TDStretchSSE : public TDStretch
|
class TDStretchSSE : public TDStretch
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
double calcCrossCorrStereo(const float *mixingPos, const float *compare) const;
|
double calcCrossCorr(const float *mixingPos, const float *compare) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /// SOUNDTOUCH_ALLOW_SSE
|
#endif /// SOUNDTOUCH_ALLOW_SSE
|
||||||
|
|||||||
@ -68,7 +68,7 @@ using namespace soundtouch;
|
|||||||
|
|
||||||
|
|
||||||
// Calculates cross correlation of two buffers
|
// Calculates cross correlation of two buffers
|
||||||
long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
|
double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
|
||||||
{
|
{
|
||||||
const __m64 *pVec1, *pVec2;
|
const __m64 *pVec1, *pVec2;
|
||||||
__m64 shifter;
|
__m64 shifter;
|
||||||
@ -82,9 +82,9 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
|
|||||||
shifter = _m_from_int(overlapDividerBits);
|
shifter = _m_from_int(overlapDividerBits);
|
||||||
normaccu = accu = _mm_setzero_si64();
|
normaccu = accu = _mm_setzero_si64();
|
||||||
|
|
||||||
// Process 4 parallel sets of 2 * stereo samples each during each
|
// Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
|
||||||
// round to improve CPU-level parallelization.
|
// during each round for improved CPU-level parallelization.
|
||||||
for (i = 0; i < overlapLength / 8; i ++)
|
for (i = 0; i < channels * overlapLength / 16; i ++)
|
||||||
{
|
{
|
||||||
__m64 temp, temp2;
|
__m64 temp, temp2;
|
||||||
|
|
||||||
@ -126,7 +126,8 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
|
|||||||
// Normalize result by dividing by sqrt(norm) - this step is easiest
|
// Normalize result by dividing by sqrt(norm) - this step is easiest
|
||||||
// done using floating point operation
|
// done using floating point operation
|
||||||
if (norm == 0) norm = 1; // to avoid div by zero
|
if (norm == 0) norm = 1; // to avoid div by zero
|
||||||
return (long)((double)corr * USHRT_MAX / sqrt((double)norm));
|
|
||||||
|
return (double)corr / sqrt((double)norm);
|
||||||
// Note: Warning about the missing EMMS instruction is harmless
|
// Note: Warning about the missing EMMS instruction is harmless
|
||||||
// as it'll be called elsewhere.
|
// as it'll be called elsewhere.
|
||||||
}
|
}
|
||||||
|
|||||||
@ -71,7 +71,7 @@ using namespace soundtouch;
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
// Calculates cross correlation of two buffers
|
// Calculates cross correlation of two buffers
|
||||||
double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) const
|
double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2) const
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
const float *pVec1;
|
const float *pVec1;
|
||||||
@ -110,8 +110,9 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
|
|||||||
pVec2 = (const __m128*)pV2;
|
pVec2 = (const __m128*)pV2;
|
||||||
vSum = vNorm = _mm_setzero_ps();
|
vSum = vNorm = _mm_setzero_ps();
|
||||||
|
|
||||||
// Unroll the loop by factor of 4 * 4 operations
|
// Unroll the loop by factor of 4 * 4 operations. Use same routine for
|
||||||
for (i = 0; i < overlapLength / 8; i ++)
|
// stereo & mono, for mono it just means twice the amount of unrolling.
|
||||||
|
for (i = 0; i < channels * overlapLength / 16; i ++)
|
||||||
{
|
{
|
||||||
__m128 vTemp;
|
__m128 vTemp;
|
||||||
// vSum += pV1[0..3] * pV2[0..3]
|
// vSum += pV1[0..3] * pV2[0..3]
|
||||||
@ -152,7 +153,7 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
|
|||||||
|
|
||||||
// Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
|
// Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
|
||||||
corr = norm = 0.0;
|
corr = norm = 0.0;
|
||||||
for (i = 0; i < overlapLength / 8; i ++)
|
for (i = 0; i < channels * overlapLength / 16; i ++)
|
||||||
{
|
{
|
||||||
corr += pV1[0] * pV2[0] +
|
corr += pV1[0] * pV2[0] +
|
||||||
pV1[1] * pV2[1] +
|
pV1[1] * pV2[1] +
|
||||||
@ -178,74 +179,6 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
|
|||||||
}
|
}
|
||||||
return corr / sqrt(norm);
|
return corr / sqrt(norm);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* This is a bit outdated, corresponding routine in assembler. This may be teeny-weeny bit
|
|
||||||
faster than intrinsic version, but more difficult to maintain & get compiled on multiple
|
|
||||||
platforms.
|
|
||||||
|
|
||||||
uint overlapLengthLocal = overlapLength;
|
|
||||||
float corr;
|
|
||||||
|
|
||||||
_asm
|
|
||||||
{
|
|
||||||
// Very important note: data in 'pV2' _must_ be aligned to
|
|
||||||
// 16-byte boundary!
|
|
||||||
|
|
||||||
// give prefetch hints to CPU of what data are to be needed soonish
|
|
||||||
// give more aggressive hints on pV1 as that changes while pV2 stays
|
|
||||||
// same between runs
|
|
||||||
prefetcht0 [pV1]
|
|
||||||
prefetcht0 [pV2]
|
|
||||||
prefetcht0 [pV1 + 32]
|
|
||||||
|
|
||||||
mov eax, dword ptr pV1
|
|
||||||
mov ebx, dword ptr pV2
|
|
||||||
|
|
||||||
xorps xmm0, xmm0
|
|
||||||
|
|
||||||
mov ecx, overlapLengthLocal
|
|
||||||
shr ecx, 3 // div by eight
|
|
||||||
|
|
||||||
loop1:
|
|
||||||
prefetcht0 [eax + 64] // give a prefetch hint to CPU what data are to be needed soonish
|
|
||||||
prefetcht0 [ebx + 32] // give a prefetch hint to CPU what data are to be needed soonish
|
|
||||||
movups xmm1, [eax]
|
|
||||||
mulps xmm1, [ebx]
|
|
||||||
addps xmm0, xmm1
|
|
||||||
|
|
||||||
movups xmm2, [eax + 16]
|
|
||||||
mulps xmm2, [ebx + 16]
|
|
||||||
addps xmm0, xmm2
|
|
||||||
|
|
||||||
prefetcht0 [eax + 96] // give a prefetch hint to CPU what data are to be needed soonish
|
|
||||||
prefetcht0 [ebx + 64] // give a prefetch hint to CPU what data are to be needed soonish
|
|
||||||
|
|
||||||
movups xmm3, [eax + 32]
|
|
||||||
mulps xmm3, [ebx + 32]
|
|
||||||
addps xmm0, xmm3
|
|
||||||
|
|
||||||
movups xmm4, [eax + 48]
|
|
||||||
mulps xmm4, [ebx + 48]
|
|
||||||
addps xmm0, xmm4
|
|
||||||
|
|
||||||
add eax, 64
|
|
||||||
add ebx, 64
|
|
||||||
|
|
||||||
dec ecx
|
|
||||||
jnz loop1
|
|
||||||
|
|
||||||
// add the four floats of xmm0 together and return the result.
|
|
||||||
|
|
||||||
movhlps xmm1, xmm0 // move 3 & 4 of xmm0 to 1 & 2 of xmm1
|
|
||||||
addps xmm1, xmm0
|
|
||||||
movaps xmm2, xmm1
|
|
||||||
shufps xmm2, xmm2, 0x01 // move 2 of xmm2 as 1 of xmm2
|
|
||||||
addss xmm2, xmm1
|
|
||||||
movss corr, xmm2
|
|
||||||
}
|
|
||||||
|
|
||||||
return (double)corr;
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user