// This means in a separable convolution, for each pass:
// - if point filtering: 12+1+12 texture fetches are needed
// - if linear filtering: 6+1+6 texture fetches are needed
#define NUM_WEIGHTS (1+12)

// Builds a one-sided Gaussian tap table (center tap plus the taps on one
// side) and decides how many times the image has to be halved before
// filtering so that the table fits into NUM_WEIGHTS entries.
//
//   imageWidth  - width of the image to filter; halved once per decimation.
//   sigma       - blur amount. Tap weights are exp(-x*x / (2*sigma)), so the
//                 value acts like a variance in that formula. Zero, negative
//                 and NaN values all produce a single pass-through tap.
//   weights     - out: normalized tap weights; entry 0 is the center tap.
//                 Cleared first. Left empty when the image was decimated
//                 down to a width of 1 before the table fit.
//   offsets     - out: tap distances from the center, same length as weights.
//   decimations - out: number of halvings performed before the table fit.
//   interlaced  - when true, taps are spaced two units apart instead of one.
//
// A non-empty result always holds at least NUM_WEIGHTS entries; unused
// trailing entries have weight 0 and offset 0.
void GenGaussian( int imageWidth, float sigma, std::vector<float>& weights, std::vector<float>& offsets, int& decimations, bool interlaced )
{
    const float stepx = interlaced ? 2.0f : 1.0f;
    const float cutoff = 1.0f/255.0f;

    // Generate gaussian curve weights, and at the same time decide whether
    // to scale the image down before filtering, and how much (decimations).
    for( decimations = 0; ; ++decimations, imageWidth /= 2 )
    {
        weights.clear();
        offsets.clear();

        // No usable blur amount: emit a single pass-through tap.
        // Written as !(sigma > 0) so that negative values (which would make
        // the curve grow instead of fall off, and the loop below never end)
        // and NaN are handled the same way as zero.
        if( !(sigma > 0.0f) )
        {
            weights.push_back(1.0f);
            offsets.push_back(0.0f);
            break;
        }

        // Decimated all the way down: leave the table empty.
        if( imageWidth == 1 )
            break;

        const float step = stepx * (1 << decimations);
        float x = 0.0f;
        float g;
        do
        {
            g = exp(-x*x/(2.0f*sigma));
            weights.push_back( g );
            offsets.push_back( x );
            x += step;
            // Stop as soon as the table is known to be too large; it is
            // discarded and rebuilt with a coarser step in that case anyway.
        } while( g >= cutoff && weights.size() <= NUM_WEIGHTS );

        if( weights.size() <= NUM_WEIGHTS )
            break;
    }

    // If we decimated all the way down to imageWidth 1,
    // no need to filter at all.
    if( weights.empty() )
        return;

    // Pad with zero-weight taps so the table always has NUM_WEIGHTS entries.
    for( size_t i = weights.size(); i < NUM_WEIGHTS; ++i )
    {
        weights.push_back(0.0f);
        offsets.push_back(0.0f);
    }

    // Normalize the weight table so we net 1.0 when filtering.
    // Note that entries 1..n are scaled by two, because the
    // pixel shader will use them for the samples both to the
    // left and right of the center sample.
    // |...|3|2|1|0|1|2|3|...|
    float sum = 0.0f;
    for( size_t i = 0; i < weights.size(); ++i )
        sum += weights[i] * (i > 0 ? 2.0f : 1.0f);
    for( size_t i = 0; i < weights.size(); ++i )
        weights[i] /= sum;
}
// Merges a one-sided tap table into roughly half as many taps by pairing
// neighbouring side taps, so that one linearly filtered texture fetch can
// stand in for two point-sampled ones.
//
//   weights  - tap weights; entry 0 is the center tap, which is kept as is.
//   offsets  - tap distances from the center, parallel to weights.
//   bweights - out (appended): center weight, then one combined weight per pair.
//   boffsets - out (appended): 0 for the center, then for each pair a position
//              between the two source offsets, pulled towards the second tap
//              in proportion to its share of the pair's weight.
//
// Nothing is appended when weights is empty. A trailing side tap that has no
// partner (even tap count, or offsets shorter than weights) is emitted on
// its own with its original offset instead of reading past the end.
void OptimizeBilinear( const std::vector<float> weights,
                       const std::vector<float> offsets,
                       std::vector<float>& bweights,
                       std::vector<float>& boffsets )
{
    if( weights.empty() )
        return;

    // The center tap is passed through with a fixed offset of zero.
    bweights.push_back( weights[0] );
    boffsets.push_back( 0.0f );

    // Side taps need both a weight and an offset.
    const size_t count = weights.size() < offsets.size() ? weights.size() : offsets.size();

    for( size_t i = 1; i < count; i += 2 )
    {
        const bool hasPartner = i + 1 < count;

        const float w1 = weights[i];
        const float w2 = hasPartner ? weights[i + 1] : 0.0f;
        const float o0 = offsets[i];
        const float o1 = hasPartner ? offsets[i + 1] : o0;

        const float w12 = w1 + w2; // overall weight for bilinear sample
        // Bilinear 'k': how far to move from o0 towards o1. Pairs with a
        // negligible total weight stay at o0 to avoid dividing by ~0.
        const float k = w12 > 0.0001f ? w2/w12 : 0.0f;

        bweights.push_back( w12 );
        boffsets.push_back( o0 + k * (o1-o0) );
    }
}
// Computes the horizontal blur taps for this job and stores them in the
// given pass configuration.
//
// The tap table is generated from m_Width and m_CLProps.m_HBlur (non
// interlaced), with the decimation count written to 'decim'. When the table
// comes back empty nothing is written to 'pc'. Otherwise the taps are merged
// pairwise and each resulting tap is stored, starting at parameter index 4,
// in both the VS and PS parameter sets as (offset / m_Width, 0, weight, 0).
void CLJob::SetupHBlur( engine::PassConfig& pc, int& decim )
{
    std::vector<float> gaussWeights;
    std::vector<float> gaussOffsets;
    GenGaussian( m_Width, m_CLProps.m_HBlur, gaussWeights, gaussOffsets, decim, false );

    // No taps produced: leave the pass parameters untouched.
    if( gaussWeights.empty() )
        return;

    std::vector<float> tapWeights;
    std::vector<float> tapOffsets;
    OptimizeBilinear( gaussWeights, gaussOffsets, tapWeights, tapOffsets );

    const size_t tapCount = tapWeights.size();
    for( size_t tap = 0; tap != tapCount; ++tap )
    {
        pc.SetVsParam( 4+tap, tapOffsets[tap]/m_Width, 0.0f, tapWeights[tap], 0.0f );
        pc.SetPsParam( 4+tap, tapOffsets[tap]/m_Width, 0.0f, tapWeights[tap], 0.0f );
    }
}