// This means in a separable convolution, for each pass:
// - if point filtering: 12+1+12 texture fetches are needed
// - if linear filtering: 6+1+6 texture fetches are needed
  // Size of the one-sided tap table: the center tap plus 12 side taps.
  #define NUM_WEIGHTS (1+12)

  // Builds a one-sided, normalized Gaussian tap table that fits in
  // NUM_WEIGHTS entries, decimating (doubling the tap spacing and halving
  // imageWidth) until it does.
  //
  //   imageWidth  - width of the image to be filtered; halved per decimation.
  //   sigma       - blur amount; any value that is not > 0 (zero, negative,
  //                 NaN) yields a pass-through filter.
  //   weights     - out: cleared, then either left empty (no filtering
  //                 needed) or filled with exactly NUM_WEIGHTS entries.
  //   offsets     - out: tap positions matching 'weights' entry for entry.
  //   decimations - out: how many times the tap spacing was doubled.
  //   interlaced  - when true the base tap spacing is 2 instead of 1.
  //
  // NOTE(review): sigma enters the exponent unsquared, i.e. it acts as the
  // variance of the curve rather than its standard deviation - confirm that
  // this is what callers expect before changing it.
  void GenGaussian( int imageWidth, float sigma, std::vector<float>& weights, std::vector<float>& offsets, int& decimations, bool interlaced )
  {
      const size_t maxTaps = NUM_WEIGHTS;
      const float cutoff = 1.0f/255.0f;

      weights.clear();
      offsets.clear();
      decimations = 0;

      if( !(sigma > 0.0f) )
      {
          // No blur requested. Written as a negated '>' so that negative
          // and NaN inputs land here too: a negative sigma would make the
          // curve grow instead of decay and the tap loop below would never
          // terminate.
          weights.push_back(1.0f);
          offsets.push_back(0.0f);
      }
      else
      {
          // Tap spacing for the current decimation level. Doubling a float
          // is exact and, unlike '1 << decimations', cannot overflow.
          float step = interlaced ? 2.0f : 1.0f;

          // Generate gaussian curve weights, and at the same time decide
          // whether to scale the image down before filtering, and how much.
          for( ; ; ++decimations, imageWidth /= 2, step *= 2.0f )
          {
              weights.clear();
              offsets.clear();

              // '<=' rather than '==' so that a zero or negative width also
              // stops the decimation instead of looping without a bound.
              if( imageWidth <= 1 )
                  break;

              float x = 0.0f;
              float g;
              do
              {
                  g = exp(-x*x/(2.0f*sigma));
                  weights.push_back( g );
                  offsets.push_back( x );
                  x += step;

                  // The first tap below the cutoff is still stored. Stop as
                  // soon as the table is too large: it is going to be thrown
                  // away anyway, and for a huge sigma 'x' eventually stops
                  // advancing in float precision.
              } while( g >= cutoff && weights.size() <= maxTaps );

              if( weights.size() <= maxTaps )
                  break;
          }
      }

      // If we decimated all the way down to imageWidth 1,
      // no need to filter at all.
      if( weights.empty() )
          return;

      // Pad with zero-weight taps so the table always has NUM_WEIGHTS entries.
      while( weights.size() < maxTaps )
      {
          weights.push_back(0.0f);
          offsets.push_back(0.0f);
      }

      // Normalize the weight table so we net 1.0 when filtering.
      // Note that entries 1..n are scaled by two, because the
      // pixel shader will use them for the samples both to the
      // left and right of the center sample.
      // |...|3|2|1|0|1|2|3|...|
      float sum = weights[0];
      for( size_t i = 1; i < weights.size(); ++i )
          sum += weights[i] * 2.0f;

      for( size_t i = 0; i < weights.size(); ++i )
          weights[i] /= sum;
  }
  66.  
  // Collapses a one-sided tap table into taps meant for bilinear fetches:
  // the center tap is kept as is, and every following pair of taps is merged
  // into one tap whose weight is the pair's sum and whose offset lies
  // between the two, proportionally to their weights.
  //
  //   weights, offsets   - input tap table (entry 0 is the center tap).
  //   bweights, boffsets - out: merged taps are APPENDED; existing contents
  //                        are kept.
  //
  // An empty table produces no output. If the number of side taps is odd,
  // the last one has no partner and is emitted unchanged. Only taps that
  // have both a weight and an offset are considered.
  void OptimizeBilinear( const std::vector<float> weights,
                         const std::vector<float> offsets,
                         std::vector<float>& bweights,
                         std::vector<float>& boffsets )
  {
      if( weights.empty() )
          return;

      // The center tap always sits at offset zero and is never merged.
      bweights.push_back( weights[0] );
      boffsets.push_back( 0.0f );

      // Side taps need an entry in both tables.
      const size_t count = weights.size() < offsets.size() ? weights.size() : offsets.size();

      size_t i = 1;
      for( ; i + 1 < count; i += 2 )
      {
          const float w1 = weights[i];
          const float w2 = weights[i + 1];
          const float o1 = offsets[i];
          const float o2 = offsets[i + 1];
          const float w12 = w1 + w2; // overall weight for bilinear sample

          // Bilinear 'k': how far to move from the first tap toward the
          // second. Guarded so that (near) zero-weight pairs do not divide
          // by zero.
          const float k = w12 > 0.0001f ? w2/w12 : 0.0f;

          bweights.push_back( w12 );
          boffsets.push_back( o1 + k * (o2-o1) );
      }

      // Unpaired trailing tap: nothing to merge it with.
      if( i < count )
      {
          bweights.push_back( weights[i] );
          boffsets.push_back( offsets[i] );
      }
  }
  87.  
  88. void CLJob::SetupHBlur( engine::PassConfig& pc, int& decim )
  89. {
  90. std::vector<float> weights, offsets, bweights, boffsets;
  91. GenGaussian( m_Width, m_CLProps.m_HBlur, weights, offsets, decim, false );
  92.  
  93. if( !weights.empty() )
  94. {
  95. OptimizeBilinear( weights, offsets, bweights, boffsets );
  96.  
  97. for( size_t i = 0; i < bweights.size(); ++i )
  98. {
  99. pc.SetVsParam( 4+i, boffsets[i]/m_Width, 0.0f, bweights[i], 0.0f );
  100. pc.SetPsParam( 4+i, boffsets[i]/m_Width, 0.0f, bweights[i], 0.0f );
  101. }
  102. }
  103. }
  104.