Files
obs-studio/libobs/data/bicubic_scale.effect
jpark37 2721ac4a85 libobs: Optimize bicubic shader
Use bilinear filtering to reduce 16 taps to 9 for the regular path. This
works because the middle weights are always between 0 and 1, allowing
texture coordinates to be placed strategically to sample correct ratios.
I'm not sure about the undistort path, so I've left that alone.

Also remove weight normalization. I'm not seeing that make even a small
difference.

Intel HD Graphics 530, D3D11
644x478 -> 1323x1080: 1790 us -> 1279 us
1920x1080 -> 1280x720: 1301 us -> 918 us

References:
https://entropymine.com/imageworsener/bicubic/
http://vec3.ca/bicubic-filtering-in-fewer-taps/
http://developer.download.nvidia.com/books/HTML/gpugems/gpugems_ch24.html
2019-07-25 22:21:11 -07:00

189 lines
4.6 KiB
Plaintext

/*
* bicubic sharper (better for downscaling)
* note - this shader is adapted from the GPL bsnes shader, very good stuff
* there.
*/
/* Matrix applied to the vertex position in VSDefault. */
uniform float4x4 ViewProj;
/* Source texture that is scaled by this effect. */
uniform texture2d image;
/* Multiplied with UV coordinates in DrawBicubic to obtain texel-space
 * positions (presumably the source size in texels - confirm with caller). */
uniform float2 base_dimension;
/* Used in DrawBicubic as the UV step between neighbouring taps
 * (presumably 1 / base_dimension - confirm with caller). */
uniform float2 base_dimension_i;
/* Blend factor passed to AspectUndistortX; at the default of 1.0 that
 * function returns its input unchanged. */
uniform float undistort_factor = 1.0;
/* Sampler used for every image.Sample call in this file: linear filtering
 * with coordinates clamped on both axes. */
sampler_state textureSampler {
Filter = Linear;
AddressU = Clamp;
AddressV = Clamp;
};
/* Vertex shader input: position and texture coordinate. */
struct VertData {
float4 pos : POSITION;
float2 uv : TEXCOORD0;
};
/* Vertex shader output: texture coordinate and transformed position. */
struct VertOut {
float2 uv : TEXCOORD0;
float4 pos : POSITION;
};
/* Pixel shader input: only the texture coordinate is consumed. */
struct FragData {
float2 uv : TEXCOORD0;
};
/* Vertex stage: transforms the position by ViewProj (with w forced to 1.0)
 * and forwards the texture coordinate untouched. */
VertOut VSDefault(VertData v_in)
{
	VertOut result;
	result.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj);
	result.uv = v_in.uv;
	return result;
}
/* Bicubic kernel weight for a tap at signed distance x from the sample
 * point. Returns 0.0 for |x| >= 2.0 and uses one cubic polynomial for
 * |x| < 1.0 and another for 1.0 <= |x| < 2.0. */
float weight(float x)
{
	float dist = abs(x);

	/* Outside the kernel support there is no contribution. */
	if (!(dist < 2.0)) {
		return 0.0;
	}

	/* Sharper version. May look better in some cases. B=0, C=0.75 */
	float sq = x * x;
	float sixth = 1.0 / 6.0;

	if (dist < 1.0) {
		/* Inner lobe. */
		return (sq * (7.5 * dist + (-13.5))) * sixth + 1.0;
	}

	/* Outer lobe. */
	return (sq * ((-4.5) * dist + 22.5) + (-36.0) * dist) * sixth + 3.0;
}
/* Evaluates weight() at x - 2, x - 1, x and x + 1 and packs the four
 * results into the x, y, z and w components respectively. */
float4 weight4(float x)
{
	float w_minus2 = weight(x - 2.0);
	float w_minus1 = weight(x - 1.0);
	float w_zero = weight(x);
	float w_plus1 = weight(x + 1.0);
	return float4(w_minus2, w_minus1, w_zero, w_plus1);
}
/* Blends a fifth-power curve with the identity: (1 - a) * x^5 + a * x.
 * With a = 1.0 the result is x itself. */
float AspectUndistortX(float x, float a)
{
	// The higher the power, the longer the linear part will be.
	float x_pow5 = x * x * x * x * x;
	float curve_part = (1.0 - a) * x_pow5;
	return curve_part + a * x;
}
/* Applies AspectUndistortX (using undistort_factor) to a texture
 * coordinate by remapping it around 0.5 first and mapping it back after. */
float AspectUndistortU(float u)
{
	// Normalize texture coord to -1.0 to 1.0 range, and back.
	float centered = (u - 0.5) * 2.0;
	float warped = AspectUndistortX(centered, undistort_factor);
	return warped * 0.5 + 0.5;
}
/* Builds a sampling coordinate whose horizontal component is warped by
 * AspectUndistortU; the vertical component is passed through unchanged. */
float2 undistort_coord(float xpos, float ypos)
{
	float warped_x = AspectUndistortU(xpos);
	return float2(warped_x, ypos);
}
/* Samples the image through textureSampler at the coordinate returned by
 * undistort_coord(xpos, ypos). */
float4 undistort_pixel(float xpos, float ypos)
{
	float2 coord = undistort_coord(xpos, ypos);
	return image.Sample(textureSampler, coord);
}
/* Weighted sum of four undistorted samples on one row: the samples are
 * taken at the four x positions in xpos (all at ypos) and each is scaled
 * by the matching component of rowtaps. */
float4 undistort_line(float4 xpos, float ypos, float4 rowtaps)
{
	float4 acc = undistort_pixel(xpos.x, ypos) * rowtaps.x;
	acc += undistort_pixel(xpos.y, ypos) * rowtaps.y;
	acc += undistort_pixel(xpos.z, ypos) * rowtaps.z;
	acc += undistort_pixel(xpos.w, ypos) * rowtaps.w;
	return acc;
}
/*
 * Bicubic-filtered fetch of `image` at f_in.uv.
 *
 * f_in      - fragment input; only f_in.uv is read.
 * undistort - when true, every tap goes through undistort_pixel() and the
 *             full 4x4 grid (16 samples) is evaluated. When false, the two
 *             middle taps on each axis are merged into one linearly
 *             filtered sample, giving 9 fetches (4 Load + 5 Sample).
 *
 * Returns the weighted sum of the taps. No normalization of the weights is
 * applied.
 */
float4 DrawBicubic(FragData f_in, bool undistort)
{
/* Per-tap step in UV units (presumably one texel - confirm with caller). */
float2 stepxy = base_dimension_i;
/* Shift by half a step, then take the fractional position inside the
 * base_dimension grid. */
float2 pos = f_in.uv + stepxy * 0.5;
float2 f = frac(pos * base_dimension);
/* Four kernel weights per axis, evaluated at (1 - f) - 2 .. (1 - f) + 1. */
float4 rowtaps = weight4(1.0 - f.x);
float4 coltaps = weight4(1.0 - f.y);
/* uv0 is pos moved back by (1.5 + f) steps; uv1..uv3 follow one step apart. */
float2 uv0 = (-1.5 - f) * stepxy + pos;
float2 uv1 = uv0 + stepxy;
float2 uv2 = uv1 + stepxy;
float2 uv3 = uv2 + stepxy;
if (undistort) {
/* Full 4x4 path: each row is a 4-tap weighted sum of undistorted samples,
 * and the four rows are combined with the column weights. */
float4 xpos = float4(uv0.x, uv1.x, uv2.x, uv3.x);
return undistort_line(xpos, uv0.y, rowtaps) * coltaps.x +
undistort_line(xpos, uv1.y, rowtaps) * coltaps.y +
undistort_line(xpos, uv2.y, rowtaps) * coltaps.z +
undistort_line(xpos, uv3.y, rowtaps) * coltaps.w;
}
/* Merge the two middle horizontal taps: sample between uv1.x and uv2.x at
 * a fraction rowtaps.z / (rowtaps.y + rowtaps.z) of a step and weight the
 * result by the summed weight. This relies on textureSampler's Linear
 * filter to blend the two texels in that ratio. */
float u_weight_sum = rowtaps.y + rowtaps.z;
float u_middle_offset = rowtaps.z * stepxy.x / u_weight_sum;
float u_middle = uv1.x + u_middle_offset;
/* Same merge for the two middle vertical taps. */
float v_weight_sum = coltaps.y + coltaps.z;
float v_middle_offset = coltaps.z * stepxy.y / v_weight_sum;
float v_middle = uv1.y + v_middle_offset;
/* Integer texel coordinates for the outermost taps, limited to at least
 * 0.5 and at most base_dimension - 0.5 before truncation to int. */
int2 coord_top_left = int2(max(uv0 * base_dimension, 0.5));
int2 coord_bottom_right = int2(min(uv3 * base_dimension, base_dimension - 0.5));
/* Top row: corner texels via Load, merged middle pair via Sample. */
float4 top = image.Load(int3(coord_top_left, 0)) * rowtaps.x;
top += image.Sample(textureSampler, float2(u_middle, uv0.y)) * u_weight_sum;
top += image.Load(int3(coord_bottom_right.x, coord_top_left.y, 0)) * rowtaps.w;
float4 total = top * coltaps.x;
/* Middle rows (merged vertically): three samples at v_middle. */
float4 middle = image.Sample(textureSampler, float2(uv0.x, v_middle)) * rowtaps.x;
middle += image.Sample(textureSampler, float2(u_middle, v_middle)) * u_weight_sum;
middle += image.Sample(textureSampler, float2(uv3.x, v_middle)) * rowtaps.w;
total += middle * v_weight_sum;
/* Bottom row: mirrors the top row. */
float4 bottom = image.Load(int3(coord_top_left.x, coord_bottom_right.y, 0)) * rowtaps.x;
bottom += image.Sample(textureSampler, float2(u_middle, uv3.y)) * u_weight_sum;
bottom += image.Load(int3(coord_bottom_right, 0)) * rowtaps.w;
total += bottom * coltaps.w;
return total;
}
/* Pixel shader entry point: outputs the DrawBicubic result unchanged.
 * `undistort` is forwarded as-is and selects the undistort path. */
float4 PSDrawBicubicRGBA(FragData f_in, bool undistort) : TARGET
{
	float4 color = DrawBicubic(f_in, undistort);
	return color;
}
/* Pixel shader entry point: runs DrawBicubic without undistortion, then
 * divides RGB by alpha. When alpha is not greater than zero the RGB
 * channels are zeroed instead of divided. Alpha is returned unchanged. */
float4 PSDrawBicubicRGBADivide(FragData f_in) : TARGET
{
	float4 color = DrawBicubic(f_in, false);
	float a = color.a;

	float inv_alpha = 0.0;
	if (a > 0.0) {
		inv_alpha = 1.0 / a;
	}

	return float4(color.rgb * inv_alpha, a);
}
/* Bicubic scaling without undistortion (undistort = false). */
technique Draw
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawBicubicRGBA(f_in, false);
}
}
/* Bicubic scaling followed by dividing RGB by alpha
 * (see PSDrawBicubicRGBADivide). */
technique DrawAlphaDivide
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawBicubicRGBADivide(f_in);
}
}
/* Bicubic scaling with the horizontal undistort warp enabled
 * (undistort = true). */
technique DrawUndistort
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawBicubicRGBA(f_in, true);
}
}