mpv-conf/shaders/ravu-r2-yuv.glsl
2021-12-03 20:50:08 +08:00

356 lines
34 KiB
GLSL

//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!DESC RAVU (step1, yuv, r2, compute)
//!HOOK NATIVE
//!BIND HOOKED
//!BIND ravu_lut2
//!SAVE ravu_int11
//!WHEN HOOKED.w OUTPUT.w / 0.707106 < HOOKED.h OUTPUT.h / 0.707106 < * LUMA.w 0 > *
//!COMPUTE 32 8
// RAVU r2, step 1 (YUV, compute).
// Each invocation reads a 4x4 window of HOOKED samples from a workgroup-shared
// tile, classifies the local luma gradient structure, fetches 8 kernel weights
// from ravu_lut2 and stores one filtered vec3 to out_image at
// gl_GlobalInvocationID.
//
// Shared tile layout: 385 = 35 * 11 entries addressed as x * 11 + y, i.e. the
// 32x8 workgroup footprint plus 3 extra entries per axis for the 4x4 window.
shared vec3 inp0[385];
// First (.x) channel of every inp0 entry, used for the gradient analysis.
shared float inp_luma0[385];
void hook() {
    ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
    // Tile index of this invocation's first window tap (window offset (0, 0)).
    int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);
    // Cooperative load: the invocations stride over the tile until all 385
    // entries are filled.
    for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
        int x = id / 11, y = id % 11;
        inp0[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x+x)+(-0.5), float(group_base.y+y)+(-0.5))).xyz;
        inp_luma0[id] = inp0[id].x;
    }
    // The whole tile must be written before any invocation reads from it.
    groupMemoryBarrier();
    barrier();
    {
        // 4x4 luma window: lumaN is the tap at tile offset (N / 4) * 11 + (N % 4).
        float luma0 = inp_luma0[local_pos + 0];
        float luma4 = inp_luma0[local_pos + 11];
        float luma5 = inp_luma0[local_pos + 12];
        float luma6 = inp_luma0[local_pos + 13];
        float luma7 = inp_luma0[local_pos + 14];
        float luma1 = inp_luma0[local_pos + 1];
        float luma8 = inp_luma0[local_pos + 22];
        float luma9 = inp_luma0[local_pos + 23];
        float luma10 = inp_luma0[local_pos + 24];
        float luma11 = inp_luma0[local_pos + 25];
        float luma2 = inp_luma0[local_pos + 2];
        float luma12 = inp_luma0[local_pos + 33];
        float luma13 = inp_luma0[local_pos + 34];
        float luma14 = inp_luma0[local_pos + 35];
        float luma15 = inp_luma0[local_pos + 36];
        float luma3 = inp_luma0[local_pos + 3];
        // Accumulate the weighted gradient products (gx*gx, gx*gy, gy*gy) over
        // all 16 taps. gx differences taps 4 indices apart, gy differences
        // adjacent indices; differences are central (halved) where both
        // neighbours lie inside the window and one-sided on its border.
        // The three distinct weights (corner / border / inner taps) sum to ~1
        // over the window; presumably a Gaussian window.
        vec3 abd = vec3(0.0);
        float gx, gy;
        gx = (luma4-luma0);
        gy = (luma1-luma0);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma5-luma1);
        gy = (luma2-luma0)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma6-luma2);
        gy = (luma3-luma1)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma7-luma3);
        gy = (luma3-luma2);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma8-luma0)/2.0;
        gy = (luma5-luma4);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma9-luma1)/2.0;
        gy = (luma6-luma4)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma10-luma2)/2.0;
        gy = (luma7-luma5)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma11-luma3)/2.0;
        gy = (luma7-luma6);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma12-luma4)/2.0;
        gy = (luma9-luma8);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma13-luma5)/2.0;
        gy = (luma10-luma8)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma14-luma6)/2.0;
        gy = (luma11-luma9)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma15-luma7)/2.0;
        gy = (luma11-luma10);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma12-luma8);
        gy = (luma13-luma12);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma13-luma9);
        gy = (luma14-luma12)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma14-luma10);
        gy = (luma15-luma13)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma15-luma11);
        gy = (luma15-luma14);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        // Eigen-decomposition of the symmetric 2x2 matrix [[a, b], [b, d]]:
        // T is its trace, D its determinant, L1 >= L2 its eigenvalues.
        float a = abd.x, b = abd.y, d = abd.z;
        float T = a + d, D = a * d - b * b;
        float delta = sqrt(max(T * T / 4.0 - D, 0.0));
        float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
        // L2 is non-negative mathematically, but rounding in T / 2.0 - delta
        // can leave it slightly below zero. sqrt() of a negative value is
        // undefined in GLSL (NaN in practice, which would poison mu), so clamp.
        float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(max(L2, 0.0));
        // Orientation folded into [0, pi); forced to 0 when b is within float
        // epsilon of zero (the three-argument mix selects by the bool).
        float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
        float lambda = sqrtL1;
        // Relative eigenvalue spread; 0 when both square roots are ~0.
        float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
        // Quantise: 24 angle buckets, 9 strength levels (0..8), 3 coherence levels.
        float angle = floor(theta * 24.0 / 3.141592653589793);
        float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
        float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
        // Normalised centre of the selected LUT row (24 * 9 * 3 = 648 rows).
        float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
        // Apply the kernel: two LUT fetches (x = 0.25 and 0.75) give 8 weights.
        // Each weight is shared by a pair of taps whose tile offsets sum to 36,
        // i.e. taps that are point-symmetric within the 4x4 window.
        vec3 res = vec3(0.0);
        vec4 w;
        w = texture(ravu_lut2, vec2(0.25, coord_y));
        res += (inp0[local_pos + 0] + inp0[local_pos + 36]) * w[0];
        res += (inp0[local_pos + 1] + inp0[local_pos + 35]) * w[1];
        res += (inp0[local_pos + 2] + inp0[local_pos + 34]) * w[2];
        res += (inp0[local_pos + 3] + inp0[local_pos + 33]) * w[3];
        w = texture(ravu_lut2, vec2(0.75, coord_y));
        res += (inp0[local_pos + 11] + inp0[local_pos + 25]) * w[0];
        res += (inp0[local_pos + 12] + inp0[local_pos + 24]) * w[1];
        res += (inp0[local_pos + 13] + inp0[local_pos + 23]) * w[2];
        res += (inp0[local_pos + 14] + inp0[local_pos + 22]) * w[3];
        res = clamp(res, 0.0, 1.0);
        imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0));
    }
}
//!DESC RAVU (step2, yuv, r2, compute)
//!HOOK NATIVE
//!BIND HOOKED
//!BIND ravu_lut2
//!BIND ravu_int11
//!WIDTH 2 HOOKED.w *
//!HEIGHT 2 HOOKED.h *
//!OFFSET -0.500000 -0.500000
//!WHEN HOOKED.w OUTPUT.w / 0.707106 < HOOKED.h OUTPUT.h / 0.707106 < * LUMA.w 0 > *
//!COMPUTE 64 16 32 8
// RAVU r2, step 2 (YUV, compute).
// Each invocation writes a 2x2 quad of out_image at gl_GlobalInvocationID * 2:
//   (0, 0) - the HOOKED sample, passed through unchanged
//   (1, 1) - the ravu_int11 sample (step-1 result), passed through unchanged
//   (0, 1) and (1, 0) - newly filtered from taps of both inputs, using the
//                       same gradient classification and LUT as step 1.
//
// Both shared tiles hold 385 = 35 * 11 entries addressed as x * 11 + y.
// Tile 0: ravu_int11 samples.
shared vec3 inp0[385];
// First (.x) channel of every inp0 entry.
shared float inp_luma0[385];
// Tile 1: HOOKED samples.
shared vec3 inp1[385];
// First (.x) channel of every inp1 entry.
shared float inp_luma1[385];
void hook() {
    ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
    // Tile index of this invocation's entry at tile offset (0, 0).
    int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);
    // Cooperative load of tile 0 (ravu_int11, sample offset -1.5).
    for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
        int x = id / 11, y = id % 11;
        inp0[id] = ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x+x)+(-1.5), float(group_base.y+y)+(-1.5))).xyz;
        inp_luma0[id] = inp0[id].x;
    }
    // Cooperative load of tile 1 (HOOKED, sample offset -0.5).
    for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
        int x = id / 11, y = id % 11;
        inp1[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x+x)+(-0.5), float(group_base.y+y)+(-0.5))).xyz;
        inp_luma1[id] = inp1[id].x;
    }
    // Both tiles must be fully written before any invocation reads from them.
    groupMemoryBarrier();
    barrier();
    // ---- Output pixel (0, 1) of the quad ----
    {
        // 16 luma taps drawn from both tiles; luma0..luma15 are numbered so
        // that the gradient code below is identical for every kernel.
        float luma8 = inp_luma0[local_pos + 12];
        float luma5 = inp_luma0[local_pos + 13];
        float luma2 = inp_luma0[local_pos + 14];
        float luma13 = inp_luma0[local_pos + 23];
        float luma10 = inp_luma0[local_pos + 24];
        float luma7 = inp_luma0[local_pos + 25];
        float luma0 = inp_luma0[local_pos + 2];
        float luma15 = inp_luma0[local_pos + 35];
        float luma12 = inp_luma1[local_pos + 11];
        float luma9 = inp_luma1[local_pos + 12];
        float luma6 = inp_luma1[local_pos + 13];
        float luma3 = inp_luma1[local_pos + 14];
        float luma4 = inp_luma1[local_pos + 1];
        float luma14 = inp_luma1[local_pos + 23];
        float luma11 = inp_luma1[local_pos + 24];
        float luma1 = inp_luma1[local_pos + 2];
        // Accumulate the weighted gradient products (gx*gx, gx*gy, gy*gy) over
        // all 16 taps. gx differences taps 4 indices apart, gy differences
        // adjacent indices; differences are central (halved) where both
        // neighbours exist in the 4x4 numbering and one-sided on its border.
        vec3 abd = vec3(0.0);
        float gx, gy;
        gx = (luma4-luma0);
        gy = (luma1-luma0);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma5-luma1);
        gy = (luma2-luma0)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma6-luma2);
        gy = (luma3-luma1)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma7-luma3);
        gy = (luma3-luma2);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma8-luma0)/2.0;
        gy = (luma5-luma4);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma9-luma1)/2.0;
        gy = (luma6-luma4)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma10-luma2)/2.0;
        gy = (luma7-luma5)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma11-luma3)/2.0;
        gy = (luma7-luma6);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma12-luma4)/2.0;
        gy = (luma9-luma8);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma13-luma5)/2.0;
        gy = (luma10-luma8)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma14-luma6)/2.0;
        gy = (luma11-luma9)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma15-luma7)/2.0;
        gy = (luma11-luma10);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma12-luma8);
        gy = (luma13-luma12);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma13-luma9);
        gy = (luma14-luma12)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma14-luma10);
        gy = (luma15-luma13)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma15-luma11);
        gy = (luma15-luma14);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        // Eigen-decomposition of the symmetric 2x2 matrix [[a, b], [b, d]]:
        // T is its trace, D its determinant, L1 >= L2 its eigenvalues.
        float a = abd.x, b = abd.y, d = abd.z;
        float T = a + d, D = a * d - b * b;
        float delta = sqrt(max(T * T / 4.0 - D, 0.0));
        float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
        // L2 is non-negative mathematically, but rounding in T / 2.0 - delta
        // can leave it slightly below zero. sqrt() of a negative value is
        // undefined in GLSL (NaN in practice, which would poison mu), so clamp.
        float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(max(L2, 0.0));
        // Orientation folded into [0, pi); forced to 0 when b is within float
        // epsilon of zero (the three-argument mix selects by the bool).
        float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
        float lambda = sqrtL1;
        // Relative eigenvalue spread; 0 when both square roots are ~0.
        float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
        // Quantise: 24 angle buckets, 9 strength levels (0..8), 3 coherence levels.
        float angle = floor(theta * 24.0 / 3.141592653589793);
        float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
        float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
        // Normalised centre of the selected LUT row (24 * 9 * 3 = 648 rows).
        float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
        // Two LUT fetches (x = 0.25 and 0.75) give 8 weights; each weight is
        // shared by a pair of taps taken from the same tile.
        vec3 res = vec3(0.0);
        vec4 w;
        w = texture(ravu_lut2, vec2(0.25, coord_y));
        res += (inp0[local_pos + 2] + inp0[local_pos + 35]) * w[0];
        res += (inp1[local_pos + 2] + inp1[local_pos + 23]) * w[1];
        res += (inp0[local_pos + 14] + inp0[local_pos + 23]) * w[2];
        res += (inp1[local_pos + 14] + inp1[local_pos + 11]) * w[3];
        w = texture(ravu_lut2, vec2(0.75, coord_y));
        res += (inp1[local_pos + 1] + inp1[local_pos + 24]) * w[0];
        res += (inp0[local_pos + 13] + inp0[local_pos + 24]) * w[1];
        res += (inp1[local_pos + 13] + inp1[local_pos + 12]) * w[2];
        res += (inp0[local_pos + 25] + inp0[local_pos + 12]) * w[3];
        res = clamp(res, 0.0, 1.0);
        imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res, 1.0));
    }
    // ---- Output pixel (1, 0) of the quad ----
    {
        // Same procedure as above with a different set of 16 taps.
        float luma4 = inp_luma0[local_pos + 12];
        float luma1 = inp_luma0[local_pos + 13];
        float luma12 = inp_luma0[local_pos + 22];
        float luma9 = inp_luma0[local_pos + 23];
        float luma6 = inp_luma0[local_pos + 24];
        float luma3 = inp_luma0[local_pos + 25];
        float luma14 = inp_luma0[local_pos + 34];
        float luma11 = inp_luma0[local_pos + 35];
        float luma8 = inp_luma1[local_pos + 11];
        float luma5 = inp_luma1[local_pos + 12];
        float luma2 = inp_luma1[local_pos + 13];
        float luma0 = inp_luma1[local_pos + 1];
        float luma13 = inp_luma1[local_pos + 22];
        float luma10 = inp_luma1[local_pos + 23];
        float luma7 = inp_luma1[local_pos + 24];
        float luma15 = inp_luma1[local_pos + 34];
        // Weighted gradient products, identical in form to the first kernel.
        vec3 abd = vec3(0.0);
        float gx, gy;
        gx = (luma4-luma0);
        gy = (luma1-luma0);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma5-luma1);
        gy = (luma2-luma0)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma6-luma2);
        gy = (luma3-luma1)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma7-luma3);
        gy = (luma3-luma2);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma8-luma0)/2.0;
        gy = (luma5-luma4);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma9-luma1)/2.0;
        gy = (luma6-luma4)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma10-luma2)/2.0;
        gy = (luma7-luma5)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma11-luma3)/2.0;
        gy = (luma7-luma6);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma12-luma4)/2.0;
        gy = (luma9-luma8);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma13-luma5)/2.0;
        gy = (luma10-luma8)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma14-luma6)/2.0;
        gy = (luma11-luma9)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
        gx = (luma15-luma7)/2.0;
        gy = (luma11-luma10);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma12-luma8);
        gy = (luma13-luma12);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        gx = (luma13-luma9);
        gy = (luma14-luma12)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma14-luma10);
        gy = (luma15-luma13)/2.0;
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
        gx = (luma15-luma11);
        gy = (luma15-luma14);
        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
        // Eigenvalues L1 >= L2 of [[a, b], [b, d]] from trace T and determinant D.
        float a = abd.x, b = abd.y, d = abd.z;
        float T = a + d, D = a * d - b * b;
        float delta = sqrt(max(T * T / 4.0 - D, 0.0));
        float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
        // Clamp before sqrt(): rounding can make L2 slightly negative, and
        // sqrt() of a negative value is undefined in GLSL.
        float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(max(L2, 0.0));
        float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
        float lambda = sqrtL1;
        float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
        // Quantise to (angle, strength, coherence) and derive the LUT row.
        float angle = floor(theta * 24.0 / 3.141592653589793);
        float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
        float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
        float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
        // 8 LUT weights, each shared by a pair of taps from the same tile.
        vec3 res = vec3(0.0);
        vec4 w;
        w = texture(ravu_lut2, vec2(0.25, coord_y));
        res += (inp1[local_pos + 1] + inp1[local_pos + 34]) * w[0];
        res += (inp0[local_pos + 13] + inp0[local_pos + 34]) * w[1];
        res += (inp1[local_pos + 13] + inp1[local_pos + 22]) * w[2];
        res += (inp0[local_pos + 25] + inp0[local_pos + 22]) * w[3];
        w = texture(ravu_lut2, vec2(0.75, coord_y));
        res += (inp0[local_pos + 12] + inp0[local_pos + 35]) * w[0];
        res += (inp1[local_pos + 12] + inp1[local_pos + 23]) * w[1];
        res += (inp0[local_pos + 24] + inp0[local_pos + 23]) * w[2];
        res += (inp1[local_pos + 24] + inp1[local_pos + 11]) * w[3];
        res = clamp(res, 0.0, 1.0);
        imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res, 1.0));
    }
    // ---- Pass-through pixels of the quad ----
    vec3 res;
    // (1, 1): the ravu_int11 sample at tile offset (2, 2).
    res = inp0[local_pos + 24];
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res, 1.0));
    // (0, 0): the HOOKED sample at tile offset (1, 1).
    res = inp1[local_pos + 12];
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res, 1.0));
}
//!TEXTURE ravu_lut2
//!SIZE 2 648
//!FORMAT rgba16hf
//!FILTER NEAREST
