From 954a20008b5caf120f4e6d6ab10848ed15ec706a Mon Sep 17 00:00:00 2001
From: sudacode <suda@sudacode.com>
Date: Sun, 19 Jan 2025 02:20:58 -0800
Subject: [PATCH] update

---
 input.conf              |   3 +-
 shaders/CAS-scaled.glsl | 415 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 417 insertions(+), 1 deletion(-)
 create mode 100644 shaders/CAS-scaled.glsl

diff --git a/input.conf b/input.conf
index 6aa8f54..0752f4d 100644
--- a/input.conf
+++ b/input.conf
@@ -191,7 +191,8 @@
 # n cycle_values af loudnorm=I=-30 loudnorm=I=-15 anull
 CTRL+1 no-osd change-list glsl-shaders set "~~/shaders/FSR.glsl"; show-text "FSR"
 CTRL+2 no-osd change-list glsl-shaders set "~~/shaders/NVScaler.glsl"; show-text "NIS"
-CTRL+3 no-osd change-list glsl-shaders set "~~/shaders/FSR.glsl:~~/shaders/NVScaler.glsl"; show-text "FSR & NIS"
+CTRL+3 no-osd change-list glsl-shaders set "~~/shaders/CAS-scaled.glsl"; show-text "CAS"
+CTRL+4 no-osd change-list glsl-shaders set "~~/shaders/FSR.glsl:~~/shaders/NVScaler.glsl:~~/shaders/CAS-scaled.glsl"; show-text "All shaders"
 
 CTRL+0 no-osd change-list glsl-shaders clr ""; show-text "GLSL shaders cleared"
 
diff --git a/shaders/CAS-scaled.glsl b/shaders/CAS-scaled.glsl
new file mode 100644
index 0000000..90897bf
--- /dev/null
+++ b/shaders/CAS-scaled.glsl
@@ -0,0 +1,415 @@
+// LICENSE
+// =======
+// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved.
+// -------
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+// -------
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+// Software.
+// -------
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+// FidelityFX CAS v1.0.2 by AMD
+// ported to mpv by agyild
+
+// Changelog
+// Optimized texture lookups for OpenGL 4.0+, DirectX 10+, and OpenGL ES 3.1+
+// Changed rcp + mul operations to div for better clarity when CAS_GO_SLOWER is set to 1, since the compiler should automatically
+// optimize those instructions anyway.
+// Made it directly operate on LUMA plane, since the original shader was operating on LUMA by deriving it from RGB. This should
+// cause a major increase in performance, especially on OpenGL 4.0+ renderers (4 texture lookups vs. 16)
+// Removed transparency preservation mechanism since the alpha channel is a separate source plane than LUMA
+// Added custom gamma curve support for relinearization
+// Removed final blending between the original and the sharpened pixels since it was redundant
+//
+// Notes
+// Per AMD's guidelines only upscales content up to 4x (e.g., 1080p -> 2160p, 720p -> 1440p etc.) and everything else in between,
+// that means CAS will scale up to 4x at maximum, and any further scaling will be processed by mpv's scalers
+//
+// The filter is designed to run in linear light, and does have an optional relinearization and delinearization pass which
+// assumes BT.1886 content by default. Do not forget to change SOURCE_TRC and TARGET_TRC variables depending
+// on what kind of content the filter is running on. You might want to create separate versions of the file with different
+// colorspace values, and apply them via autoprofiles. Note that running in non-linear light will result in oversharpening.
+
+//!HOOK LUMA
+//!BIND HOOKED
+//!DESC FidelityFX Upsampling and Sharpening v1.0.2 (Relinearization)
+//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 >
+
+// User variables - Relinearization
+// Compatibility
+#define SOURCE_TRC 4 // Is needed to convert from source colorspace to linear light. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom
+#define CUSTOM_GAMMA 2.2 // Custom power gamma curve to use if and when SOURCE_TRC is 6.
+
+// Shader code
+// Rec.709 inverse OETF (encoded -> linear): linear segment below 0.081, power segment otherwise. An explicit select is used because max() of the two branches always picks the power branch.
+float From709(float rec709) {
+	return (rec709 < float(0.081)) ? rec709 / float(4.5) : pow((rec709 + float(0.099)) / float(1.099), float(1.0 / 0.45));
+}
+// PQ inverse transfer (encoded -> linear): undo the outer power, apply the rational term with a clamped numerator, then undo the inner power.
+float FromPq(float pq) {
+	float root = pow(pq, float(0.0126833));
+	return pow(clamp(root - float(0.835938), 0.0, 1.0) / (float(18.8516) - float(18.6875) * root), float(6.27739));
+}
+// sRGB inverse transfer (encoded -> linear): linear segment up to 0.04045, power segment above. An explicit select is used because max() of the two branches always picks the power branch.
+float FromSrgb(float srgb) {
+	return (srgb <= float(0.04045)) ? srgb / float(12.92) : pow((srgb + float(0.055)) / float(1.055), float(2.4));
+}
+// HLG inverse transfer (encoded -> linear), evaluated piecewise around an encoded value of 0.5.
+float FromHlg(float hlg) {
+	// Curve constants shared by the logarithmic segment.
+	const float a = 0.17883277;
+	const float b = 0.28466892;
+	const float c = 0.55991073;
+
+	// Square-law segment for encoded values in [0.0, 0.5].
+	if (hlg >= 0.0 && hlg <= 0.5) {
+		return pow(hlg, 2.0) / 3.0;
+	}
+
+	// Everything else goes through the exponential (inverse-log) segment.
+	return (exp((hlg - c) / a) + b) / 12.0;
+}
+// Pass 1 (relinearization): sample the hooked texel, clamp its first channel to [0, 1] and decode it with the curve selected by SOURCE_TRC.
+vec4 hook() {
+	vec4 texel = HOOKED_tex(HOOKED_pos);
+	float luma = clamp(texel.r, 0.0, 1.0);
+#if (SOURCE_TRC == 1)
+	luma = From709(luma);
+#elif (SOURCE_TRC == 2)
+	luma = FromPq(luma);
+#elif (SOURCE_TRC == 3)
+	luma = FromSrgb(luma);
+#elif (SOURCE_TRC == 4)
+	luma = pow(luma, float(2.4));
+#elif (SOURCE_TRC == 5)
+	luma = FromHlg(luma);
+#elif (SOURCE_TRC == 6)
+	luma = pow(luma, float(CUSTOM_GAMMA));
+#endif
+	return vec4(luma, texel.gba);
+}
+
+//!HOOK LUMA
+//!BIND HOOKED
+//!DESC FidelityFX Upsampling and Sharpening v1.0.2
+//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 >
+//!WIDTH OUTPUT.w OUTPUT.w LUMA.w 2 * < * LUMA.w 2 * OUTPUT.w LUMA.w 2 * > * + OUTPUT.w OUTPUT.w LUMA.w 2 * = * +
+//!HEIGHT OUTPUT.h OUTPUT.h LUMA.h 2 * < * LUMA.h 2 * OUTPUT.h LUMA.h 2 * > * + OUTPUT.h OUTPUT.h LUMA.h 2 * = * +
+
+// User variables - Upsampling and Sharpening
+// Intensity
+#define SHARPENING 0.0 // Adjusts the range the shader adapts to high contrast (0 is not all the way off).  Higher values = more high contrast sharpening. 0.0 to 1.0.
+
+// Performance
+#define CAS_BETTER_DIAGONALS 1 // If set to 0, drops certain math and texture lookup operations for better performance. This is only useful on pre-OpenGL 4.0 renderers and there is no need to disable it otherwise. 0 or 1.
+#define CAS_GO_SLOWER 0 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1.
+
+// Compatibility
+#define TARGET_TRC 4 // Is needed to convert from linear light to the target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom
+#define CUSTOM_GAMMA 2.2 // Custom power gamma curve to use if and when TARGET_TRC is 6.
+
+// Shader code
+// Rec.709 OETF (linear -> encoded): linear segment below 0.018, power segment otherwise. An explicit select replaces the min()/max() form, whose cap was expressed in the wrong domain.
+float To709(float linear) {
+	return (linear < float(0.018)) ? linear * float(4.5) : float(1.099) * pow(linear, float(0.45)) - float(0.099);
+}
+// PQ transfer (linear -> encoded): inner power, rational term, then outer power.
+float ToPq(float linear) {
+	float powered = pow(linear, float(0.159302));
+	return pow((float(0.835938) + float(18.8516) * powered) / (float(1.0) + float(18.6875) * powered), float(78.8438));
+}
+// sRGB transfer (linear -> encoded): linear segment up to 0.0031308, power segment above. An explicit select replaces the min()/max() form, whose cap was expressed in the wrong domain.
+float ToSrgb(float linear) {
+	return (linear <= float(0.0031308)) ? linear * float(12.92) : float(1.055) * pow(linear, float(1.0 / 2.4)) - float(0.055);
+}
+// HLG transfer (linear -> encoded), evaluated piecewise around a linear value of 1/12.
+float ToHlg(float linear) {
+	// Curve constants shared by the logarithmic segment.
+	const float a = 0.17883277;
+	const float b = 0.28466892;
+	const float c = 0.55991073;
+
+	// Square-root segment for linear values up to 1/12.
+	if (linear <= 1.0 / 12.0) {
+		return sqrt(3.0 * linear);
+	}
+
+	// Logarithmic segment for everything above.
+	return a * log(12.0 * linear - b) + c;
+}
+
+#if (CAS_GO_SLOWER == 0)
+// Low-precision stand-in for sqrt(): halves the float's bit pattern and adds a bias constant.
+float APrxLoSqrtF1(float a) {
+	return uintBitsToFloat(uint(0x1fbc4639) + (floatBitsToUint(a) >> uint(1)));
+}
+// Low-precision stand-in for 1.0 / a: subtracts the float's bit pattern from a magic constant.
+float APrxLoRcpF1(float a) {
+	uint bits = floatBitsToUint(a);
+	return uintBitsToFloat(uint(0x7ef07ebb) - bits);
+}
+// Medium-precision stand-in for 1.0 / a: bit-trick initial estimate refined by one Newton-Raphson step.
+float APrxMedRcpF1(float a) {
+	float estimate = uintBitsToFloat(uint(0x7ef19fff) - floatBitsToUint(a));
+	return estimate * (-estimate * a + float(2.0));
+}
+#endif
+// Pass 2 (upsampling + sharpening): reads the luma neighbourhood around the output position, derives per-tap CAS weights, blends them, and encodes the result with the curve selected by TARGET_TRC.
+vec4 hook()
+{
+	// Scaling algorithm adaptively interpolates between nearest 4 results of the non-scaling algorithm.
+	//  a b c d
+	//  e f g h
+	//  i j k l
+	//  m n o p
+	// Working these 4 results.
+	//  +-----+-----+
+	//  |     |     |
+	//  |  f..|..g  |
+	//  |  .  |  .  |
+	//  +-----+-----+
+	//  |  .  |  .  |
+	//  |  j..|..k  |
+	//  |     |     |
+	//  +-----+-----+
+
+	vec2 pp = HOOKED_pos * HOOKED_size - 0.5;
+	vec2 fp = floor(pp);
+	pp -= fp;
+
+#if (defined(HOOKED_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
+	vec4 abef = HOOKED_gather(vec2((fp - vec2(0.5)) * HOOKED_pt), 0);
+
+	float b = abef.z;
+	float e = abef.x;
+	float f = abef.y;
+
+	vec4 cdgh = HOOKED_gather(vec2((fp + vec2(1.5, -0.5)) * HOOKED_pt), 0);
+
+	float c = cdgh.w;
+	float g = cdgh.x;
+	float h = cdgh.y;
+
+	vec4 ijmn = HOOKED_gather(vec2((fp + vec2(-0.5, 1.5)) * HOOKED_pt), 0);
+
+	float i = ijmn.w;
+	float j = ijmn.z;
+	float n = ijmn.y;
+
+	vec4 klop = HOOKED_gather(vec2((fp + vec2(1.5)) * HOOKED_pt), 0);
+
+	float k = klop.w;
+	float l = klop.z;
+	float o = klop.x;
+
+	#if (CAS_BETTER_DIAGONALS == 1)
+		float a = abef.w;
+		float d = cdgh.z;
+		float m = ijmn.x;
+		float p = klop.y;
+	#endif
+#else
+	ivec2 sp = ivec2(fp); // Texel index of f. Neighbour fetches below are clamped to the texture because out-of-range texelFetch coordinates give undefined results.
+	#define CAS_TEXEL(dx, dy) (texelFetch(HOOKED_raw, clamp(sp + ivec2(dx, dy), ivec2(0), ivec2(HOOKED_size) - ivec2(1)), 0).r * HOOKED_mul)
+	#if (CAS_BETTER_DIAGONALS == 1)
+		float a = CAS_TEXEL(-1, -1);
+		float d = CAS_TEXEL( 2, -1);
+		float m = CAS_TEXEL(-1,  2);
+		float p = CAS_TEXEL( 2,  2);
+	#endif
+
+	float b = CAS_TEXEL( 0, -1);
+	float e = CAS_TEXEL(-1,  0);
+	float f = CAS_TEXEL( 0,  0);
+
+	float c = CAS_TEXEL( 1, -1);
+	float g = CAS_TEXEL( 1,  0);
+	float h = CAS_TEXEL( 2,  0);
+
+	float i = CAS_TEXEL(-1,  1);
+	float j = CAS_TEXEL( 0,  1);
+	float n = CAS_TEXEL( 0,  2);
+
+	float k = CAS_TEXEL( 1,  1);
+	float l = CAS_TEXEL( 2,  1);
+	float o = CAS_TEXEL( 1,  2);
+#endif
+
+	// Soft min and max.
+	// These are 2.0x bigger (factored out the extra multiply).
+	//  a b c             b
+	//  e f g * 0.5  +  e f g * 0.5  [F]
+	//  i j k             j
+
+	float mnfL = min(min(b, min(e, f)), min(g, j));
+	float mxfL = max(max(b, max(e, f)), max(g, j));
+
+#if (CAS_BETTER_DIAGONALS == 1)
+	float mnfL2 = min(min(mnfL, min(a, c)), min(i, k));
+	mnfL += mnfL2;
+
+	float mxfL2 = max(max(mxfL, max(a, c)), max(i, k));
+	mxfL += mxfL2;
+#endif
+
+	//  b c d             c
+	//  f g h * 0.5  +  f g h * 0.5  [G]
+	//  j k l             k
+	float mngL = min(min(c, min(f, g)), min(h, k));
+	float mxgL = max(max(c, max(f, g)), max(h, k));
+#if (CAS_BETTER_DIAGONALS == 1)
+	float mngL2 = min(min(mngL, min(b, d)), min(j, l));
+	mngL += mngL2;
+
+	float mxgL2 = max(max(mxgL, max(b, d)), max(j, l));
+	mxgL += mxgL2;
+#endif
+
+	//  e f g             f
+	//  i j k * 0.5  +  i j k * 0.5  [J]
+	//  m n o             n
+	float mnjL  = min(min(f, min(i, j)), min(k, n));
+	float mxjL  = max(max(f, max(i, j)), max(k, n));
+#if (CAS_BETTER_DIAGONALS == 1)
+	float mnjL2 = min(min(mnjL, min(e, g)), min(m, o));
+	mnjL += mnjL2;
+
+	float mxjL2 = max(max(mxjL, max(e, g)), max(m, o));
+	mxjL += mxjL2;
+#endif
+
+	//  f g h             g
+	//  j k l * 0.5  +  j k l * 0.5  [K]
+	//  n o p             o
+	float mnkL = min(min(g, min(j, k)), min(l, o));
+	float mxkL = max(max(g, max(j, k)), max(l, o));
+#if (CAS_BETTER_DIAGONALS == 1)
+	float mnkL2 = min(min(mnkL, min(f, h)), min(n, p));
+	mnkL += mnkL2;
+
+	float mxkL2 = max(max(mxkL, max(f, h)), max(n, p));
+	mxkL += mxkL2;
+#endif
+
+	// Smooth minimum distance to signal limit divided by smooth max.
+	const float bdval = bool(CAS_BETTER_DIAGONALS) ? 2.0 : 1.0;
+#if (CAS_GO_SLOWER == 1)
+	float ampfL = clamp(min(mnfL, bdval - mxfL) / mxfL, 0.0, 1.0);
+	float ampgL = clamp(min(mngL, bdval - mxgL) / mxgL, 0.0, 1.0);
+	float ampjL = clamp(min(mnjL, bdval - mxjL) / mxjL, 0.0, 1.0);
+	float ampkL = clamp(min(mnkL, bdval - mxkL) / mxkL, 0.0, 1.0);
+#else
+	float ampfL = clamp(min(mnfL, bdval - mxfL) * APrxLoRcpF1(mxfL), 0.0, 1.0);
+	float ampgL = clamp(min(mngL, bdval - mxgL) * APrxLoRcpF1(mxgL), 0.0, 1.0);
+	float ampjL = clamp(min(mnjL, bdval - mxjL) * APrxLoRcpF1(mxjL), 0.0, 1.0);
+	float ampkL = clamp(min(mnkL, bdval - mxkL) * APrxLoRcpF1(mxkL), 0.0, 1.0);
+#endif
+
+	// Shaping amount of sharpening.
+#if (CAS_GO_SLOWER == 1)
+	ampfL = sqrt(ampfL);
+	ampgL = sqrt(ampgL);
+	ampjL = sqrt(ampjL);
+	ampkL = sqrt(ampkL);
+#else
+	ampfL = APrxLoSqrtF1(ampfL);
+	ampgL = APrxLoSqrtF1(ampgL);
+	ampjL = APrxLoSqrtF1(ampjL);
+	ampkL = APrxLoSqrtF1(ampkL);
+#endif
+
+	// Filter shape.
+	//  0 w 0
+	//  w 1 w
+	//  0 w 0
+
+	const float peak = -(mix(8.0, 5.0, clamp(SHARPENING, 0.0, 1.0)));
+	float wfL = ampfL / peak;
+	float wgL = ampgL / peak;
+	float wjL = ampjL / peak;
+	float wkL = ampkL / peak;
+
+	// Blend between 4 results.
+	//  s t
+	//  u v
+	float s = (1.0 - pp.x) * (1.0 - pp.y);
+	float t = pp.x * (1.0 - pp.y);
+	float u = (1.0 - pp.x) * pp.y;
+	float v = pp.x * pp.y;
+
+	// Thin edges to hide bilinear interpolation (helps diagonals).
+	const float thinB = 0.03125; // 1.0 / 32.0
+
+#if (CAS_GO_SLOWER == 1)
+	s /= thinB + mxfL - mnfL;
+	t /= thinB + mxgL - mngL;
+	u /= thinB + mxjL - mnjL;
+	v /= thinB + mxkL - mnkL;
+#else
+	s *= APrxLoRcpF1(thinB + mxfL - mnfL);
+	t *= APrxLoRcpF1(thinB + mxgL - mngL);
+	u *= APrxLoRcpF1(thinB + mxjL - mnjL);
+	v *= APrxLoRcpF1(thinB + mxkL - mnkL);
+#endif
+
+	// Final weighting.
+	//    b c
+	//  e f g h
+	//  i j k l
+	//    n o
+	//  _____  _____  _____  _____
+	//         fs        gt
+	//
+	//  _____  _____  _____  _____
+	//  fs      s gt  fs  t     gt
+	//         ju        kv
+	//  _____  _____  _____  _____
+	//         fs        gt
+	//  ju      u kv  ju  v     kv
+	//  _____  _____  _____  _____
+	//
+	//         ju        kv
+	float qbeL = wfL * s;
+	float qchL = wgL * t;
+	float qfL  = wgL * t + wjL * u + s;
+	float qgL  = wfL * s + wkL * v + t;
+	float qjL  = wfL * s + wkL * v + u;
+	float qkL  = wgL * t + wjL * u + v;
+	float qinL = wjL * u;
+	float qloL = wkL * v;
+
+	// Filter.
+	vec4 pix = vec4(0.0, 0.0, 0.0, 1.0);
+	float W = 2.0 * qbeL + 2.0 * qchL + 2.0 * qinL + 2.0 * qloL + qfL + qgL + qjL + qkL;
+	pix.r = b * qbeL + e * qbeL + c * qchL + h * qchL + i * qinL + n * qinL + l * qloL + o * qloL + f * qfL + g * qgL + j * qjL + k * qkL;
+#if (CAS_GO_SLOWER == 1)
+	pix.r /= W;
+#else
+	pix.r *= APrxMedRcpF1(W);
+#endif
+
+	pix.r = clamp(pix.r, 0.0, 1.0);
+
+#if (TARGET_TRC == 1)
+	pix.r = To709(pix.r);
+#elif (TARGET_TRC == 2)
+	pix.r = ToPq(pix.r);
+#elif (TARGET_TRC == 3)
+	pix.r = ToSrgb(pix.r);
+#elif (TARGET_TRC == 4)
+	pix.r = pow(pix.r, float(1.0 / 2.4));
+#elif (TARGET_TRC == 5)
+	pix.r = ToHlg(pix.r);
+#elif (TARGET_TRC == 6)
+	pix.r = pow(pix.r, float(1.0 / CUSTOM_GAMMA));
+#endif
+
+	return pix;
+}
\ No newline at end of file