1408 lines
208 KiB
GLSL
1408 lines
208 KiB
GLSL
// MIT License

// Copyright (c) 2024 Joao Chrisostomo, Kacper Michajłow

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
|
|
|
|
//!DESC ArtCNN C4F16 DS (Conv2D)
//!COMPUTE 24 32 12 16
//!HOOK LUMA
//!BIND LUMA
//!SAVE conv2d
//!WIDTH LUMA.w 2.0 *
//!HEIGHT LUMA.h 2.0 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
|
|
// Precision selection: request the explicit 16-bit float types. When the
// extension macro is defined, V4/M4/F alias the half-precision vector, matrix
// and scalar types; otherwise they alias the 32-bit equivalents so the same
// source builds either way.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
# define V4 f16vec4
# define M4 f16mat4
# define F float16_t
#else
# define V4 vec4
# define M4 mat4
# define F float
#endif

// Convolution kernel extent in texels (x, y).
const ivec2 ksize = ivec2(3, 3);
// Distance from the kernel centre to its edge; integer division gives (1, 1).
const ivec2 offset = ksize / 2;
// Number of invocations per workgroup in x and y.
// NOTE(review): presumably 12x16, taken from the COMPUTE directive of this
// pass -- confirm against the host that generates the layout qualifier.
const ivec2 wg_size = ivec2(gl_WorkGroupSize);
// Size of the input tile one workgroup needs: its own footprint plus the
// (ksize - 1) extra texels required by the kernel in each dimension.
const ivec2 isize = wg_size + ksize - 1;

// Workgroup-shared copy of the input tile, indexed [plane][row][column].
// This pass has a single scalar input plane.
shared F inp[1][isize.y][isize.x];
|
|
// Entry point of the first pass.
//
// Stage 1: the workgroup cooperatively copies an isize.x by isize.y tile of
//          the LUMA input (scaled by LUMA_mul) into the shared array `inp`.
// Stage 2: every invocation reads the 3x3 neighbourhood that starts at its
//          own local position in the tile and accumulates four V4 results
//          (one weight vector per tap and per result).
// Stage 3: the four results are stored as a 2x2 quad of texels in out_image.
//
// LUMA_raw, LUMA_mul and out_image are not declared in this file section;
// they are expected to be provided by the host.
void hook() {
    const uvec2 local_xy = gl_LocalInvocationID.xy;
    // Top-left texel (before subtracting `offset`) covered by this workgroup.
    ivec2 base = ivec2(gl_WorkGroupID) * wg_size;

    // The tile is larger than the workgroup, so each invocation starts at its
    // own local position and advances by the workgroup size until the tile
    // is exhausted.
    for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
        for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
            // Stride (1, 1) and offset (0, 0) are no-ops here; they are kept
            // so the addressing has the same shape as in the later passes.
            // NOTE(review): at image borders input_base lies outside the
            // texture (e.g. -1 for the first workgroup). The value returned
            // by such a fetch is not pinned down by this code -- confirm the
            // padding behaviour the weights expect.
            const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(1, 1);
            inp[0][y][x] = F(LUMA_mul * texelFetch(LUMA_raw, input_base + ivec2(0, 0), 0).x);
        }
    }

    // Wait until every invocation of the workgroup has written its share of
    // the tile before any of them starts reading neighbours.
    barrier();

    // Accumulators start from constant values (presumably the layer's bias
    // terms -- the constants are generated data).
    V4 result0 = V4(-0.0049340418, -0.012793901, -0.01654616, -0.04911682);
    V4 result1 = V4(-0.029540002, -0.0068046413, -0.00023085524, -0.00035294317);
    V4 result2 = V4(0.029907363, -0.004545475, -0.0008227237, -0.00086534343);
    V4 result3 = V4(-0.002919027, 0.002460784, 0.012082119, -0.063332476);

    // 3x3 neighbourhood; names are inp_<plane>_<x>_<y>.
    const F inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0];
    const F inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1];
    const F inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2];
    const F inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0];
    const F inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1];
    const F inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2];
    const F inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0];
    const F inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1];
    const F inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2];

    // Each tap contributes (weight vector * scalar sample) to each result.
    result0 += V4(-0.028834578, -0.14101993, 0.007882108, 0.03927023) * inp_0_0_0;
    result0 += V4(0.0381181, 0.15082009, 0.1299179, 0.124353915) * inp_0_1_0;
    result0 += V4(-0.0026651656, 0.10701949, -0.10302223, 0.06252015) * inp_0_2_0;
    result0 += V4(-0.017014246, 0.056569446, 0.013070459, 0.1673612) * inp_0_0_1;
    result0 += V4(-0.039012846, -0.083705, -0.3763227, 0.27351955) * inp_0_1_1;
    result0 += V4(-0.21789941, -0.13512278, 0.107165866, 0.11088684) * inp_0_2_1;
    result0 += V4(0.026416743, 0.15055776, -0.026394987, 0.09503178) * inp_0_0_2;
    result0 += V4(0.11317181, -0.09849969, 0.24554619, -0.006209404) * inp_0_1_2;
    result0 += V4(0.18810558, 0.015041951, 0.01617648, 0.09057816) * inp_0_2_2;

    result1 += V4(0.078574546, -0.07280724, -0.09120986, 0.019795248) * inp_0_0_0;
    result1 += V4(-0.067474775, -0.019562894, -0.0067152088, -0.18367955) * inp_0_1_0;
    result1 += V4(0.05455347, 0.11168705, 0.07079753, 0.14639775) * inp_0_2_0;
    result1 += V4(-0.05000137, 0.09589531, 0.09779115, -0.038142927) * inp_0_0_1;
    result1 += V4(0.104602836, -0.16130356, 0.14888051, 0.17604049) * inp_0_1_1;
    result1 += V4(-0.11296293, 0.26848587, -0.21014185, -0.13524422) * inp_0_2_1;
    result1 += V4(0.06099483, -0.023948722, 0.022344239, 0.013350393) * inp_0_0_2;
    result1 += V4(0.09386803, -0.23937507, -0.13151713, 0.025719114) * inp_0_1_2;
    result1 += V4(-0.05785345, 0.062087506, 0.10226412, -0.022276009) * inp_0_2_2;

    result2 += V4(-0.06026112, 0.10904248, -0.11761055, -0.06018391) * inp_0_0_0;
    result2 += V4(0.085813195, -0.03787966, 0.16202296, 0.0041552093) * inp_0_1_0;
    result2 += V4(0.060139276, -0.005811277, 0.0070230174, -0.0069510895) * inp_0_2_0;
    result2 += V4(-0.17221731, 0.28638083, 0.34320375, 0.17658837) * inp_0_0_1;
    result2 += V4(-0.27984276, -0.7016683, -0.30903038, -0.08029049) * inp_0_1_1;
    result2 += V4(0.13250893, 0.124444515, -0.059002075, -0.04446707) * inp_0_2_1;
    result2 += V4(0.03323799, 0.12117096, -0.14812368, -0.036965195) * inp_0_0_2;
    result2 += V4(0.1977777, 0.06994207, 0.10603352, 0.16904502) * inp_0_1_2;
    result2 += V4(-0.1536847, -0.004283535, 0.009282772, -0.12100757) * inp_0_2_2;

    result3 += V4(0.059486274, -0.11239365, 0.21015224, -0.090100385) * inp_0_0_0;
    result3 += V4(-0.15950659, 0.18551329, 0.34821057, 0.16747488) * inp_0_1_0;
    result3 += V4(0.010258027, -0.029954039, 0.05202862, 0.056675363) * inp_0_2_0;
    result3 += V4(0.036862228, -0.003042499, 0.028995909, -0.18852086) * inp_0_0_1;
    result3 += V4(0.017469667, -0.1588153, -0.38980985, -0.008847585) * inp_0_1_1;
    result3 += V4(0.042114146, 0.12621936, -0.17563884, 0.20885424) * inp_0_2_1;
    result3 += V4(-0.06928606, 0.10595441, -0.058485802, -0.073390864) * inp_0_0_2;
    result3 += V4(0.090648934, -0.04735434, -0.15482384, 0.026818749) * inp_0_1_2;
    result3 += V4(-0.013016346, -0.06682891, -0.024198746, 0.16552028) * inp_0_2_2;

    // Each invocation owns a 2x2 quad of output texels: result0 top-left,
    // result1 top-right, result2 bottom-left, result3 bottom-right.
    const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2);
    imageStore(out_image, output_base + ivec2(0, 0), result0);
    imageStore(out_image, output_base + ivec2(1, 0), result1);
    imageStore(out_image, output_base + ivec2(0, 1), result2);
    imageStore(out_image, output_base + ivec2(1, 1), result3);
}
|
|
|
|
//!DESC ArtCNN C4F16 DS (Conv2D-1-ReLU)
//!COMPUTE 24 32 12 16
//!HOOK LUMA
//!BIND conv2d
//!SAVE conv2d_1
//!WIDTH LUMA.w 2.0 *
//!HEIGHT LUMA.h 2.0 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
|
|
// Precision selection: request the explicit 16-bit float types. When the
// extension macro is defined, V4/M4/F alias the half-precision vector, matrix
// and scalar types; otherwise they alias the 32-bit equivalents so the same
// source builds either way.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
# define V4 f16vec4
# define M4 f16mat4
# define F float16_t
#else
# define V4 vec4
# define M4 mat4
# define F float
#endif

// Convolution kernel extent in texels (x, y).
const ivec2 ksize = ivec2(3, 3);
// Distance from the kernel centre to its edge; integer division gives (1, 1).
const ivec2 offset = ksize / 2;
// Number of invocations per workgroup in x and y.
// NOTE(review): presumably 12x16, taken from the COMPUTE directive of this
// pass -- confirm against the host that generates the layout qualifier.
const ivec2 wg_size = ivec2(gl_WorkGroupSize);
// Size of the input tile one workgroup needs: its own footprint plus the
// (ksize - 1) extra texels required by the kernel in each dimension.
const ivec2 isize = wg_size + ksize - 1;

// Workgroup-shared copy of the input tile, indexed [plane][row][column].
// This pass keeps four V4 planes per tile position.
shared V4 inp[4][isize.y][isize.x];
|
|
void hook() {
|
|
const uvec2 local_xy = gl_LocalInvocationID.xy;
|
|
ivec2 base = ivec2(gl_WorkGroupID) * wg_size;
|
|
for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
|
|
for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
|
|
const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2);
|
|
inp[0][y][x] = V4(conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(0, 0), 0));
|
|
inp[1][y][x] = V4(conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(1, 0), 0));
|
|
inp[2][y][x] = V4(conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(0, 1), 0));
|
|
inp[3][y][x] = V4(conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(1, 1), 0));
|
|
}
|
|
}
|
|
|
|
barrier();
|
|
V4 result0 = V4(0.011309091, 0.011720209, 0.07717452, 0.012043687);
|
|
V4 result1 = V4(0.04592391, -0.009706806, -0.011586145, 0.014143103);
|
|
V4 result2 = V4(0.012079102, 0.02975359, 0.008218081, 0.01839072);
|
|
V4 result3 = V4(0.0058121867, -0.0034202328, 0.0009524132, -0.02947478);
|
|
const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.09877198, 0.38928044, -0.09383465, 0.079032965, 0.107005835, 0.1136302, 0.041581154, -0.052157145, -0.35730338, 0.27816653, -0.06807881, -0.067746386, -0.006262232, -0.06413886, -0.12926531, 0.028121412) * inp_0_0_0;
|
|
result0 += M4(-0.2730201, 0.12183424, -0.000594066, 0.06456225, 0.63564765, 0.16603222, -0.011430876, 0.10374776, -0.5786755, -0.52282465, -0.15073062, 0.017066224, -0.36250365, 0.09160107, 0.039119765, 0.10774151) * inp_0_1_0;
|
|
result0 += M4(0.30126116, -0.1014905, 0.08349073, 0.0063151238, 0.120676085, -0.17403236, 0.18270256, -0.057362948, -0.28110328, 0.106739074, -0.035695158, 0.06990916, -0.09574672, -0.018324012, -0.09960064, -0.057645217) * inp_0_2_0;
|
|
result0 += M4(-0.17198816, -0.093343705, -0.006382152, -0.18036872, -0.2148838, -0.17424193, 0.22078043, -0.1732282, 0.26076397, 0.1474919, -0.01922907, 0.010837412, 0.028024467, 0.07845438, -0.015646966, 0.027136652) * inp_0_0_1;
|
|
result0 += M4(-0.043947384, -0.15946668, -0.046214677, -0.11894077, -0.4706527, 0.20794135, 0.07293495, -0.035387393, -0.46785754, 0.65764594, 0.039504908, -0.4142682, 0.034218732, 0.032816306, 0.106478676, 0.12423036) * inp_0_1_1;
|
|
result0 += M4(-0.3440009, 0.024391247, -0.0644611, -0.07619978, -0.4256126, -0.10989194, -0.057316273, -0.05037822, 0.2577093, 0.07522628, 0.060620584, -0.13478145, -0.046803165, 0.06498437, 0.05821759, 0.040200662) * inp_0_2_1;
|
|
result0 += M4(0.079720534, 0.049182344, -0.01734871, 0.2098411, -0.06080854, -0.10493844, -0.0061250627, 0.23049541, 0.27885517, -0.18456793, 0.04471378, -0.1519946, -0.047879137, -0.05675035, -0.11165134, -0.020727035) * inp_0_0_2;
|
|
result0 += M4(0.05443745, 0.012744339, -0.051926363, 0.12922677, 0.08167649, 0.09113499, -0.06406348, 0.45999956, 0.25078163, -0.18583958, -0.27785185, 0.8986888, 0.0048544668, -0.040135045, -0.060791086, 0.022054149) * inp_0_1_2;
|
|
result0 += M4(-0.15824191, -0.063602105, 0.024466638, 0.018670492, 0.18217057, -0.2717177, -0.16882227, -0.17058909, 0.119718775, -0.08420973, 0.12789485, -0.10119966, 0.043762792, -0.070947856, 0.04055627, -0.07537116) * inp_0_2_2;
|
|
result1 += M4(-0.07460643, -0.054178257, 0.00027177262, 0.184799, -0.046359397, 0.059696186, 0.033557095, 0.07526354, 0.07089547, -0.07476439, -0.035536528, 0.024276823, 0.07413596, -0.06979502, -0.06169341, -0.12059578) * inp_0_0_0;
|
|
result1 += M4(0.14608821, -0.07439824, 0.03865178, -0.086880915, -0.11553095, 0.11800374, 0.13214925, 0.046812996, -0.037123796, -0.018989379, -0.05062097, 0.0021832588, 0.13339424, -0.055192936, -0.11149385, -0.04801339) * inp_0_1_0;
|
|
result1 += M4(0.10018024, 0.0015277287, 0.024594266, -0.10473088, -0.10727858, 0.2637988, 0.0025090645, -0.027918315, 0.08310345, 0.11183597, -0.11561776, 0.10297722, -0.112052426, -0.0393831, -0.06215935, 0.14460996) * inp_0_2_0;
|
|
result1 += M4(-0.045010846, 0.2596987, -0.061746594, 0.3915752, -0.1178524, -0.040817495, 0.15932876, 0.18028739, -0.060169153, 0.28849506, -0.059507165, 0.07859005, 0.058386788, -0.060939044, -0.03840868, -0.10951335) * inp_0_0_1;
|
|
result1 += M4(0.11402059, -0.18600419, 0.20650828, -0.040762946, 0.20945579, -0.22200839, -0.07005221, -0.11765, -0.20845462, 0.18816432, 0.9522476, -0.036692392, 0.16586697, 0.068688564, -0.11158684, -0.08792759) * inp_0_1_1;
|
|
result1 += M4(0.05377238, -0.15907322, -0.07312733, -0.10462031, -0.08407747, -0.14599952, 0.20981629, -0.044482782, 0.400037, -0.1437334, 0.0788931, 0.042104036, -0.20607647, 0.08861181, -0.032842077, 0.023695804) * inp_0_2_1;
|
|
result1 += M4(0.04181302, -0.060805034, -0.101417646, -0.17794839, 0.168702, -0.51052046, -0.095897056, -0.013912092, 0.038582865, 0.05309609, -0.06728378, 0.341196, -0.017040616, -0.06550378, 0.11086736, -0.042624153) * inp_0_0_2;
|
|
result1 += M4(0.11940134, -0.12056612, -0.13654755, -0.026973963, 0.15465894, 0.029888792, -0.49261746, -0.22650442, 0.015893001, -0.44998038, -0.7092512, -0.24032496, 0.07558687, 0.03830741, 0.078829594, 0.06326567) * inp_0_1_2;
|
|
result1 += M4(0.18127115, -0.065435536, -0.07445673, -0.0049071168, 0.01085999, -0.0077388254, -0.064653255, -0.09067383, 0.026689146, -0.08541669, -0.060653634, -0.08493797, -0.14466925, -0.11301376, 0.058424987, 0.09166556) * inp_0_2_2;
|
|
result2 += M4(-0.13080364, -0.08167301, -0.463303, -0.70742375, 0.077425644, -0.021687286, 0.035808627, -0.39734727, -0.1559171, -0.21604316, 0.39925998, -0.3012099, 0.016638095, -0.08232811, 0.03377222, 0.031966764) * inp_0_0_0;
|
|
result2 += M4(0.14442822, 0.12295271, 0.029818185, 0.0359594, 0.04060354, 0.015188929, -0.0029900323, -0.23532009, 0.2359964, 0.14970055, 0.017785046, -0.52034444, 0.059353128, 0.10592766, -0.05263293, -0.10214093) * inp_0_1_0;
|
|
result2 += M4(0.122345954, 0.32518455, -0.13197717, -0.2253006, 0.06691174, 0.052411076, 0.100197494, -0.26456442, -0.030602941, 0.2827907, -0.31574076, -0.14921468, -0.05038948, -0.16915296, -0.10316771, -0.02792467) * inp_0_2_0;
|
|
result2 += M4(-0.08198538, -0.10204986, 0.5690069, -0.09734485, -0.028890003, -0.15102413, 0.44935277, -0.024167726, -0.015774969, -0.0887488, -0.51489425, 0.43573058, 0.07211464, 0.086417206, -0.029778117, 0.0897787) * inp_0_0_1;
|
|
result2 += M4(-0.11789364, 0.06326017, 0.13353379, 0.14045511, -0.097247526, 0.13069586, 0.18198517, 0.07585104, -0.46691963, -0.12969762, 0.12990327, 0.10933748, 0.22077899, 0.08396143, 0.11584505, 0.06981046) * inp_0_1_1;
|
|
result2 += M4(-0.15274513, -0.019888762, 0.21179429, -0.08906937, -0.048165828, 0.2743937, -0.05530385, 0.08738966, -0.077265665, 0.31894842, -0.05136382, 0.12788285, 0.09329922, -0.047389865, 0.08291986, 0.1473245) * inp_0_2_1;
|
|
result2 += M4(-0.108804405, -0.023400303, -0.2225971, 0.101814784, 0.048623733, -0.14735846, -0.38235044, 0.2508335, 0.0006776365, 0.05468187, 0.56234205, -0.09719959, -0.05784428, -0.015568528, 0.0027001353, -0.08191959) * inp_0_0_2;
|
|
result2 += M4(0.0972354, -0.06312126, 0.022070305, 0.120524995, 0.50088096, 0.090986624, -0.05814756, 0.20085476, 0.560176, -0.016112555, -0.08229239, 0.012685615, -0.18278423, 0.109280005, -0.022708299, -0.14846179) * inp_0_1_2;
|
|
result2 += M4(0.03168514, 0.09864207, -0.09442118, -0.024364354, -0.10918896, 0.06662421, -0.091709755, 0.05298994, -0.036009606, -0.15768065, -0.10558869, -0.05636757, -0.062536, -0.09078685, -0.03775271, -0.05907822) * inp_0_2_2;
|
|
result3 += M4(0.16951673, 0.13079187, 0.15705979, -0.15498151, 0.07683223, -0.06972948, 0.1610643, 0.017183576, 0.017889338, -0.123466015, -0.18230678, 0.020653097, -0.028243298, -0.0455674, -0.0099932505, 0.15229751) * inp_0_0_0;
|
|
result3 += M4(-0.14118189, -0.07251147, -0.124787614, -0.031930055, 0.0014606286, 0.061471842, 0.12428364, 0.09564553, 0.08472746, -0.054346558, -0.3817292, 0.018670253, -0.0053583533, 0.057495654, -0.006340129, -0.04558433) * inp_0_1_0;
|
|
result3 += M4(-0.22084264, 0.072441526, 0.016335221, 0.061928593, -0.034427874, -0.052877698, 0.031620156, 0.02055477, -0.24549209, -0.28623292, 0.11228471, 0.10617587, 0.0035924714, 0.057636518, -0.0062976037, -0.09103348) * inp_0_2_0;
|
|
result3 += M4(0.22203928, 0.0001094966, -0.28109252, 0.14876871, 0.20899206, -0.13489018, 0.15935789, 0.13730305, 0.067521155, -0.17813672, 0.073929176, -0.47425798, -0.14579837, -0.12225127, 0.026552638, 0.09890729) * inp_0_0_1;
|
|
result3 += M4(-0.08939892, -0.038700964, -0.023869948, -0.025948375, -0.28878126, 0.06866243, -0.17241889, 0.16191031, 0.48571798, 0.0033233624, 0.5171774, 0.27048627, -0.024863949, 0.10009337, -0.054171123, 0.021327967) * inp_0_1_1;
|
|
result3 += M4(-0.0026751498, 0.01677554, -0.114878856, 0.07205768, -0.33671686, -0.20127785, -0.269759, 0.013932504, -0.38406572, -0.3358019, 0.015354786, 0.020901006, 0.13394894, -0.11160113, -0.09172442, -0.02606201) * inp_0_2_1;
|
|
result3 += M4(0.02983239, 0.065238595, -0.08852208, 0.041396923, 0.03438514, 0.52203476, 0.09024649, 0.08568877, -0.11356743, -0.06145079, 0.16605784, -0.0046695387, -0.07753392, -0.04085484, -0.04211701, 0.042229425) * inp_0_0_2;
|
|
result3 += M4(-0.0152936075, -0.30379444, -0.03498485, -0.059613615, 0.0010579604, 0.03850498, -0.5338625, -0.18131024, -0.010662432, 0.95364696, -0.3127961, 0.10594221, 0.013022731, 0.1168128, 0.0039640763, -0.008202392) * inp_0_1_2;
|
|
result3 += M4(-0.04573058, 0.12372858, -0.22841553, 0.07078649, 0.13948447, 0.034082845, 0.19417766, -0.12902696, -0.027753403, 0.1267231, -0.397033, 0.011799876, 0.03935273, -0.0021190709, -0.011170245, -0.010836182) * inp_0_2_2;
|
|
const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.8353019, -0.03079682, -0.048795003, 0.035018213, -0.23654273, 0.07702494, 0.2094514, -0.03364075, -0.7785589, 0.48244116, -0.15566188, -0.02168974, -0.2537349, 0.55893207, -0.11559338, 0.056175552) * inp_1_0_0;
|
|
result0 += M4(-0.36740738, 0.11978992, 0.0896231, 0.026315548, 0.61221665, -0.37786412, -0.011924415, -0.25350574, 0.7978943, -0.051793884, 0.12227405, -0.11176683, -0.33257982, -0.033167973, -0.080166414, 0.0071900175) * inp_1_1_0;
|
|
result0 += M4(0.30219695, 0.19819108, -0.051961806, 0.032858204, 0.13871473, 0.04834231, 0.0116140265, 0.023473512, -0.3913116, -0.5835288, 0.09242092, 0.13168785, 0.06345958, -0.2336482, -0.2062129, 0.07203379) * inp_1_2_0;
|
|
result0 += M4(-0.36866486, -0.06509952, 0.106615335, -0.10289987, -0.10938692, -0.016172621, -0.16599819, 0.039859798, -0.05009089, -0.3045099, 0.016047738, 0.14461611, -0.32650867, 0.4984813, 0.22866598, 0.15623319) * inp_1_0_1;
|
|
result0 += M4(0.1897873, -0.39516336, 0.09100719, 0.08683436, -0.13866301, 0.26886114, 0.02321497, 0.09762188, 0.015329164, 0.2783753, 0.08462078, 0.041773602, 0.87116414, -0.77580714, -0.25930312, 0.2847646) * inp_1_1_1;
|
|
result0 += M4(-0.16257232, 0.08016403, -0.11788802, -0.1697364, -0.22687338, -0.0767716, -0.13487695, 0.12317228, -0.14794314, 0.378525, -0.07111922, -0.14637247, 0.3404566, 0.6845142, -0.06920808, 0.13534123) * inp_1_2_1;
|
|
result0 += M4(0.019833196, 0.14731897, -0.025829142, 0.102971524, 0.09881936, -0.14266418, -0.055933498, 0.08291914, 0.31677926, 0.15038142, -0.1840297, 0.035697404, 0.0027101748, 0.07143232, -0.29017213, 0.32565188) * inp_1_0_2;
|
|
result0 += M4(0.08040377, -0.04806, 0.111961365, -0.18063085, 0.04080433, 0.03362372, 0.004829591, 0.19333047, 0.0032951036, 0.116119325, 0.15918833, 0.09558537, -0.107474156, -0.014522343, 0.35576323, -0.93175983) * inp_1_1_2;
|
|
result0 += M4(-0.11092724, 0.15114045, -0.28484997, 0.04351722, 0.14012209, -0.038325023, -0.024206607, -0.15268601, 0.20977294, -0.552664, 0.11712065, -0.08422484, -0.030889055, -0.48031336, -0.026412616, -0.06320308) * inp_1_2_2;
|
|
result1 += M4(-0.29958254, -0.109615885, 0.029395718, 0.016319282, 0.26873666, 0.3702847, 0.09765828, 0.14932635, 0.14159209, 0.095116585, 0.16584423, -0.09755582, 0.019258445, -0.060782902, -0.2806007, -0.1301911) * inp_1_0_0;
|
|
result1 += M4(0.10790642, 0.030600157, -0.038562354, -0.026029062, -0.2230801, 0.1270215, 0.18553448, -0.08388566, 0.09725127, -0.05428193, 0.16844873, 0.2568333, 0.12408434, -0.16046543, -0.0019557131, 0.016269071) * inp_1_1_0;
|
|
result1 += M4(0.13869679, 0.05662262, 0.10610408, -0.042847093, -0.07361296, -0.06860708, 0.11945361, -0.010420255, -0.26884106, 0.22707044, -0.107094064, 0.103328854, 0.17813848, -0.26080275, -0.095853835, -0.016011981) * inp_1_2_0;
|
|
result1 += M4(-0.025391787, -0.5703227, 0.06401796, -0.34907568, 0.10032994, 0.39301944, 0.016423864, 0.4536823, -0.11761743, -0.112939045, -0.024434509, 0.13416661, -0.24094307, -0.4419523, 0.00059161003, -0.46946138) * inp_1_0_1;
|
|
result1 += M4(0.35361376, 0.06384827, -0.16368373, 0.1610783, -0.2828763, 0.033816554, -0.13143909, -0.30080402, 0.16076425, -0.22998048, -0.47442713, -0.30463502, 0.3944553, 0.048345, -0.2522477, 0.11306068) * inp_1_1_1;
|
|
result1 += M4(0.24947502, 0.07821673, 0.14102261, 0.007180845, -0.11125239, 0.055846, 0.012407736, -0.11969543, -0.26606387, -0.18257982, 0.14033502, 0.1393476, 0.12760912, 0.095806316, -0.1708761, -0.09545417) * inp_1_2_1;
|
|
result1 += M4(-0.1447064, -0.4311559, -0.006957143, -0.23297, 0.29948533, 0.39930835, -0.14382444, 0.8876203, -0.15697467, -0.1968718, -0.11124502, -0.2506147, 0.18206203, 0.051802326, -0.09965735, 0.2832789) * inp_1_0_2;
|
|
result1 += M4(0.10724873, 0.016814586, 0.21944435, 0.23991458, -0.16073646, -0.015581932, -0.3656503, -0.19130723, 0.21842504, 0.3039945, 0.12558551, -0.22017072, -0.05429101, 0.67426455, 1.0007616, 0.28180587) * inp_1_1_2;
|
|
result1 += M4(0.22271243, -0.11349672, -0.21032788, -0.163929, -0.28222632, 0.13967724, -0.017861396, 0.06513735, -0.13312815, 0.1615168, 0.11318072, 0.09575312, -0.20077147, -0.16503984, 0.11308297, 0.025842024) * inp_1_2_2;
|
|
result2 += M4(-0.059448976, 0.014798713, 0.57572407, -0.4970689, 0.19702779, 0.13581169, -0.19225039, 0.6739382, -0.15882425, -0.051979855, -0.97899884, -0.22377504, -0.030026965, -0.113155484, 0.24761347, -0.719494) * inp_1_0_0;
|
|
result2 += M4(-0.03346012, 0.109789826, 0.20284073, -0.018877316, -0.13332081, -0.25938398, -0.014910783, -0.18132773, -0.20557809, 0.21696718, 0.106756255, 0.23250414, -0.27276435, -0.00024258936, 0.026531879, 0.14486963) * inp_1_1_0;
|
|
result2 += M4(-0.014871851, -0.03370725, 0.32739255, -0.056933377, 0.01230874, 0.028961578, 0.15074731, 0.13390249, 0.16060308, -0.28017068, -0.049378213, 0.15798374, 0.1049363, 0.021499243, -0.056283005, 0.5702396) * inp_1_2_0;
|
|
result2 += M4(-0.40781596, -0.068078905, -0.2608516, -0.36839285, 0.27331737, 0.06990319, 0.028166782, 0.47841585, -0.07101787, -0.07055015, 1.3041064, -0.08154195, -0.6420218, -0.16353844, 0.8393286, 0.14371522) * inp_1_0_1;
|
|
result2 += M4(-0.036683656, 0.23350303, -0.3823271, -0.1895325, -0.042114716, -0.27834532, -0.011016889, 0.054359082, 0.52122265, 0.09595518, 0.29784077, -0.045716044, 0.26147208, 0.1535827, -0.3547086, -0.5459924) * inp_1_1_1;
|
|
result2 += M4(-0.08925608, -0.14973558, -0.17211129, 0.03096805, 0.11671369, 0.21920115, 0.06494186, -0.06473811, 0.13254163, 0.0064040422, 0.0197999, -0.029223228, -0.12759385, -0.038808268, -0.07336323, 0.4978441) * inp_1_2_1;
|
|
result2 += M4(-0.08437726, -0.20194645, 0.07599329, -0.21935628, 0.3217862, -0.027808418, -0.055483975, 0.48027533, -0.07248972, -0.10917602, -1.1656119, 0.25192067, 0.726754, -0.4072386, -0.84861493, -0.28209558) * inp_1_0_2;
|
|
result2 += M4(0.24826227, 0.03057961, -0.025810828, 0.15662467, 0.19474192, -0.20894553, -0.13577338, 0.22615886, -0.18568474, 0.03584874, 0.24559572, 0.13821158, -0.45080966, 0.32287788, 0.014510343, 0.0734196) * inp_1_1_2;
|
|
result2 += M4(-0.02822516, -0.023234604, -0.058450423, 0.15003574, -0.010218517, 0.0002537157, -0.05167111, -0.051569637, -0.120971926, 0.122743204, 0.14452058, 0.026441488, -0.17925249, 0.2772131, 0.017249027, 0.16620068) * inp_1_2_2;
|
|
result3 += M4(0.030601235, 0.1533475, 0.1354541, 0.17745161, -0.020671252, -0.10372984, -0.13732138, -0.23245959, -0.084360994, 0.22388554, 0.26257366, -0.08955721, 0.054022234, 0.0046199774, -0.40396032, 0.17547776) * inp_1_0_0;
|
|
result3 += M4(0.0950914, 0.13105528, -0.17804535, 0.036833197, 0.10695207, -0.12510276, 0.020602226, 0.15301901, 0.17476954, -0.02053389, 0.55537015, -0.09537796, 0.11434194, 0.020771341, -0.0027719524, -0.022421993) * inp_1_1_0;
|
|
result3 += M4(-0.09498485, 0.017095882, -0.13202192, 0.040244695, -0.19800507, -0.031560943, 0.04959914, 0.03567479, 0.24101183, -0.042635966, -0.18614642, -0.009154916, -0.00247375, -0.19376199, 0.26423156, 0.03759395) * inp_1_2_0;
|
|
result3 += M4(0.07953845, 0.39391887, 0.00727727, 0.35082912, 0.025060296, -0.2595865, 0.16177765, -0.7336906, 0.23894046, -0.070483424, -0.30498013, 0.56130445, -0.010698394, 0.021584244, -0.58075994, 0.68465334) * inp_1_0_1;
|
|
result3 += M4(-0.40409416, -0.0077395765, -0.3227979, -0.068225116, 0.5861685, 0.08172666, 0.13572781, 0.3026567, -0.57700115, 0.107233115, -0.2528313, -0.17424521, -0.84155977, -0.02813022, -0.5172479, -0.59169096) * inp_1_1_1;
|
|
result3 += M4(0.14502329, 0.06812947, 0.2185236, 0.0643197, -0.2091687, -0.09261454, -0.09526991, 0.040777106, 0.032173075, -0.11730932, -0.13217378, 0.0023497643, 0.44384873, -0.15709418, 0.6121635, -0.090208404) * inp_1_2_1;
|
|
result3 += M4(0.04270719, 0.21312836, -0.048879534, -0.11266838, -0.14382827, -0.2192215, 0.0068878373, -0.025609564, -0.09477325, 0.2191171, 0.096425086, -0.16293575, -0.051669516, 0.4190975, 0.12580837, -0.135226) * inp_1_0_2;
|
|
result3 += M4(-0.046412982, -0.12211086, 0.0077603217, -0.07431488, 0.17982213, 0.0438941, -0.196129, 0.06958203, 0.0024261524, -0.26292798, -0.097179495, -0.19446476, 0.29509872, -0.07991557, 0.74099654, 0.1677081) * inp_1_1_2;
|
|
result3 += M4(0.047113243, 0.058038574, 0.24930288, 0.06464759, -0.03720209, -0.051006317, -0.01912244, 0.011376446, 0.15856679, 0.103483565, 0.46561158, 0.030039294, -0.07310521, 0.45909077, 0.32737142, -0.009899002) * inp_1_2_2;
|
|
const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.056658532, -0.084393896, -0.08351355, 0.026212027, -0.16376431, 0.28912234, 0.027704367, -0.071667574, 0.22876024, 0.5063291, -0.034939148, -0.04414098, 0.80793244, 0.2117281, 0.04003154, -0.019289244) * inp_2_0_0;
|
|
result0 += M4(-0.6404585, -0.17796884, -0.14469966, 0.064434, -0.037456833, -0.43776658, -0.076254845, -0.016480407, 0.053084683, -0.4298736, 0.009404756, 0.083069295, -1.0547874, -0.20475821, 0.027985618, 0.12540238) * inp_2_1_0;
|
|
result0 += M4(-0.28070635, 0.1289751, 0.1219496, 0.01570686, -0.19013144, 0.29028776, 0.012497347, -0.0010326781, -0.7109733, -0.095800534, -0.07843679, -0.017538793, 0.4179525, 0.13152626, 0.106455006, 0.01078675) * inp_2_2_0;
|
|
result0 += M4(0.081900895, 0.19029334, -0.073567726, -0.047557395, 0.19428933, -0.038285363, -0.11491762, -0.01837945, 0.07547317, -0.5344631, 0.1524544, -0.14964266, -0.19406794, -0.059020177, 0.19315392, -0.038649715) * inp_2_0_1;
|
|
result0 += M4(-0.16288629, 0.43891653, 0.08301114, -0.359797, -0.07925925, 0.157717, 0.23638429, -0.5961064, -0.20833725, 0.6921076, 0.18377437, -0.46835884, 0.23760724, 0.2279108, -0.18868038, -0.24260604) * inp_2_1_1;
|
|
result0 += M4(-0.13185735, 0.16606236, 0.07416285, -0.03832922, 0.17361966, -0.13698995, -0.08244967, -0.08690483, 0.023102188, -0.21447626, -0.15169983, 0.22278568, -0.030070322, 0.10786125, -0.09606021, -0.00094145857) * inp_2_2_1;
|
|
result0 += M4(0.19589414, 0.008045848, -0.26970163, 0.07926561, 0.0055276607, -0.034727324, 0.014047634, 0.01084498, 0.013823401, 0.045765556, -0.06885619, 0.11313184, -0.09608044, 0.11905903, -0.057270247, 0.17040935) * inp_2_0_2;
|
|
result0 += M4(-0.35795772, 0.052093923, -0.27925724, 0.7024668, -0.23307371, 0.15484054, 0.086788125, 0.40594432, 0.030013239, -0.012880656, -0.23584186, 0.6172303, 0.100337416, -0.2668913, 0.07190245, 0.045928806) * inp_2_1_2;
|
|
result0 += M4(0.08252405, -0.14609249, -0.064848445, -0.05421016, -0.08036742, 0.064709485, 0.0995519, 0.17094779, 0.19813484, -0.023966646, 0.06744239, -0.24773261, -0.08125354, -0.11565303, -0.004539193, 0.003593128) * inp_2_2_2;
|
|
result1 += M4(-0.05913632, -0.13035738, -0.2095817, -0.121928796, 0.24421848, 0.31298313, 0.06490786, 0.11518253, -0.12560591, -0.019254718, 0.10287586, 0.10359144, -0.18111609, 0.003691266, -0.16280791, 0.15910712) * inp_2_0_0;
|
|
result1 += M4(-0.09409358, 0.18263733, 0.044311643, 0.06420531, 0.09978127, 0.061665844, 0.19545491, -0.07039197, 0.12624802, -0.034005325, -0.14515208, -0.078887306, 0.106090754, 0.16866021, -0.14042042, -0.046339422) * inp_2_1_0;
|
|
result1 += M4(0.07321761, -0.005795847, -0.13165809, 0.019000694, -0.09719375, 0.16906215, -0.08389258, -0.024464838, -0.20402004, -0.22585747, 0.19619657, 0.03625385, -0.056417644, -0.10498231, 0.15780598, -0.05896881) * inp_2_2_0;
|
|
result1 += M4(0.11133829, 0.15564004, -0.018679706, -0.20204216, 0.18958686, 0.27253, -0.06635486, -0.14202532, -0.12708612, 0.22483705, 0.028482271, 0.287388, 0.07226748, 0.16693592, 0.083715275, -0.43332836) * inp_2_0_1;
|
|
result1 += M4(-0.107852295, 0.4372522, 0.82334834, -0.010232122, -0.124953635, -0.1031519, 0.88873106, -0.066571906, 0.107724965, 0.14239775, 0.38775948, -0.3008945, 0.12835532, -0.004528145, 0.46319053, 0.37589112) * inp_2_1_1;
|
|
result1 += M4(0.36154172, -0.048908047, 0.01569887, 0.10236648, -0.3750691, -0.028939823, 0.13799433, -0.07261984, -0.19416392, 0.23474926, 0.16244835, 0.17886005, -0.08432286, -0.099077776, -0.16162856, 0.006568329) * inp_2_2_1;
|
|
result1 += M4(0.20400487, -0.11719335, -0.11616023, 0.5995642, 0.122836575, 0.4458128, -0.16099311, 0.699037, -0.21749511, -0.22285137, -0.13708612, 0.08973937, -0.033287726, -0.33511263, 0.01033256, 0.060349323) * inp_2_0_2;
|
|
result1 += M4(-0.13193716, -0.27737704, -0.442998, 0.02232665, 0.13035831, -0.17953672, -0.57704264, -0.19806914, 0.66499674, -0.057644695, -0.6075821, -0.7843484, 0.11177764, 0.3197587, -0.19009523, -0.35939646) * inp_2_1_2;
|
|
result1 += M4(0.30169463, -0.27423045, 0.030066399, 0.09561448, -0.35065278, -0.09054245, -0.13476075, 0.058679003, -0.240253, -0.25924155, -0.025114952, 0.1503409, -0.10358453, 0.06972844, 0.10086383, 0.1513331) * inp_2_2_2;
|
|
result2 += M4(-0.09140265, -0.043480042, 0.39341742, -0.16124645, 0.039826732, 0.0982359, 0.14812395, -0.29226306, -0.12270788, -0.056483883, 0.46743312, -0.12414679, -0.08598476, -0.020382252, 0.8526356, -0.6251697) * inp_2_0_0;
|
|
result2 += M4(0.19089483, -0.094089076, 0.27113757, -0.89349926, 0.092731245, -0.052982565, 0.029999843, -0.27503353, 0.1723571, -0.14498913, -0.59834313, 0.38859525, 0.12832993, 0.06796332, -0.43113524, 0.2746368) * inp_2_1_0;
|
|
result2 += M4(0.101626456, 0.012754597, -0.25569427, -0.075960755, -0.04676257, 0.022405503, -0.08868581, 0.07564415, -0.1573639, 0.12833588, -0.31780383, -0.46218848, -0.027274387, -0.13945863, -0.110267706, -0.027970377) * inp_2_2_0;
|
|
result2 += M4(0.03608637, -0.09479316, -0.21446887, 0.56932735, -0.08810863, 0.06604438, -0.1869864, 0.6002932, -0.12353022, 0.0830962, 0.2746934, 0.16245197, -0.08361289, 0.0022464357, -0.2217645, -0.12708506) * inp_2_0_1;
|
|
result2 += M4(-0.3414478, -0.1966153, 0.055342548, -0.013102268, -0.47288916, -0.5555628, -0.14289796, -0.08635351, -0.41934514, -0.6622999, 0.22686398, -0.15859646, -0.16033597, -0.0053745653, 0.36677605, -0.23678315) * inp_2_1_1;
|
|
result2 += M4(-0.17615376, -0.046758268, 0.01108182, -0.07680903, 0.029766507, 0.6448251, 0.20447639, 0.056505933, 0.23630296, 1.0478128, 0.26991642, -0.055334356, 0.13279189, 0.026829982, -0.12233411, 0.16576806) * inp_2_2_1;
|
|
result2 += M4(0.15395284, -0.1248782, 0.31775445, 0.0008446973, 0.2462245, 0.050902765, 0.22869344, 0.31496996, 0.032222766, -0.19146186, 0.16774654, 0.4773935, 0.12845011, -0.0674604, -0.16974731, 0.6718414) * inp_2_0_2;
|
|
result2 += M4(0.19998777, -0.0046336846, -0.17863709, -0.022363333, 0.30268896, -0.21146934, -0.05744014, 0.16582312, 0.55801415, 0.09421628, -0.652831, 0.22474986, 0.052013434, -0.08298826, -0.20941404, 0.2198382) * inp_2_1_2;
|
|
result2 += M4(-0.05906059, -0.055516556, -0.15871921, -0.20876054, 0.056753717, -0.122805975, -0.10752009, 0.07214043, -0.2886711, -0.104196325, 0.13151167, -0.11443949, -0.29457253, 0.09404759, 0.0781491, 0.09187794) * inp_2_2_2;
|
|
result3 += M4(0.07131227, -0.17103189, -0.65701765, -0.08103885, 0.0114034135, -0.11564006, -0.3758374, -0.30220702, 0.10926563, 0.060695365, -0.13507707, 0.02752198, 0.15815619, -0.099520825, -0.12582445, 0.011142395) * inp_2_0_0;
|
|
result3 += M4(0.16390102, -0.043893002, -0.17439, 0.00196632, 0.23113947, -0.054559767, -0.22033226, 0.16813108, 0.07049406, -0.07914666, 0.44900405, -0.2244528, 0.035653476, 0.05198975, -0.16546313, -0.113693014) * inp_2_1_0;
|
|
result3 += M4(0.015167149, 0.080200985, 0.18556446, 0.044955164, -0.17171162, -0.055256296, -0.002402905, 0.082981795, -0.22192933, 0.03015825, -0.25711286, 0.011584354, -0.095374405, -0.1382058, -0.12054189, 0.10028561) * inp_2_2_0;
|
|
result3 += M4(0.004898744, -0.089915045, -0.095151104, -0.8470385, 0.037769705, -0.07265799, -0.13910796, -0.52633315, -0.07218728, -0.05430542, 0.37463775, -0.4195107, -0.11475595, 0.32739305, -0.06587419, 0.096468985) * inp_2_0_1;
|
|
result3 += M4(0.43917456, 0.026247881, 0.19216591, 0.11570986, 0.46347585, -0.39810273, 0.45491657, 0.7642445, 0.52890843, 0.058011733, 0.6879336, 1.197659, -0.17768478, -0.044728726, 0.0114222765, 0.15778284) * inp_2_1_1;
|
|
result3 += M4(-0.087177016, -0.10747335, -0.10010042, 0.115062304, -0.8122979, 0.06514974, -0.20169608, -0.10079475, -0.99654114, 0.3199778, -0.07893785, -0.12280641, -0.31013697, 0.34886715, 0.16507483, -0.1397757) * inp_2_2_1;
|
|
result3 += M4(-0.040510368, -0.19001658, 0.10233444, -0.21234024, 0.019421631, -0.2970453, 0.0011585673, -0.20484056, 0.07370205, -0.07258561, 0.26393914, 0.021444967, 0.17476425, -0.11027206, 0.45120278, -0.14630376) * inp_2_0_2;
|
|
result3 += M4(0.13665591, 0.4259041, 0.0023495876, 0.14909796, 0.19523306, 0.3640925, -0.26424402, 0.22802217, 0.083183356, 0.6097358, -0.7197997, -0.32168692, 0.2591536, 0.26636642, -0.1078281, -0.1142868) * inp_2_1_2;
|
|
result3 += M4(0.041843053, 0.021810856, -0.21729478, -0.08708366, -0.2119869, -0.15670185, -0.18791877, 0.05018508, 0.3888126, -0.37974566, -0.27909023, 0.053460464, 0.06214204, -0.44816417, 0.15038887, 0.16712622) * inp_2_2_2;
|
|
const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.3472437, 0.013380894, 0.08867833, -0.03453232, 0.58152944, -0.43871528, 0.22380953, -0.04550669, 0.120581925, 0.004563894, 0.04559358, 0.0027206047, 0.017552061, 0.16171905, -0.037060566, -0.01708926) * inp_3_0_0;
|
|
result0 += M4(-0.34160993, -0.11255932, -0.058914144, 0.026892548, -0.31992993, 0.26869294, -0.09144462, 0.08680876, 0.26296964, -0.12034341, -0.045838475, -0.032575533, 0.07309527, -0.11566818, 0.018513981, -0.010388167) * inp_3_1_0;
|
|
result0 += M4(0.28056777, 0.07695634, -0.059898093, -0.061575457, 0.3669255, 0.028526982, 0.24511854, -0.09934631, -0.07288608, 0.050052203, -0.038593568, 0.008848001, 0.27569962, -0.037571654, 0.056816753, -0.0039305515) * inp_3_2_0;
|
|
result0 += M4(-0.028644724, 0.12271751, 0.026339592, 0.008865822, 0.2519346, -0.26284647, -0.1638227, -0.008493934, -0.28506994, 0.011916564, 0.019927721, -0.029855786, 0.0014189436, -0.047583543, -0.050952516, -0.083322175) * inp_3_0_1;
|
|
result0 += M4(0.20403737, -0.040827665, -0.0028530445, -0.017813073, -0.4706143, 0.42873096, 0.14829667, 0.046647582, -0.48000586, 0.37023175, 0.08343689, 0.0029796327, -0.23113053, 0.065826334, 0.074439414, 0.074652724) * inp_3_1_1;
|
|
result0 += M4(0.1586078, 0.0784932, -0.15297528, 0.09501966, -0.4055182, -0.30885902, -0.060376216, -0.15000467, -0.009729531, 0.07221125, -0.04001262, 0.12093661, -0.15710236, 0.09445375, 0.073956475, -0.019581962) * inp_3_2_1;
|
|
result0 += M4(-0.111901745, 0.02471801, -0.06651766, 0.095790766, -0.08006672, 0.19840515, 0.26936525, -0.13326232, -0.10196766, -0.19608083, -0.11811127, 0.22120981, -0.10520785, 0.095748596, -0.007844923, 0.11197408) * inp_3_0_2;
|
|
result0 += M4(0.043696456, 0.013067065, 0.07440999, -0.53549933, 0.073651895, -0.0088031795, -0.14840111, 0.44721222, 0.2975085, -0.023100551, -0.15204108, 1.3613566, -0.03796179, 0.07700565, -0.04039371, 0.39191732) * inp_3_1_2;
|
|
result0 += M4(-0.035744753, 0.008431576, -0.011160678, -0.089015596, 0.067718446, 0.045862436, -0.1271991, -0.052337117, -0.21953662, -0.021048736, -0.019922525, 0.16980134, -0.06376514, 0.020770412, -0.1292233, 0.19095159) * inp_3_2_2;
|
|
result1 += M4(0.04209156, -0.10028043, 0.045473978, 0.029526308, -0.1029502, 0.097725846, 0.11866024, 0.101209536, -0.103658065, 0.1303318, 0.10141563, 0.08520663, 0.056324724, 0.043090805, 0.061262637, -0.009563499) * inp_3_0_0;
|
|
result1 += M4(-0.11467653, -0.05272206, -0.12958023, -0.018456401, 0.023291504, 0.10627822, 0.046663657, 0.16016315, 0.049062163, -0.19274554, -0.076242104, -0.039942898, -0.25169963, -0.048676368, 0.0037137556, 0.053856958) * inp_3_1_0;
|
|
result1 += M4(-0.15907876, -0.11642729, 0.08522632, -0.012964159, -0.21032064, 0.18714333, 0.018133575, -0.12953383, 0.07832173, -0.07664562, -0.008530792, -0.030324288, 0.2171452, -0.052815564, -0.03799995, -0.07796586) * inp_3_2_0;
|
|
result1 += M4(0.014487541, 0.035919957, -0.043378692, -0.10243317, 0.29915744, -0.029785946, 0.08348936, -0.2593623, -0.28476247, 0.15268299, -0.009884593, 0.28988793, 0.18091768, 0.41806173, -0.0008945458, 0.43557742) * inp_3_0_1;
|
|
result1 += M4(-0.015301471, 0.028162321, 0.0628364, 0.12303836, -0.07908043, -0.05095927, 0.08793644, 0.109865434, 0.009046035, -0.014763819, -0.10699331, -0.1811893, -0.35765564, 0.015265002, -0.021379912, 0.054260004) * inp_3_1_1;
|
|
result1 += M4(-0.008764806, -0.047667857, -0.00043627238, -0.085696295, -0.071407646, -0.31469864, 0.09608359, 0.0036131532, 0.04063981, -0.02390764, 0.03337885, -0.08083824, 0.4892242, -0.030610098, 0.053893816, 0.0005124162) * inp_3_2_1;
|
|
result1 += M4(0.23028834, 0.27810562, -0.0005937561, 0.062948644, -0.06315908, 0.2782613, 0.054228317, 0.37353197, 0.04041054, -0.83467233, -0.20950946, 0.2553413, 0.13149354, 0.12652624, -0.108717784, 0.39738894) * inp_3_0_2;
|
|
result1 += M4(-0.056789584, 0.22073776, 0.43888393, 0.007940124, -0.17490996, -0.10502325, -0.49565843, -0.26163256, 0.5991587, -0.6067586, -0.9914189, -0.2210728, -0.3194455, -0.15456308, -0.13144505, -0.03552233) * inp_3_1_2;
|
|
result1 += M4(-0.32254165, 0.044227004, 0.13516445, 0.03845159, 0.009997396, 0.008264542, -0.06699806, -0.051496867, 0.11909525, -0.28769228, -0.18918002, -0.057668176, 0.23843196, -0.1769408, -0.076675124, -0.031703703) * inp_3_2_2;
|
|
result2 += M4(0.024601893, 0.047437504, 0.29823276, 0.09947944, 0.21934599, 0.11803661, 0.65973103, 0.26546025, 0.029531708, -0.0067652045, -0.17580013, 0.26997727, -0.030777141, 0.040013187, 0.024318442, 0.37423348) * inp_3_0_0;
|
|
result2 += M4(0.112576716, -0.13343264, 0.08077175, -0.048998382, -0.093169354, -0.04977975, 0.015730407, -0.73369396, -0.09483911, -0.06737101, 0.06244766, 0.07160388, -0.002243437, -0.12980196, -0.042938337, -0.026755292) * inp_3_1_0;
|
|
result2 += M4(0.062544756, 0.30026558, 0.17554766, 0.13842122, 0.04878832, -0.1402889, 0.1123605, -0.070087515, 0.016846227, 0.13830933, -0.003600853, -0.07052991, 0.08062244, 0.14440808, 0.037623156, -0.10593128) * inp_3_2_0;
|
|
result2 += M4(-0.022144489, 0.031163864, -0.35879242, -0.06148122, 0.18611787, 0.28179172, -0.65257895, 0.22794338, 0.075979315, -0.1493961, 0.022940412, -0.04455144, 0.013953682, 0.08925602, -0.010988895, -0.06853787) * inp_3_0_1;
|
|
result2 += M4(-0.104398176, -0.165788, -0.27504763, -0.45820633, 0.12718037, -0.30875504, 0.12423894, 0.15830809, -0.14640152, 0.072613806, -0.013286344, 0.03865688, 0.06950812, -0.34721127, -0.008446023, 0.07753682) * inp_3_1_1;
|
|
result2 += M4(-0.0765464, 0.043303255, 0.021159632, 0.031017859, -0.11436186, 0.084210224, -0.3106996, 0.088460155, 0.1289226, 0.20739235, 0.1362793, -0.2524162, -0.17312491, -0.12867886, -0.10566141, 0.031078596) * inp_3_2_1;
|
|
result2 += M4(0.09355678, 0.10412212, 0.004731364, 0.42740902, -0.116885625, 0.052118473, 0.43677473, 0.20393315, 0.07955809, -0.119825155, 0.17133446, -0.11519095, 0.13483945, -0.010165948, 0.11746103, 0.033520274) * inp_3_0_2;
|
|
result2 += M4(-0.3813429, -0.11363336, -0.12936473, 0.18715818, 0.32119343, -0.021526324, -0.43352234, -0.06257893, 0.6233187, 0.13396265, 0.038704164, 0.0631709, 0.1939896, -0.15153073, 0.123462155, -0.01652716) * inp_3_1_2;
|
|
result2 += M4(-0.41880918, -0.030412536, 0.12958126, 0.120551094, -0.022546515, -0.04808989, 0.17756711, -0.108908154, 0.2226288, -0.10673246, -0.16231227, -0.03192408, 0.14065991, 0.1836104, -0.03244477, -0.044319637) * inp_3_2_2;
|
|
result3 += M4(-0.0011758883, 0.0031363205, -0.16173664, 0.08941673, -0.11167737, -0.33703038, 0.12200337, 0.055131473, 0.035779245, 0.0029444366, 0.055127386, -0.09139993, -0.033134703, -0.03748916, -0.066961385, -0.108324334) * inp_3_0_0;
|
|
result3 += M4(0.053190347, -0.111483805, -0.06346869, 0.012349723, -0.045100734, 0.029720673, -0.22522068, 0.14960602, -0.043017942, 0.092041545, 0.16698928, 0.08137758, 0.16750063, 0.011768444, -0.09525918, 0.010928377) * inp_3_1_0;
|
|
result3 += M4(-0.14769338, 0.027719587, -0.058809258, 0.028551238, -0.09908326, 0.10515259, -0.14025599, 0.061426327, -0.042780805, -0.035405204, -0.05823736, -0.016086705, 0.04844154, 0.032153595, -0.07846922, 0.019060513) * inp_3_2_0;
|
|
result3 += M4(-0.07933811, 0.29348436, 0.15147564, -0.011950693, -0.07698759, 0.21123506, 0.27513096, -0.4870618, 0.15858337, -0.088143416, 0.0066797114, -0.2147107, 0.09265716, -0.03495672, 0.045518417, -0.3315414) * inp_3_0_1;
|
|
result3 += M4(0.1404907, -0.053150203, -0.040700138, 0.18472278, 0.61421394, 0.054680698, 0.046741713, 0.0037532535, 0.13713816, 0.114549585, 0.26238853, 0.21299282, 0.12599483, 0.028305324, -0.010574808, -0.57075983) * inp_3_1_1;
|
|
result3 += M4(-0.13837083, 0.1861814, 0.27288082, 0.02824301, -0.2062445, 0.06472042, 0.019210115, 0.007461685, -0.32328922, 0.014523166, -0.30206257, 0.1573444, 0.15600231, 0.0886993, 0.24092384, 0.02336472) * inp_3_2_1;
|
|
result3 += M4(0.024918266, -0.28346768, -0.033286523, -0.19488813, -0.021365946, -0.31525356, -0.06427523, -0.022802275, 0.017455311, 0.29595226, -0.21315272, -0.055813782, 0.086387426, -0.14360276, -0.13709465, -0.122531205) * inp_3_0_2;
|
|
result3 += M4(0.10574195, -0.4813584, 0.31677288, -0.08348199, -0.15907282, 0.090848915, -0.4333516, -0.014139355, 0.11864891, 0.5082066, -0.43740028, 0.10677436, 0.101659745, 0.06943931, 0.02947963, -0.027463546) * inp_3_1_2;
|
|
result3 += M4(-0.20253208, -0.26093277, 0.068154514, 0.024943102, 0.054091427, -0.12938003, 0.20981514, 0.16747186, -0.19666524, 0.109958075, -0.24808154, -0.011728526, -0.020255229, 0.12573044, -0.07719842, 0.07252243) * inp_3_2_2;
|
|
const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2);
|
|
imageStore(out_image, output_base + ivec2(0, 0), max(result0, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(1, 0), max(result1, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(0, 1), max(result2, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(1, 1), max(result3, V4(0.0)));
|
|
}
|
|
|
|
//!DESC ArtCNN C4F16 DS (Conv2D-2-ReLU)
|
|
//!COMPUTE 24 32 12 16
|
|
//!HOOK LUMA
|
|
//!BIND conv2d_1
|
|
//!SAVE conv2d_2
|
|
//!WIDTH LUMA.w 2.0 *
|
|
//!HEIGHT LUMA.h 2.0 *
|
|
//!COMPONENTS 4
|
|
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
|
|
// Request the explicit float16 arithmetic types extension, then pick the
// vector (V4), matrix (M4) and scalar (F) type aliases used by this pass.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
|
|
// When the extension macro is defined, use the 16-bit float types.
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
|
|
# define V4 f16vec4
|
|
# define M4 f16mat4
|
|
# define F float16_t
|
|
// Otherwise fall back to the built-in default float types.
#else
|
|
# define V4 vec4
|
|
# define M4 mat4
|
|
# define F float
|
|
#endif
|
|
|
|
// Size of the neighbourhood each invocation reads from the shared tile:
// hook() loads taps at x/y offsets 0..2, i.e. a 3x3 window.
const ivec2 ksize = ivec2(3, 3);
|
|
// Integer half-size of the window, (1, 1). hook() subtracts it from the fetch
// coordinate, so shared index 1 + local id lines up with the invocation itself.
const ivec2 offset = ksize / 2;
|
|
// Work group dimensions (x, y) as signed integers.
// NOTE(review): presumably 12x16 from the "//!COMPUTE 24 32 12 16" header - confirm.
const ivec2 wg_size = ivec2(gl_WorkGroupSize);
|
|
// Shared tile dimensions: the work group size plus ksize - 1 extra texels per
// axis, so every invocation can read its full 3x3 window from the tile.
const ivec2 isize = wg_size + ksize - 1;
|
|
// Shared input tile. hook() fills the four planes with the texels of conv2d_1
// at offsets (0,0), (1,0), (0,1) and (1,1) from a coordinate scaled by 2.
shared V4 inp[4][isize.y][isize.x];
|
|
void hook() {
|
|
const uvec2 local_xy = gl_LocalInvocationID.xy;
|
|
ivec2 base = ivec2(gl_WorkGroupID) * wg_size;
|
|
for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
|
|
for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
|
|
const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2);
|
|
inp[0][y][x] = V4(conv2d_1_mul * texelFetch(conv2d_1_raw, input_base + ivec2(0, 0), 0));
|
|
inp[1][y][x] = V4(conv2d_1_mul * texelFetch(conv2d_1_raw, input_base + ivec2(1, 0), 0));
|
|
inp[2][y][x] = V4(conv2d_1_mul * texelFetch(conv2d_1_raw, input_base + ivec2(0, 1), 0));
|
|
inp[3][y][x] = V4(conv2d_1_mul * texelFetch(conv2d_1_raw, input_base + ivec2(1, 1), 0));
|
|
}
|
|
}
|
|
|
|
barrier();
|
|
V4 result0 = V4(0.023435008, -0.00027622582, -0.015393672, 0.008721407);
|
|
V4 result1 = V4(-0.0058370507, -0.015384002, -0.010110728, 0.015183784);
|
|
V4 result2 = V4(0.038818885, 0.047503617, -0.043154787, -0.005362168);
|
|
V4 result3 = V4(-0.03511599, -0.0390787, -0.001977005, 0.019544978);
|
|
const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.058053754, 0.13135362, -0.047918122, -0.011845601, -0.0074042883, -0.01840911, -0.011805869, -0.03396952, -0.1423342, 0.26230896, -0.119458295, -0.06616202, -0.009783131, 0.035237905, -0.03701137, -0.10224075) * inp_0_0_0;
|
|
result0 += M4(-0.041274697, -0.07608542, 0.047750793, 0.054070923, -0.16413322, 0.014423329, -0.03390137, -0.00094438414, 0.24362351, -0.2306998, 0.025874767, -0.07668255, -0.15923639, 0.46262452, 0.05566397, 0.15915436) * inp_0_1_0;
|
|
result0 += M4(0.11520685, 0.0699546, 0.064455785, -0.0076417695, 0.0049518016, -0.0375039, -0.026019963, -0.007440243, 0.03008722, 0.14577916, -0.049544826, 0.05725768, 0.06544244, 0.07811894, 0.05958433, 0.037279688) * inp_0_2_0;
|
|
result0 += M4(-0.077427626, -0.28169155, 0.14521974, -0.02468973, -0.012213673, 0.045991287, 0.032625675, -0.041064482, 0.045073047, 0.22772516, 0.09916965, 0.032622837, 0.081653826, 0.0040710922, -0.06014905, -0.014239243) * inp_0_0_1;
|
|
result0 += M4(-0.08949968, 0.40064472, -0.3088256, -0.030333402, -0.077949814, 0.036044396, 0.006116637, 0.016873905, -0.061148554, -0.18398748, 0.016047241, 0.21445145, -0.5862237, 0.06960463, -0.24190919, 0.019996881) * inp_0_1_1;
|
|
result0 += M4(0.0010335072, -0.074155085, 0.06035906, 0.05635123, -0.029693812, 0.25365904, -0.00024468364, 0.017304827, -0.18047398, -0.060659446, -0.011265205, 0.014874942, -0.60377824, 0.0348222, 0.076552495, 0.06994206) * inp_0_2_1;
|
|
result0 += M4(0.0068813893, -0.040201046, 0.15965754, -1.7235106e-05, -0.017728887, 0.0489135, -0.048296545, -0.0256505, 0.09437446, 0.12918611, -0.04645181, -0.22671208, 0.08722971, -0.27255175, 0.024839658, 0.031033102) * inp_0_0_2;
|
|
result0 += M4(0.32137305, -0.102484845, 0.5899058, 0.44748813, 0.14033101, 0.26633027, 0.14133942, -0.045242507, -0.3500667, -0.16679467, -0.11007231, -0.1118854, 0.14673825, 0.065924786, 0.017390981, -0.0049263104) * inp_0_1_2;
|
|
result0 += M4(-0.2750656, -0.12161614, -0.28282353, -0.22726911, -0.04463269, 0.053947803, -0.038568437, -0.06423929, 0.24576302, 0.03625427, 0.10282412, 0.05768898, -0.30617765, 0.08486049, -0.027985916, 0.014466912) * inp_0_2_2;
|
|
result1 += M4(-0.0073026754, -0.10402381, -0.013642001, 0.044249803, -0.02854999, 0.26434416, 0.057275936, 0.08449719, -0.2368094, -0.006792606, 0.08725315, -0.1143782, -0.26464862, -0.14515208, 0.06274822, 0.36444998) * inp_0_0_0;
|
|
result1 += M4(-0.013720833, -0.06886963, 0.05349541, -0.06206712, 0.035714574, 0.039087676, 0.08770568, 0.14166538, 0.19585021, -0.2129663, 0.14013258, 0.0022852065, -0.5944084, -0.7182823, 0.122801915, -0.15706588) * inp_0_1_0;
|
|
result1 += M4(0.10479358, 0.120234706, -0.064341865, 0.14256644, 0.10301237, -0.04109505, -0.0512812, 0.13723285, -0.029703898, 0.24554458, 0.01530522, 0.3853674, -0.20900846, -0.5164066, -0.20735526, 0.07260991) * inp_0_2_0;
|
|
result1 += M4(-0.14815906, -0.01028779, -0.14179946, -0.08245007, 0.019574288, 0.16411702, 0.11887322, 0.0020758726, 0.028338032, -0.16261598, 0.17139073, -0.06354438, -0.18751918, -0.06267931, -0.009964829, 0.011002064) * inp_0_0_1;
|
|
result1 += M4(-0.080518, 0.1318538, 0.44151536, -0.04916626, -0.056665212, 0.34340844, 0.16808915, 0.07016322, -0.2744961, 0.019613005, -0.52572715, -0.5972775, -0.17945085, -0.38791883, 0.2364365, 0.4347594) * inp_0_1_1;
|
|
result1 += M4(0.18026938, -0.1343583, -0.27794445, 0.25141716, 0.25054744, 0.11847508, -0.022598626, -0.08347308, -0.06789105, 0.56946987, 0.21327427, 0.39672527, -0.07893942, -0.3660021, 0.04420206, -0.04678048) * inp_0_2_1;
|
|
result1 += M4(-0.3361689, 0.21600454, 0.29812515, 0.19549452, 0.13161008, 0.1506024, 0.084969185, 0.07457274, 0.115148, -0.10726738, -0.15455638, 0.022031983, -0.07685952, 0.004126467, -0.0036478213, -0.03565146) * inp_0_0_2;
|
|
result1 += M4(-0.041066427, -0.11341293, -0.9815334, -1.0637896, 0.23624156, 0.144971, 0.13262825, -0.06459397, 0.3298128, -0.18340024, 0.044938684, -0.078921996, -0.15567741, -0.06563018, 0.058869045, -0.31469694) * inp_0_1_2;
|
|
result1 += M4(-0.18666749, -0.04640199, 0.6378595, 0.15205689, -0.010305863, 0.25054187, 0.18608533, 0.12638347, 0.1989298, -0.16685888, -0.008758655, -0.11104765, -0.010293042, -0.19616619, 0.03701753, 0.16267337) * inp_0_2_2;
|
|
result2 += M4(-0.03328148, -0.006684863, 0.047662534, -0.06358788, 0.018822119, -0.053844366, 0.06987464, 0.092299104, -0.13518637, 0.12761132, -0.13602121, 0.15030415, 0.16349082, 0.13304846, -0.12295147, -0.5695785) * inp_0_0_0;
|
|
result2 += M4(0.09411206, -0.05187804, -0.017689293, 0.013353826, -0.08697054, -0.1597196, 0.22802964, 0.04886305, 0.08418551, -0.0804207, 0.11035258, 0.12820846, 0.023220098, 0.057945892, -0.31034696, -0.15134479) * inp_0_1_0;
|
|
result2 += M4(0.114032544, -0.06916583, 0.025773713, 0.05741373, -0.24842425, -0.09751518, 0.14063355, 0.0502536, 0.054900263, -0.17048357, -0.11259802, 0.160759, 0.16757573, -0.01877, -0.11194997, -0.16462773) * inp_0_2_0;
|
|
result2 += M4(0.35808134, 0.039701495, 6.3124726e-06, 0.03133524, -0.05278441, -0.054526035, 0.04481739, 0.2529997, -0.16400562, -0.19464107, -0.2156525, -0.04006176, 0.19686534, 0.033792473, -0.49558243, -0.25051036) * inp_0_0_1;
|
|
result2 += M4(-0.05304926, -0.3043419, 0.10092999, -0.058093864, -0.23438485, 0.034214076, 0.057646334, 0.1186709, 0.044353776, -0.1963721, -0.37588304, -0.13445406, 0.127387, -0.35068044, -2.7945485, -0.34946597) * inp_0_1_1;
|
|
result2 += M4(-0.39820927, 0.21416184, -0.05426307, 0.04687176, -0.2081737, -0.2406106, 0.23088469, 0.11969322, -0.083836876, 0.29234567, -0.28048143, 0.052370936, 0.21438457, 0.03972491, 0.13101222, -0.27309152) * inp_0_2_1;
|
|
result2 += M4(-0.41306856, 0.16440691, -0.09808768, -0.26407242, 0.0053243865, -0.09325784, 0.20075214, 0.3811357, 0.30143264, 0.058478504, -0.06928174, 0.027018353, -0.20570262, -0.013981209, 0.007406996, -0.118252516) * inp_0_0_2;
|
|
result2 += M4(0.27038062, 1.5538056, -0.87050104, 0.03884074, -0.28182444, -0.08340967, -0.0689404, 0.3859192, -0.116228536, -0.31909284, 1.1460763, -0.2930017, 0.05609198, -0.26499516, -0.40788504, -0.4939044) * inp_0_1_2;
|
|
result2 += M4(0.18884245, -0.25252223, -0.2608849, -0.101122685, 0.037499864, -0.23475505, 0.23866269, 0.112167336, -0.011161603, 0.13258833, 0.23283808, 0.20873502, 0.12333728, 0.008156681, -0.061962094, -0.2777167) * inp_0_2_2;
|
|
result3 += M4(-0.017292995, 0.059847377, -0.01150032, -0.004381958, 0.009071516, 0.006968245, -0.034055986, 0.03740713, -0.07671129, 0.010568533, -0.11548467, -0.03640214, 0.004297413, 0.013166457, -0.020208651, -0.042670313) * inp_0_0_0;
|
|
result3 += M4(0.021905567, -0.012356532, 0.034336973, -0.025187774, 0.021469206, -0.04151741, 0.009018021, 0.11130828, 0.036343213, 0.023039078, 0.046457145, -0.16021183, 0.07557342, -0.04547214, -0.06765464, -0.10687027) * inp_0_1_0;
|
|
result3 += M4(-0.030245442, 0.017085928, 0.05411745, -0.02326739, 0.09594549, 0.042214155, -0.051801145, 0.08451421, -0.13254002, -0.02382279, 0.050920837, 0.00031751744, 0.18047972, 0.16602163, 0.030823914, -0.0011384314) * inp_0_2_0;
|
|
result3 += M4(0.023018463, 0.17725265, -0.083900005, -0.0016977334, 0.08052999, 0.012068541, 0.023925066, 0.062392928, 0.089258514, 0.20762452, -0.0012363704, -0.06545757, -0.041220512, 0.14856975, -0.006452312, -0.06116456) * inp_0_0_1;
|
|
result3 += M4(0.26562902, -0.016244365, 0.0019193676, 0.24509418, -0.09421186, -0.010551239, 0.023509221, 0.2747535, 0.22532648, 0.12740539, 0.036053278, 0.05915232, -0.2972877, 0.3417145, 0.49117815, -0.15531492) * inp_0_1_1;
|
|
result3 += M4(-0.24040136, 0.12780526, -0.013421846, -0.25284502, 0.11611097, -0.025960911, 0.03066189, 0.1830375, 0.21415165, 0.033922963, -0.13749541, 0.02706012, -0.2598328, 0.21364947, 0.09336273, 0.04968067) * inp_0_2_1;
|
|
result3 += M4(-0.03718352, -0.96204364, 0.03714818, -0.43719187, -0.048487782, -0.43293557, -0.059800256, 0.078956455, 0.0018238121, -3.0549855, -0.03621669, 0.108869955, 0.036982004, 0.19670472, 0.036557764, 0.1534082) * inp_0_0_2;
|
|
result3 += M4(0.26199692, -0.8192162, 0.28409663, 0.9116149, 0.089281835, -0.75085104, 0.07663377, 0.13557278, -0.08273171, -3.062832, -0.019690165, -0.18279873, -0.037827354, 0.18978551, -0.028312258, -0.09479274) * inp_0_1_2;
|
|
result3 += M4(0.21662533, -0.042922545, -0.20742215, 0.028488081, -0.022670094, -0.5893865, 0.008961727, 0.3320175, 0.028824994, -2.5308259, 0.11584823, -0.08467847, -0.09340072, 0.3430715, -0.060575213, 0.101658076) * inp_0_2_2;
|
|
const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.020158637, 0.04921676, 0.0011248378, -0.0139853535, -0.021115072, 0.0842691, 0.07378322, -0.0016990606, 0.059866313, -0.28137094, -0.06698033, 0.10465488, 0.010329514, 0.0108864745, -0.01272496, 0.045122433) * inp_1_0_0;
|
|
result0 += M4(0.08854995, 0.055070054, -0.055015758, 0.021567572, -0.11257386, 0.5842653, -0.0069135004, 0.035885066, -0.1636142, -0.7459314, -0.15335257, -0.010262181, 0.027063252, 0.15648766, -0.055906493, -0.05358466) * inp_1_1_0;
|
|
result0 += M4(0.11930604, -0.041682154, -0.00046179755, -0.046034325, -0.039645318, 0.13995461, 0.06940307, 0.0001820343, 0.25363562, -0.08676501, 0.017460763, -0.0004489692, 0.23728251, 3.2216347e-05, 0.08378842, 0.11764811) * inp_1_2_0;
|
|
result0 += M4(-0.055466212, -0.04458063, 0.092873715, 0.038544595, -0.14927305, 0.005744432, 0.04113772, 0.019167574, 0.036161095, -0.33363798, 0.039980143, 0.045724906, 0.1571698, -0.0023385854, -0.07739416, -0.08512195) * inp_1_0_1;
|
|
result0 += M4(0.16172823, 0.19970015, -0.023721108, -0.08872322, -0.06674386, 0.20343593, 0.04090839, 0.12648766, 0.32689074, 0.07949466, 0.77407956, 0.27892375, 0.0955797, -0.3725432, 0.11103006, 0.12239984) * inp_1_1_1;
|
|
result0 += M4(-0.38779613, -0.01784853, -0.06963441, -0.0397871, -0.63667417, 0.11148345, 0.11130723, 0.080527484, 0.52029324, -0.077319995, -0.024498427, 0.018516215, 0.4087798, -0.3360343, -0.06909514, -0.045169) * inp_1_2_1;
|
|
result0 += M4(-0.14526395, 0.061370913, -0.043710344, 0.00422225, -0.009295979, 0.039428223, -0.037319314, -0.028051874, 0.011409687, -0.035212006, 0.044103246, 0.07119403, -0.029897586, -0.11282113, 0.06211073, 0.029109601) * inp_1_0_2;
|
|
result0 += M4(-0.09135313, -0.06939499, 0.07843825, 0.097238205, -0.074063435, 0.063782595, -0.0199373, -0.004162513, 0.23876402, 0.048402824, 0.011737331, 0.0032931073, -0.027612906, -0.060305446, 0.062121384, 0.02301595) * inp_1_1_2;
|
|
result0 += M4(0.3584948, 0.042655364, 0.036384244, 0.035225525, -0.08145385, 0.11985752, -0.030626295, -0.015063418, 0.05193084, -0.1576456, -0.013641532, -0.018054835, 0.014948068, -0.050316945, 0.0032408063, -0.014731045) * inp_1_2_2;
|
|
result1 += M4(0.32308826, 0.14756969, -0.25738078, -0.08276465, 0.0055609527, 0.063924596, -0.022715418, 0.049488764, 0.02728907, -0.13584627, -0.028130148, -0.25563374, 0.06999099, -0.050134107, -0.00291317, 0.0083785765) * inp_1_0_0;
|
|
result1 += M4(-0.12584916, -0.064904496, 0.12157791, -0.48184228, 0.105223686, 0.22921236, 0.07946646, 0.15504126, -0.2577391, -0.37409326, 0.120499425, 0.12071196, 0.1728067, -0.05941374, 0.08807147, -0.4108605) * inp_1_1_0;
|
|
result1 += M4(0.06337823, 0.15880086, -0.099946834, -0.114392236, 0.1709455, 0.5266816, -0.13256815, 0.16290307, 0.077787966, -0.7275313, 0.12744896, -0.41949978, -0.2116448, -0.9786744, -0.1913137, 0.1305901) * inp_1_2_0;
|
|
result1 += M4(0.16809405, 0.23277469, -0.058339387, 0.16693619, -0.038102575, -0.08851083, -0.02797779, 0.04274495, -0.20028922, 0.04436475, 0.013740321, 0.053681087, 0.048335608, 0.032794934, -0.07580541, -0.047612146) * inp_1_0_1;
|
|
result1 += M4(0.39193976, -0.035292547, 0.13096325, -0.5574103, 0.19815198, 0.09274524, -0.19850785, 0.21971238, -0.47791016, -0.37023675, 0.4238322, -0.085052595, -0.113550246, 0.019958464, -0.021854822, 0.1149585) * inp_1_1_1;
|
|
result1 += M4(-0.1860216, -0.23182867, -0.013862331, 0.608505, -0.06452028, 0.28561857, 0.04867474, 0.8088521, 0.0956222, -0.13799746, -0.20208229, -0.71716607, -0.041455816, -0.3238654, -0.42159647, -0.09715043) * inp_1_2_1;
|
|
result1 += M4(0.144747, -0.0021215512, -0.08473307, 0.27575985, 0.022519859, 0.039966933, 0.030069487, 0.04688268, -0.07048565, 0.03966074, 0.041769076, -0.047249142, 0.04115463, -0.008614794, -0.0038662087, -0.060845178) * inp_1_0_2;
|
|
result1 += M4(-0.05863267, -0.004415535, 0.12966555, 0.24865213, -0.014542384, -0.09795358, -0.032396104, 0.36158124, 0.05366866, 0.004015526, -0.0067248484, -0.38101593, 0.10725015, 0.012040856, 0.023441372, -0.069572) * inp_1_1_2;
|
|
result1 += M4(-0.09699632, 0.15249658, 0.12531403, -0.3328034, 0.0030972431, -0.19210772, -0.035863765, 0.45071825, -0.1378622, 0.08406743, -0.038835898, -0.20673525, 0.07893035, -0.054715797, -0.08594201, -0.2384776) * inp_1_2_2;
|
|
result2 += M4(-0.10114114, -0.17318277, 0.2932721, 0.37389088, 0.04549849, 0.021528823, -0.0007721709, -0.1394988, 0.1169677, -0.08984449, -0.01845354, 0.19966806, 0.13676907, -0.055648476, -0.018576363, 0.15089002) * inp_1_0_0;
|
|
result2 += M4(-0.08810577, 0.12845947, -0.022037087, 0.0713056, 0.026246676, -0.011513274, 0.0025324174, -0.082259364, -0.38545743, -0.2139153, 0.090562925, 0.1171016, 0.020509541, -0.14015672, -0.10214009, 0.030530844) * inp_1_1_0;
|
|
result2 += M4(0.005022763, -0.1465861, 0.17625697, 0.07496652, 0.06826987, 0.0644632, 0.05419615, 0.068747275, -0.01850323, 0.11714754, 0.015112417, 0.12423838, -0.21304888, -0.008375001, -0.2605828, -0.07451024) * inp_1_2_0;
|
|
result2 += M4(-0.15553749, -0.34850895, 0.39819357, 0.22360839, 0.054737903, 0.043017726, -0.008483063, -0.32135597, -0.11481686, 0.070533566, 0.32961053, -0.21778224, -0.13037261, -0.15484872, 0.07325692, 0.13470246) * inp_1_0_1;
|
|
result2 += M4(0.16771689, -0.3213681, -0.22630855, 0.2090357, 0.531006, -0.1665998, -0.55734706, -0.7523218, -0.090682685, 0.39249292, 0.21835013, -0.030219488, -0.1309162, 0.08927703, 0.40079466, 0.5337078) * inp_1_1_1;
|
|
result2 += M4(-0.047077622, 0.10245612, -0.11671954, -0.039104506, 0.045307513, 0.06795554, 0.37351328, -0.27963153, -0.10477316, 0.012324867, -0.17526017, 0.091815375, 0.38696444, 0.14811198, -3.5837452, 0.32542264) * inp_1_2_1;
|
|
result2 += M4(0.107646614, -0.12702344, -0.017104609, -0.10133434, 0.062007267, 0.02794019, -0.12198437, -0.20940919, -0.039446287, -0.04715233, -0.1441309, -0.00020755765, -0.050709587, 0.004934061, 0.066610165, 0.102615274) * inp_1_0_2;
|
|
result2 += M4(0.055885743, 0.20596245, 0.39146072, -0.10714875, 0.0031467446, 0.042376287, 0.20682976, -0.46683162, -0.029935647, 0.12960306, -1.4127905, 0.297964, -0.09073476, -0.10628283, -0.15256898, 0.28072315) * inp_1_1_2;
|
|
result2 += M4(-0.01828279, 0.055520464, -0.1621053, -0.100111686, 0.051583163, -0.023852073, -2.2467358, -0.43954092, -0.09379297, -0.004591499, 0.12515631, 0.13008092, -0.09091408, -0.10551536, 0.06935946, 0.267165) * inp_1_2_2;
|
|
result3 += M4(0.001121231, -0.116080426, -0.107541114, -0.19430692, 0.03921505, 0.032906685, 0.027074352, -0.006836348, -0.049477026, -0.020372244, -0.046985947, -0.017023273, -0.09252118, 0.00045681695, -0.05083715, 0.0002327922) * inp_1_0_0;
|
|
result3 += M4(0.17081107, 0.171148, -0.0045726285, -0.059847992, 0.102544926, -0.037925754, 0.023703234, -0.016200975, -0.068067454, 0.08789572, -0.045858927, -0.15687598, -0.030475779, 0.3318887, 0.092105575, 0.0339815) * inp_1_1_0;
|
|
result3 += M4(-0.3998928, 0.10400071, 0.0146760205, -0.079928584, -0.19716969, 0.05670307, 0.043592338, -0.05051233, -0.4631333, -0.111446775, 0.018087799, -0.13826992, -0.4345676, -0.28385895, -0.09957666, -0.09572035) * inp_1_2_0;
|
|
result3 += M4(0.025526108, -0.65327173, 0.050278466, -0.17187065, 0.02873274, -0.049899336, 0.034705333, 0.0026502954, -0.06212493, -1.0741755, 0.00038897205, -0.010395267, 0.030468699, 0.23473783, -0.009471362, -0.06953327) * inp_1_0_1;
|
|
result3 += M4(0.504466, 0.25026208, 0.14870858, -0.32623893, 0.043759342, 0.119388305, -0.01180262, 0.045797486, -0.15875077, -1.2279036, -0.13139768, -0.015066072, 0.15799735, 0.36802685, -0.02803516, -0.009432813) * inp_1_1_1;
|
|
result3 += M4(-0.07725342, -0.23465729, 0.01499422, 0.008299808, 0.080197036, -0.34347633, 0.021336041, 0.0015247765, -0.08134993, -1.0686655, 0.006038173, -0.17920311, -0.021682333, -0.5358267, 0.032830767, -0.16503559) * inp_1_2_1;
|
|
result3 += M4(-0.046656102, 0.4130624, -0.08565986, 0.044300493, -0.00034583264, -0.4603019, -0.00814766, 0.03564214, 0.011428414, 0.13996872, 0.03787744, -0.095117405, 0.027809668, -0.47052267, 0.053844158, 0.014706537) * inp_1_0_2;
|
|
result3 += M4(0.22918473, -1.2321059, 0.018048212, 0.14816047, -0.08237241, -0.8479986, 0.029264655, 0.045883853, 0.0739114, 0.3766523, 0.025773916, -0.1114103, 0.007666082, -1.6611128, 0.06056709, 0.013368697) * inp_1_1_2;
|
|
result3 += M4(0.070496134, -0.13917391, 0.01671008, -0.02739195, -0.055815388, 0.27967262, -0.026290499, 0.044944398, 0.04376932, -1.7079539, 0.014957602, -0.0064308657, -0.0064193904, -1.7823993, 0.020053854, -0.020422848) * inp_1_2_2;
|
|
const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.040685326, 0.019202568, 0.085796, 0.058194246, -0.12204348, -0.17381316, 0.14245704, 0.08302883, -0.11325476, -0.098387495, 0.011919243, 0.037839588, -0.019909292, -0.090265736, -0.029772518, 0.021361295) * inp_2_0_0;
|
|
result0 += M4(0.1350175, -0.17427717, -0.06368384, 0.04834391, -0.32116097, 0.22988725, -0.15886529, -0.057563584, -0.06309216, -0.19327909, 0.012142371, -0.019530954, 0.015532162, -0.09410268, 0.028791951, 0.019857269) * inp_2_1_0;
|
|
result0 += M4(-0.09504906, 0.09759539, -0.037708234, -0.03228641, -0.04091912, 0.019800322, 0.037136376, -0.014333935, -0.04841269, 0.006947392, -0.058824852, -0.01539875, 0.031869523, -0.10707408, -0.0053441883, 0.037088424) * inp_2_2_0;
|
|
result0 += M4(-0.017622449, 0.1668945, 0.049392745, 0.023371896, 0.14355041, 0.1256854, -0.09238751, 0.6084779, -0.06498068, 0.031000355, 0.008305978, 0.06628111, -0.0059575806, -0.07780641, -0.019977028, 0.053992756) * inp_2_0_1;
|
|
result0 += M4(0.64182985, -0.24790576, 0.22865957, 0.15013333, 0.22472541, -0.41521356, 0.2727451, 0.1425033, -0.104730055, 0.07813815, -0.07939012, -0.004840109, 0.015298698, -0.40924573, 0.047924146, 0.050329972) * inp_2_1_1;
|
|
result0 += M4(0.5166944, -0.14351964, 0.002835482, -0.021241073, -0.044501536, -0.09123378, -0.0025898484, 0.088492654, -0.060680177, 0.17358576, -0.031046059, -0.023313675, 0.21963911, -0.32172033, -0.03685404, -0.056302886) * inp_2_2_1;
|
|
result0 += M4(-0.14249846, 0.1919643, -0.054700527, -0.030300103, 0.116425626, 0.030547854, 0.29164848, 0.24575163, 0.113290496, 0.081504695, 0.11042106, 0.052331973, 0.051052187, -0.035564587, 0.10448482, 0.04722249) * inp_2_0_2;
|
|
result0 += M4(-0.20679013, -0.1357482, 0.0670046, -0.0044386904, -0.103470646, -0.026976867, -0.019741116, -0.096597396, 0.22226769, 0.039062824, 0.09040389, -0.0021132238, 0.0815542, -0.4116235, 0.30562884, 0.15595485) * inp_2_1_2;
|
|
result0 += M4(0.31853187, -0.0028201935, -0.0018939296, 0.000118292395, -0.03563755, 0.08407976, 0.047397576, 0.0067874063, -0.07854643, 0.08589363, 0.0595104, 0.033278853, 0.4471929, -0.1569817, 0.082737334, 0.017763961) * inp_2_2_2;
|
|
result1 += M4(-0.051908046, 0.04940803, 0.025206795, -0.08582518, 0.105329625, -0.13497052, -0.086282, 0.3592072, -0.021779265, 0.01662948, -0.029943835, 0.037047792, 0.01232303, 0.011557858, 0.011784409, -0.03676768) * inp_2_0_0;
|
|
result1 += M4(-0.18673477, -0.24641654, 0.33000603, 0.11542049, -0.30434307, 0.5306456, -0.011384738, 0.45843646, -0.20390716, 0.10540199, 0.016882136, -0.031867232, 0.008356345, -0.13086288, -0.041344773, -0.0036602747) * inp_2_1_0;
|
|
result1 += M4(0.13682915, 0.0036812855, 0.012057137, 0.23811786, 0.07334074, -0.08856756, 0.04595943, 0.1599024, -0.18250804, 0.13405186, 0.04430016, 0.051927723, 0.056383044, -0.027024115, 0.038770597, -0.07303996) * inp_2_2_0;
|
|
result1 += M4(0.15318306, -0.12760171, 0.0450346, -0.07374663, 0.07834138, 0.49790388, 0.2915172, -0.09512411, -0.19560269, 0.14700219, 0.07985623, 0.10233619, -0.115605205, 0.053538296, 0.052413, -0.01793204) * inp_2_0_1;
|
|
result1 += M4(0.25549623, 0.79109895, -0.15794283, -0.4189323, -0.065393336, -0.88975954, 0.08374594, -1.1873229, -0.016332334, 0.15877184, 0.110330045, 0.22505662, -0.10667615, 0.2046037, -0.034100883, -0.14114152) * inp_2_1_1;
|
|
result1 += M4(-0.029152432, 0.19976863, -0.0067866994, 0.1554457, 0.07682561, 0.13939528, -0.11835224, -0.1414863, -0.24614279, 0.087009184, 0.2559374, -0.06699069, -0.085265845, 0.18493001, -0.013739076, -0.42368194) * inp_2_2_1;
|
|
result1 += M4(0.118983015, -0.0572679, 0.000318014, 0.007505368, -0.0299957, 0.14891346, 0.12727791, 0.54379475, -0.07486102, 0.117346324, 0.09264876, 0.19114546, -0.051801726, 0.09137759, 0.076237656, 0.007558985) * inp_2_0_2;
|
|
result1 += M4(-0.0016464916, 0.092834815, -0.07756129, 0.4916174, 0.15548897, 0.41225353, 0.033371974, 0.05942626, -0.22053158, 0.1703363, 0.018514646, 0.014471713, -0.0042645177, 0.11441778, 0.20387441, -0.19286348) * inp_2_1_2;
|
|
result1 += M4(0.045293007, 0.056085728, -0.04795258, -0.22916062, 0.083087645, -0.03853095, 0.012642993, -0.11186545, -0.11702325, 0.21539073, 0.1151789, 0.12692805, 0.048481543, 0.073370904, -0.1511963, -0.4363887) * inp_2_2_2;
|
|
result2 += M4(0.22008175, 0.006780327, 0.005894409, 0.10082709, -0.044127602, 0.05716219, 0.0787319, -0.08502259, -0.05248802, 0.10199217, 0.12158407, -0.12550665, -0.06999623, 0.022333184, 0.014213934, 0.11376998) * inp_2_0_0;
|
|
result2 += M4(0.19652066, 0.36215308, 0.09469319, 0.06413764, 0.05205069, -0.016372532, 0.1787366, 0.0877199, -0.08429393, 0.21526356, 0.14981632, -0.1523603, -0.0466523, -0.057867594, 0.04867976, 0.069361486) * inp_2_1_0;
|
|
result2 += M4(-0.2724398, 0.19309507, 0.09607288, 0.1206929, 0.07003121, 0.09855454, -0.025575627, 0.009461037, -0.19269364, 0.16064173, 0.14619134, -0.08403769, 0.04281966, 0.055891816, -0.015193178, 0.02850246) * inp_2_2_0;
|
|
result2 += M4(0.052729435, 0.018342854, 0.020109378, 0.049135573, -0.27488902, -0.20894177, 0.08238209, 0.12393387, -0.05489923, 0.12008817, 0.13158463, -0.15824428, -0.18391077, 0.03304128, 0.04371131, 0.08666164) * inp_2_0_1;
|
|
result2 += M4(-0.07128967, 0.31714946, 0.08881361, -0.08835794, 0.041942906, -0.07898635, 0.09270609, -0.17499882, -0.1766717, 0.17561086, 0.1755127, -0.23229517, -0.46135002, 0.109490745, 0.11468097, 0.04771001) * inp_2_1_1;
|
|
result2 += M4(-0.049790166, -0.058075182, -0.072881676, 0.03250728, -0.009275402, -0.010968189, -0.1080809, -0.05768197, -0.26625115, 0.2584098, 0.23041652, -0.2520977, -0.21001528, 0.2393364, 0.06696907, 0.0046516233) * inp_2_2_1;
|
|
result2 += M4(0.23925087, 0.0680813, 0.078727536, -0.022581024, -0.095084116, 0.057004742, 0.3138766, 0.0392872, -0.14981374, 0.07913679, 0.1031623, -0.040174864, -0.10357394, 0.07672665, 0.02854845, 0.047957916) * inp_2_0_2;
|
|
result2 += M4(-0.08796459, 0.21651562, 0.2702043, -0.1740451, -0.27310392, 0.09648933, -0.3957877, -0.12951781, -0.021011474, 0.19034348, 0.11784486, -0.37994373, -0.5231188, 0.13480479, 0.19004749, 0.08633311) * inp_2_1_2;
|
|
result2 += M4(-0.09953264, -0.057516363, 0.009389417, 0.14430472, 0.032242656, -0.066502385, 0.28266618, 0.09512168, 0.09544029, 0.07469234, -0.004258793, -0.33108658, -0.46292925, 0.060545143, -0.26074094, 0.08396749) * inp_2_2_2;
|
|
result3 += M4(-0.07606638, -0.063000284, 0.09598554, 0.104964264, -0.040376373, -0.035215527, -0.0042664115, 0.1667837, 0.063100494, -0.018531347, 0.029744422, -0.075527735, 0.03385397, -0.021013837, -0.017285448, -0.08043407) * inp_2_0_0;
|
|
result3 += M4(-0.26622432, -0.03477078, 0.03494469, 0.04049817, 0.039655168, 0.08699442, -0.050543044, 0.22162461, 0.069138594, -0.050977368, -0.022323107, -0.05545389, 0.015020561, 0.026642598, 0.021018691, -0.02690074) * inp_2_1_0;
|
|
result3 += M4(-0.032372512, -0.08706534, -0.015817178, -0.08862899, 0.013261543, 0.002546997, 0.0176546, 0.008601677, 0.049245533, 0.04126535, -0.036267422, -0.1031398, 0.008470616, 0.015351556, 0.019256743, -0.016997326) * inp_2_2_0;
|
|
result3 += M4(0.0823704, 0.2817941, 0.08857372, 0.10827305, -0.12110614, 0.078699365, 0.23771732, 0.094654255, -0.042905893, 0.0771783, 0.0006978189, 0.029819358, -0.0036443584, -0.020106701, -0.01071964, -0.01958439) * inp_2_0_1;
|
|
result3 += M4(0.48942843, 0.27745262, 0.36281285, -0.028398752, 0.24205421, 0.25413743, 0.13406385, -0.018467713, -0.122251295, 0.0134095885, -0.074013464, -0.11212041, 0.00941324, -0.03942664, -0.008397736, -0.008076683) * inp_2_1_1;
|
|
result3 += M4(0.20749235, 0.21867277, 0.004653491, 0.03703782, 0.023102382, 0.07013684, -0.016614152, 0.028991466, 0.034551147, -0.079350136, -0.006204742, 0.061720155, 0.061270434, 0.028488534, -0.010227663, 0.07652426) * inp_2_2_1;
|
|
result3 += M4(-0.02412553, 0.0037526242, -0.0668934, -0.054726753, -0.016596323, -0.30688837, 0.049872916, 0.06895975, -0.017586173, 0.3452859, 0.0950053, -0.018741097, -0.048104398, 0.017518392, 0.0949136, 0.028228115) * inp_2_0_2;
|
|
result3 += M4(0.013070116, -0.5232367, 0.058542944, 0.050612744, 0.102370545, -0.7376758, 0.08592539, 0.07714531, 0.013114515, -0.0074695004, 0.02178679, -0.15938848, 0.020757858, -0.48573405, 0.32483444, -0.023649752) * inp_2_1_2;
|
|
result3 += M4(0.046947844, -0.3603543, 0.07617314, -0.04335851, 0.05356554, -0.04626905, 0.0144886095, 0.05473025, 0.07629946, 0.89244694, 0.020768393, -0.22464417, -0.0074107046, -0.87721777, 0.052553397, -0.090482995) * inp_2_2_2;
|
|
const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.0766504, -0.09879246, 0.0396668, 0.017115591, -0.010305095, 0.19718798, -0.071258314, 0.0664194, -0.046132524, 0.062901266, 0.080579214, -0.004289638, 0.12400763, -0.0585527, 0.015766127, -0.0032669778) * inp_3_0_0;
|
|
result0 += M4(-0.020116923, -0.1888125, 0.059224084, 0.08714167, 0.4200794, -0.22349876, 0.11738465, -0.08908094, -0.12611064, 0.28323323, -0.0006456269, 0.055735517, 0.09580583, -0.03230914, -0.06237639, -0.0038313572) * inp_3_1_0;
|
|
result0 += M4(-0.015171148, -0.011160129, 0.039504074, 0.034451094, -0.33802676, -0.20952617, -0.09564441, -0.018064525, -0.14134207, 0.13155054, 0.0101029305, 0.033066314, 0.16871352, -0.0057818503, -0.0054384056, -0.03857239) * inp_3_2_0;
|
|
result0 += M4(-0.059774876, -0.23344266, 0.1916854, -0.41434157, 0.1374049, 0.28787696, -0.029185118, -0.033947468, -0.26908988, 0.10820922, 0.012907376, 0.06392264, 0.035846196, -0.10413254, 0.02084434, -0.10425743) * inp_3_0_1;
|
|
result0 += M4(-0.08171906, 0.35755453, 0.059284337, 0.0021495572, 0.020121615, -0.08626559, 0.16330962, 0.08871405, -0.3114198, 0.0147190485, 0.097635314, 0.13202067, -0.31036153, 0.011890783, 0.1266637, 0.5408988) * inp_3_1_1;
|
|
result0 += M4(0.17335528, -0.015333025, -0.008093493, -0.033920698, -0.02464884, 0.15351357, 0.036093365, 0.03906647, -0.26172918, 0.21810411, 0.0920626, 0.07798469, -0.16078314, 0.09534679, -0.08410649, -0.104074486) * inp_3_2_1;
|
|
result0 += M4(0.09742962, 0.22259852, -0.0017837521, -0.12143848, 0.050734755, 0.22144549, -0.052987207, -0.049092356, -0.14100018, 0.013511058, 0.043222908, 0.061185308, -0.11384891, -0.0965968, 0.010946058, 0.12053483) * inp_3_0_2;
|
|
result0 += M4(-0.42434677, 0.23842174, 0.030445544, 0.060407106, 0.07218617, 0.03208096, -0.071624324, 0.0028445057, -0.15972686, -0.002714478, 0.07611455, 0.047295272, 0.07009659, 0.13138281, 0.05384319, 0.043430082) * inp_3_1_2;
|
|
result0 += M4(-0.0030897667, -0.040491477, -0.045542356, -0.037420522, 0.0134954015, 0.015531995, 0.01853244, -0.033848308, -0.19752295, 0.054179564, 0.04510645, 0.049722757, 0.20566362, -0.05376662, 0.05160025, 0.082582206) * inp_3_2_2;
|
|
result1 += M4(-0.071607076, -0.01151562, 0.03144804, 0.06795253, 0.035164766, 0.09399475, 0.046911433, -0.105985604, -0.027395945, 0.08368641, -0.03945179, 0.06652106, -0.009234248, 0.0311306, 0.015887855, 0.013628602) * inp_3_0_0;
|
|
result1 += M4(-0.009326969, -0.5451517, -0.06685787, -0.2203032, -0.04059302, 0.50291795, -0.23659435, -0.16453515, -0.019921033, -0.08829904, 0.022711797, 0.04344343, 0.080518015, -0.017371742, -0.12549073, -0.1885871) * inp_3_1_0;
|
|
result1 += M4(0.06340467, -0.045215208, -0.011862773, -0.17774045, 0.03218759, -0.21211256, 0.24903278, 0.16339235, -0.0889197, 0.26705155, 0.026684994, 0.20748319, 0.21641175, -0.7041937, -0.12879434, 0.034087796) * inp_3_2_0;
|
|
result1 += M4(0.0043784324, -0.1104343, 0.0051153954, 0.3049556, 0.053384513, -0.083815835, -0.077591434, 0.06180734, 0.056226797, -0.060590953, 0.023422653, -0.021765847, 0.06481506, 0.038366422, -0.03140693, -0.04423246) * inp_3_0_1;
|
|
result1 += M4(0.14104234, -0.42322505, 0.58770734, 0.02756258, -0.06619171, 0.039784182, 0.09670571, -0.11221906, 0.090540856, 0.21492137, 0.0033946468, -0.054638606, -0.26438943, -0.40769553, 0.58308834, 0.011558848) * inp_3_1_1;
|
|
result1 += M4(0.038047027, -0.09069749, -0.03155484, -0.4204703, 0.112935, 0.15827107, -0.1785291, 0.10565701, 0.03801543, 0.13049063, 0.18165028, 0.40097496, 0.2054982, -0.65470904, -0.17466919, -0.74355334) * inp_3_2_1;
|
|
result1 += M4(0.085544996, -0.17516859, -0.09178954, -0.47846717, 0.081615664, 0.03366028, -0.008114159, 0.10606008, 0.010830146, 0.0031332439, 0.06839305, 0.0031851435, -0.054583967, -0.022798512, 0.062276945, -0.01278501) * inp_3_0_2;
|
|
result1 += M4(-0.03308873, 0.18022367, 0.42132232, -0.31112126, 0.13086608, 0.16092083, 0.032381505, -0.061450902, 0.09202975, 0.20787741, 0.008323081, 0.117322624, 0.09185017, -0.079603694, 0.11225445, -0.27967831) * inp_3_1_2;
|
|
result1 += M4(-0.03957761, -0.06413558, -0.020417236, -0.094888, 0.16328244, 0.29664227, 0.04430604, -0.067853294, 0.08792947, 0.15714999, 0.05795048, 0.101340644, -0.002680674, -0.050374318, 0.00048598138, -0.35752374) * inp_3_2_2;
|
|
result2 += M4(-0.060640704, -0.15616873, 0.07173303, 0.029078547, -0.30562615, -0.041268278, 0.20111884, 0.41848227, 0.06553992, -0.040807955, 0.043503594, -0.24304286, -0.0060193194, -0.012492793, -0.081317544, -0.059194572) * inp_3_0_0;
|
|
result2 += M4(-0.04040787, -0.16354334, -0.17756464, -0.015088575, 0.23186228, 0.05988759, 0.14465544, 0.16085775, 0.18045077, -0.105936624, 0.044305015, -0.17218077, 0.04191084, -0.02523788, -0.23255497, -0.04335376) * inp_3_1_0;
|
|
result2 += M4(-0.006653806, -0.01155372, 0.013032641, 0.004019512, -0.017225059, -0.34220526, 0.094255075, 0.12780511, 0.06525166, -0.07204751, -0.028964553, -0.08786299, 0.01816411, -0.084005155, 0.00058218115, -0.073107585) * inp_3_2_0;
|
|
result2 += M4(0.15325405, -0.18560395, 0.1131455, 0.06522893, 0.07688168, 0.054853395, -0.063450925, 0.10854047, 0.02840006, -0.0005397363, 0.05471179, -0.54284006, -0.017266683, 0.0016633559, -0.02881508, -0.05636048) * inp_3_0_1;
|
|
result2 += M4(0.13207924, 0.2558923, -0.056082908, 0.13571228, 0.083247505, -0.016255124, -1.6765794, 0.026998602, 0.35833323, -0.25714707, -0.06403621, -0.259124, 0.0952539, 0.11210623, -0.12490101, 0.07662671) * inp_3_1_1;
|
|
result2 += M4(-0.021977363, -0.0713963, 0.123100124, 0.052161366, -0.20624985, -0.1925915, 0.00520385, 0.110770926, 0.17356953, 0.020864306, 0.11559725, -0.079656266, 0.021110583, -0.314465, 0.04681424, 0.1371713) * inp_3_2_1;
|
|
result2 += M4(0.30375904, -0.17828616, -0.17909098, 0.013856255, -0.06746472, 0.003626132, 0.19023083, -0.05196694, 0.15099213, -0.06683428, 0.08304773, -0.12654506, 0.13353965, 0.044826567, -0.059660748, 0.013467923) * inp_3_0_2;
|
|
result2 += M4(0.08426794, -0.22384238, 0.19148509, 0.05827365, 0.018852733, 0.1217786, -0.014563931, 0.021475017, 0.057863247, -0.040182285, 0.58330965, -0.22231136, 0.050184354, 0.12184998, -2.07873, -0.018987421) * inp_3_1_2;
|
|
result2 += M4(-0.020585502, -0.0018608408, -0.14940372, -0.011206775, -0.052012913, -0.021318972, 0.27726486, 0.13465387, 0.18750753, 0.03791731, 0.040556125, -0.11679988, 0.19256034, -0.053861372, -0.028166458, -0.14577334) * inp_3_2_2;
|
|
result3 += M4(0.1110923, -0.023193434, 0.08967661, 0.0026217531, -0.05411397, 0.00651578, -0.031594105, -0.09873143, 0.041768722, 0.04100008, 0.036410563, 0.039395727, -0.011185455, 0.01551338, 0.023943875, 0.015728712) * inp_3_0_0;
|
|
result3 += M4(-0.046828236, -0.07020306, 0.00973051, 0.0005743727, 0.1787815, 0.22740924, 0.11705792, -0.0115812505, 0.057035774, -0.0070796604, 0.014894732, 0.024319354, -0.047951322, 0.03793726, -0.10123618, 0.014985628) * inp_3_1_0;
|
|
result3 += M4(0.046608195, 0.026250076, 0.0219202, -0.009327337, -0.008816733, -0.082837604, -0.017703755, 0.2030575, 0.056901768, -0.0084842825, 0.012729636, -0.08129811, 0.019200847, 0.032389343, -0.0009601458, 0.013909917) * inp_3_2_0;
|
|
result3 += M4(-0.48060805, -0.23521748, 0.34547088, -0.040182896, -0.038119644, -0.18511339, -0.04657396, -0.027486663, 0.08065303, 0.12756409, 0.036840674, 0.10895672, 0.047392767, -0.0069511794, 0.094890505, 0.052782986) * inp_3_0_1;
|
|
result3 += M4(0.12509732, 0.03977793, -0.03640901, 0.50160116, -0.15279287, 0.318791, 0.13314092, -0.1444542, -0.044084627, 0.084593356, 0.078950174, 0.16075894, -0.03516418, -0.2527174, -0.036211584, 0.47695947) * inp_3_1_1;
|
|
result3 += M4(-0.12992151, 0.043950334, 0.03127988, 0.042125385, -0.19439988, -0.25358784, -0.048711743, -0.20638546, 0.13232373, 0.0047641313, 0.08614499, -0.0324139, -0.6161785, 0.08246424, 0.15840074, -0.26129988) * inp_3_2_1;
|
|
result3 += M4(0.011187205, 0.42798644, 0.008353978, -0.044744402, -0.0177301, 0.25025943, -0.041332502, -0.11565288, 0.010321948, 0.536089, 0.03314616, 0.008272843, -0.008212299, 0.09620918, -0.008280064, 0.11386974) * inp_3_0_2;
|
|
result3 += M4(-0.061153438, 1.292626, -0.014955989, 0.20095024, -0.027531676, -0.7807742, -0.11907358, -0.09337009, 0.03249329, -0.62426835, 0.0146850245, 0.026055751, -0.04021995, -1.9652365, 0.088370696, 0.11366978) * inp_3_1_2;
|
|
result3 += M4(0.04068244, 0.35956866, -0.024620563, -0.029591298, 0.0932946, 0.34455878, 0.011712873, -0.20691097, -0.004321678, -0.2839504, 0.010122162, 0.025036965, -0.02681107, -2.100412, 0.0069077644, 0.06897316) * inp_3_2_2;
|
|
const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2);
|
|
imageStore(out_image, output_base + ivec2(0, 0), max(result0, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(1, 0), max(result1, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(0, 1), max(result2, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(1, 1), max(result3, V4(0.0)));
|
|
}
|
|
|
|
// Pass header for the "Conv2D-3-ReLU" stage. The magic-comment lines below are
// metadata for the shader host, not GLSL. This pass binds the texture conv2d_2
// and saves its result under the name conv2d_3.
// NOTE(review): WIDTH/HEIGHT/WHEN read as postfix expressions, i.e. the saved
// texture is 2x the LUMA size and the pass only runs when OUTPUT is more than
// 1.3x LUMA in both dimensions -- confirm against the host's user-shader docs.
// NOTE(review): COMPUTE 24 32 12 16 presumably means a 24x32 output block
// handled by a 12x16 workgroup -- confirm against the host's user-shader docs.
//!DESC ArtCNN C4F16 DS (Conv2D-3-ReLU)
|
|
//!COMPUTE 24 32 12 16
|
|
//!HOOK LUMA
|
|
//!BIND conv2d_2
|
|
//!SAVE conv2d_3
|
|
//!WIDTH LUMA.w 2.0 *
|
|
//!HEIGHT LUMA.h 2.0 *
|
|
//!COMPONENTS 4
|
|
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
|
|
// Precision selection for this pass.
// The float16 arithmetic extension is requested with `enable` rather than
// `require`, and the #ifdef below picks the type aliases accordingly:
//   V4 - 4-component vector used for activations and accumulators
//   M4 - 4x4 matrix used for the weight literals
//   F  - scalar type (not referenced by the declarations visible in this pass)
// When the extension macro is not defined, the aliases fall back to the
// standard vec4 / mat4 / float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
|
|
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
|
|
#	define V4 f16vec4
|
|
#	define M4 f16mat4
|
|
#	define F float16_t
|
|
#else
|
|
#	define V4 vec4
|
|
#	define M4 mat4
|
|
#	define F float
|
|
#endif
|
|
|
|
// Tile geometry shared by every invocation of hook() in this pass.

// Convolution footprint: hook() reads taps at local_xy + {0,1,2} in x and y.
const ivec2 ksize = ivec2(3, 3);
|
|
// Integer half-kernel, (1, 1); subtracted from the tile coordinate so the
// tile is centred on the invocation's own position.
const ivec2 offset = ksize / 2;
|
|
// x/y extent of the workgroup as signed integers (the z component is dropped
// by the ivec2 constructor).
const ivec2 wg_size = ivec2(gl_WorkGroupSize);
|
|
// Tile extent: the workgroup extent plus ksize - 1 extra rows/columns so that
// every invocation can read its full 3x3 neighbourhood.
const ivec2 isize = wg_size + ksize - 1;
|
|
// Workgroup-shared input tile. The first index selects one of the four texels
// of a 2x2 input quad: hook() fills planes 0..3 from offsets (0,0), (1,0),
// (0,1), (1,1) relative to an input coordinate that has been multiplied by 2.
// All planes are written before the barrier() in hook() and only read after it.
shared V4 inp[4][isize.y][isize.x];
|
|
void hook() {
|
|
const uvec2 local_xy = gl_LocalInvocationID.xy;
|
|
ivec2 base = ivec2(gl_WorkGroupID) * wg_size;
|
|
for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
|
|
for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
|
|
const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2);
|
|
inp[0][y][x] = V4(conv2d_2_mul * texelFetch(conv2d_2_raw, input_base + ivec2(0, 0), 0));
|
|
inp[1][y][x] = V4(conv2d_2_mul * texelFetch(conv2d_2_raw, input_base + ivec2(1, 0), 0));
|
|
inp[2][y][x] = V4(conv2d_2_mul * texelFetch(conv2d_2_raw, input_base + ivec2(0, 1), 0));
|
|
inp[3][y][x] = V4(conv2d_2_mul * texelFetch(conv2d_2_raw, input_base + ivec2(1, 1), 0));
|
|
}
|
|
}
|
|
|
|
barrier();
|
|
V4 result0 = V4(0.0017480821, -0.013207066, -0.011228051, -0.0030521527);
|
|
V4 result1 = V4(-0.0059395116, 0.02963556, 0.0096618235, 0.02099762);
|
|
V4 result2 = V4(0.0037767761, 0.008920694, -0.019380828, 0.009338229);
|
|
V4 result3 = V4(-0.010227559, -0.02027447, 0.025720553, 0.0076178582);
|
|
const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.013742333, -0.31396708, 0.26787847, 0.072659366, 0.11329625, 0.09537591, 0.033292115, -0.017220195, 0.001464037, 0.008237256, -0.11971067, 0.017268868, 0.09323862, 0.09715007, 0.060889743, -3.5066485e-05) * inp_0_0_0;
|
|
result0 += M4(-0.50970757, -0.5762601, 0.20819591, -0.17460805, 0.11446012, -0.15912902, -0.121048756, -0.047206264, -0.08427633, 0.0025400645, -0.33599967, 0.10243455, 0.0020691168, -0.3237394, 0.13666141, -0.13717362) * inp_0_1_0;
|
|
result0 += M4(-0.20074122, -0.041116606, -0.07211998, 0.03917335, 0.005626392, 0.00093446556, 0.04984837, 0.052918322, -0.095496096, 0.02798376, -0.16388294, -0.09691992, -0.009316182, 0.002156926, 0.076914035, -0.0018362058) * inp_0_2_0;
|
|
result0 += M4(-0.04646638, 0.04301821, -0.213666, -0.068135574, 0.049697164, 0.13429289, -0.19906549, 0.32909, 0.07195283, -0.112840325, 0.03280211, 0.049278136, -0.10621306, -0.10757828, 0.021037694, 0.20384435) * inp_0_0_1;
|
|
result0 += M4(-0.077998064, 0.22377425, -0.3773312, -0.088498555, 0.0053320946, -0.058212977, 0.080621, -0.11171977, -0.21263243, 0.011046657, 0.35355136, -0.06552714, 0.09067343, 0.3376479, 0.34425646, -0.43196526) * inp_0_1_1;
|
|
result0 += M4(0.061848424, 0.06426705, -0.030383265, 0.03173913, -0.16706698, 0.010781343, 0.21188083, -0.2265141, -0.01627768, -0.14701325, 0.082368046, -0.05859685, -0.04978609, 0.19023238, -0.03965014, 0.29524118) * inp_0_2_1;
|
|
result0 += M4(-0.0306261, -0.04085749, -0.23889224, -0.050767697, -0.04013987, -0.13312452, 0.19715339, 0.26171264, 0.0013510226, -0.013842771, 0.027681874, -0.03974694, -0.19338731, 0.04583281, -0.046023604, 0.041369166) * inp_0_0_2;
|
|
result0 += M4(-0.10139793, 0.04532661, -0.0342177, -0.009461673, 0.15787649, 0.12668087, -0.13030411, 0.090191655, 0.18736099, 0.04283658, 0.12219039, 0.027710259, 0.024654623, 0.01619275, -0.35501233, -0.051116623) * inp_0_1_2;
|
|
result0 += M4(-0.12528433, -0.026363146, -0.16625732, -0.08191529, 0.12736906, 0.26764688, 0.10838407, -0.11167493, 0.059556503, 0.056794092, 0.06564879, 0.0980191, -0.11926366, -0.20925348, -0.15895343, -0.0804178) * inp_0_2_2;
|
|
result1 += M4(0.28437835, -0.002334728, -0.057142854, -0.13161235, -0.037597105, -0.0081220865, 0.033518326, 0.16829304, 0.048631717, -0.05146907, 0.01925202, -0.08801352, 0.031352233, 0.06670703, 0.0057261786, -0.07080493) * inp_0_0_0;
|
|
result1 += M4(0.20693526, -0.0027875837, -0.024844248, 0.003722317, 0.12421726, -0.071424775, 0.017107086, -0.3732517, 0.03228796, 0.026641833, 0.014621808, 0.003753755, 0.011267291, 0.012458715, 0.021493524, -0.118005805) * inp_0_1_0;
|
|
result1 += M4(-0.034746096, -0.03172964, -0.014664306, -0.012086779, 0.1931416, -0.03533445, 0.01723795, -0.5334721, -0.07718448, -0.033954136, 0.025482401, -0.18039058, 0.0783087, 0.09332574, -0.033299036, 0.15519528) * inp_0_2_0;
|
|
result1 += M4(-0.329095, -0.13896094, -0.17261124, -0.016496744, 0.09107782, -0.01243064, 0.06770809, 0.30866104, -0.0288415, 0.028245337, 0.023785003, 0.03607363, -0.011177616, 0.1798321, -0.04502171, 0.13481946) * inp_0_0_1;
|
|
result1 += M4(-0.34093675, 0.07681657, 0.0021810206, -0.06599798, -0.22647366, -0.16830544, -0.08243902, -0.30915564, 0.04177963, 0.5109498, 0.062553845, -0.034190703, 0.04370762, 0.30578157, -0.073416814, -0.06466905) * inp_0_1_1;
|
|
result1 += M4(0.04676398, -0.028934004, 0.025172938, 0.05854845, -0.12591504, -0.011103147, 0.028433567, -0.010902395, -0.15242963, 0.008639989, -0.018708037, 0.13760155, 0.19433762, 0.0031262431, -0.004067586, -0.39977702) * inp_0_2_1;
|
|
result1 += M4(-0.18624696, -0.072138086, -0.108983666, 0.09679024, 0.082618475, -0.03762175, 0.08016512, 0.21064514, -0.049806964, 0.03683179, 0.12170394, 0.04623598, -0.050608203, -0.04886757, -0.14925289, -0.23452131) * inp_0_0_2;
|
|
result1 += M4(0.06740208, 0.03922189, -0.024193132, -0.023183608, -0.10275763, 0.026307985, 0.06166279, -0.0033073507, 0.103257954, -0.003347554, 0.07212322, 0.056260653, -0.13365982, 0.05859138, -0.16502652, 0.19636813) * inp_0_1_2;
|
|
result1 += M4(-0.10216632, 0.008577115, 0.0034097305, -0.03322281, 0.031984664, 0.03745504, -0.12077493, 0.10455194, 0.072156295, 0.0039019848, -0.08020873, 0.14048693, -0.1509022, -0.0042885765, 0.15331472, -0.262448) * inp_0_2_2;
|
|
result2 += M4(-0.16221876, 0.057853937, -0.02687466, -0.02580708, -0.039356407, 0.026669305, -0.016994065, -0.0018950413, -0.03317146, 0.111001246, -0.08564103, 0.036529347, 0.0069948873, -0.004476116, 0.037428625, -0.032405715) * inp_0_0_0;
|
|
result2 += M4(-0.024450691, -0.18772535, -0.03373765, -0.02393417, 0.08165734, 0.04839238, -0.04392691, 0.060463354, 0.08244979, -0.030043233, -0.17591904, -0.105013795, 0.09993796, -0.07685819, 0.18041363, 0.068260804) * inp_0_1_0;
|
|
result2 += M4(-0.07076884, 0.007816242, -0.04245959, 0.03505174, -0.017449595, 0.10012675, -0.08593324, 0.026215378, -0.12722993, 0.11531461, -0.044889923, -0.07379326, 0.102622665, -0.13516678, 0.027440194, -0.025661053) * inp_0_2_0;
|
|
result2 += M4(-0.2417654, 0.532874, -0.1792782, -0.016072318, 0.12632751, -0.1394212, -0.104871325, -0.2143921, 0.009683901, -0.13191098, -0.072481215, 0.14556748, 0.14193138, -0.084580764, 0.02502378, -0.1356686) * inp_0_0_1;
|
|
result2 += M4(-0.070205666, -0.27398396, 0.07058423, -0.1282003, -0.19091621, -0.00998729, 0.009443734, -0.22386715, 0.4032219, -0.1489337, -0.036794376, 0.010098983, -0.17007609, -0.1622153, 0.5655884, 0.09552588) * inp_0_1_1;
|
|
result2 += M4(-0.05603438, 0.04169446, 0.012783244, 0.05043658, 0.053274922, -0.09159566, 0.062812835, -0.07592551, 0.033113863, -0.05460731, 0.06573498, 0.040921483, -0.07896506, 0.1589944, -0.1188062, 0.107915975) * inp_0_2_1;
|
|
result2 += M4(-0.14555661, -0.02623733, -0.010950724, -0.0029378114, 0.16225207, -0.14775589, 0.043020986, -0.12051408, 0.043429397, 0.05345529, -0.038897146, -0.010908579, -0.021893447, 0.097037636, -0.012969422, 0.13148984) * inp_0_0_2;
|
|
result2 += M4(0.0028671154, -0.13369134, 0.009956911, -0.052492116, -0.029130064, 0.58226454, -0.011425127, -0.013592997, 0.021517135, -0.008963109, 0.02541666, 0.017184312, -0.16393614, -0.110572346, 0.0063540046, -0.31564647) * inp_0_1_2;
|
|
result2 += M4(-0.033714134, -0.06151679, 0.026827423, 0.03188713, 0.042517334, -0.025831262, 0.04598944, -0.08595885, -0.003915388, 0.09385612, -0.038792178, 0.0895971, -0.09253884, -0.021541333, 0.086981736, -0.10932342) * inp_0_2_2;
|
|
result3 += M4(-0.08393953, -0.22956555, 0.011609948, 0.009277218, 0.039219324, 0.04410125, -0.01109496, -0.05332186, 0.03279442, -0.046855666, -0.057603728, -0.006750665, 0.030960107, -0.03515314, 0.05504648, 0.015092096) * inp_0_0_0;
|
|
result3 += M4(0.13919544, 0.1411814, 0.042253297, 0.0037964282, -0.040356793, 0.12674251, -0.015921002, 0.053343415, 0.0379153, 0.20943864, 0.0771062, -0.030640893, 0.011386835, 0.0866107, 0.023407476, 0.058897074) * inp_0_1_0;
|
|
result3 += M4(0.052371733, -0.3991684, -0.07190776, 0.0007726265, -0.07851629, 0.18438594, -0.06764928, -0.5050416, -0.100258075, -0.09006746, -0.015903315, -0.5025817, 0.0031620082, -0.08231843, 0.05990831, 0.0768073) * inp_0_2_0;
|
|
result3 += M4(-0.06402679, -0.6725876, -0.10156491, -0.09864526, 0.02643018, 0.3727122, 0.008444335, -0.08263052, 0.018349787, 0.044862207, 0.0880957, 0.004279047, 0.054860793, -0.048947245, 0.07504498, -0.035847556) * inp_0_0_1;
|
|
result3 += M4(0.11614327, -0.30468974, 0.0444558, -0.028236007, -0.067298174, 0.29058734, -0.14453624, -0.099617906, -0.22497794, -0.18741079, 0.18873163, 0.07804972, -0.05881788, -0.22348139, -0.108909026, -0.04795675) * inp_0_1_1;
|
|
result3 += M4(0.058302045, -0.0505233, 0.0009843127, -0.043714497, -0.21508288, 0.30368665, 0.00013597557, 0.18061589, 0.039270017, 0.005499783, 0.071200795, 0.14625464, 0.09535705, 0.04838145, -0.056300323, -0.32017702) * inp_0_2_1;
|
|
result3 += M4(-0.014569219, -0.14319171, -0.044703428, -0.04628076, 0.036289185, 0.11027814, -0.023670927, 0.027902605, -0.016727578, -0.0030887127, 0.058026824, 0.03686519, -0.028439755, -0.12083449, -0.030791108, -0.016661761) * inp_0_0_2;
|
|
result3 += M4(0.007907899, -0.22081214, 0.021030104, -0.02149127, -0.0752811, 0.2893503, -0.033847194, -0.0119202975, 0.029012432, 0.024946356, 0.07516003, 0.08124122, -0.01716123, 0.031961255, -0.20085806, -0.051188) * inp_0_1_2;
|
|
result3 += M4(-0.0072488342, -0.20686667, 0.015600979, -0.041693658, 0.0012413642, 0.15499248, 0.04859961, 0.0626556, 0.020021334, -0.12558144, -0.017673044, -0.015979042, -0.050852936, -0.008009429, 0.0006964212, -0.031111179) * inp_0_2_2;
|
|
const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.050834622, 0.13558929, 0.069609776, 0.0836248, 0.16258588, -0.1463772, 0.10431995, 0.071166046, -0.09360978, -0.030868849, -0.089349106, -0.05845485, -0.052687116, -0.019358756, 0.13166283, 0.047010917) * inp_1_0_0;
|
|
result0 += M4(0.1834363, -0.06733706, 0.19551572, 0.065064564, -0.10159622, -0.44030255, 0.17712298, -0.15667069, 0.018854847, 0.27443087, -0.20249093, 0.059142705, -0.0146979755, 0.107905276, 0.3368964, -0.14103155) * inp_1_1_0;
|
|
result0 += M4(0.03400846, -0.020920517, 0.054308336, -0.06452696, -0.10025594, -0.11509778, 0.0049686376, -0.029111736, 0.08407605, 0.14476754, -0.044888042, 0.05088734, 0.010941878, -0.08833402, -0.02657171, -0.074516624) * inp_1_2_0;
|
|
result0 += M4(0.11975449, -0.09650759, 0.063281395, -0.022790521, -0.5575803, -0.09101216, 0.013859138, -0.013325497, 0.03603514, 0.030577073, -0.011164303, -0.18578358, -0.01083901, 0.24841565, -0.13914369, -0.05354659) * inp_1_0_1;
|
|
result0 += M4(-0.26356968, -0.54071015, -0.4976373, -0.043348465, -0.22630574, -0.19664787, -0.32047185, 0.016972514, 0.46792364, -0.034396674, -0.08538839, 0.102404736, -0.15684944, 0.40727407, -0.12092723, -0.2929467) * inp_1_1_1;
|
|
result0 += M4(0.1258391, -0.2243027, -0.13347167, 0.11358319, -0.08303393, -0.14977665, 0.0890761, -0.41722098, 0.15439256, 0.030496476, 0.012219409, 0.19310397, 0.26037794, 0.027789328, 0.08922465, 0.089834034) * inp_1_2_1;
|
|
result0 += M4(0.09877862, 0.05284025, -0.027668688, -0.001652929, -0.32570714, 0.021121345, -0.3423895, 0.079213284, 0.33848628, -0.049122784, 0.04044871, -0.06866454, 0.09534095, -0.0642997, 0.07336694, -0.030166717) * inp_1_0_2;
|
|
result0 += M4(-0.014141151, 0.40663037, -0.03562297, 0.00014230338, -0.31842545, 0.03633864, -0.36178958, -0.042703845, -0.1249328, -0.028372467, 0.40126094, 0.070099086, 0.08341997, -0.15070833, -0.0008065649, -0.09727535) * inp_1_1_2;
|
|
result0 += M4(0.027538782, 0.07063461, 0.086960584, -0.0014761664, 0.14140855, 0.13613716, -0.07045416, -0.18241577, 0.0012894623, -0.16937791, 0.14318486, 0.12240525, -0.109862946, 0.038892377, -0.059320077, 0.018929685) * inp_1_2_2;
|
|
result1 += M4(0.031230053, 0.054024037, 0.015748722, 0.19641443, 0.04458012, 0.0074059726, -0.024924599, -0.22967412, -0.065919496, 8.747364e-05, 0.00095081795, 0.079052135, 0.044639762, 0.034701884, -0.070215575, -0.15965857) * inp_1_0_0;
|
|
result1 += M4(-0.093358554, -0.050979193, -0.06365453, -0.21407472, 0.056124493, -0.031864822, -0.001345247, 0.26996806, -0.15269825, 0.029912075, -0.014851082, 0.05646165, -0.1369963, -0.005340246, -0.00034040198, -0.87173325) * inp_1_1_0;
|
|
result1 += M4(-0.053627335, -0.05403443, 0.0053771413, -0.033885114, 0.07943473, -0.049489744, 0.01970428, -0.019735165, -0.16704302, -0.029533712, -0.012479018, 0.046492025, -0.122892916, 0.016930556, -0.0114216255, -0.15970671) * inp_1_2_0;
|
|
result1 += M4(0.108939044, 0.06477513, 0.102464, 0.047452956, -0.12532373, -0.018778939, -0.1330631, -0.09594444, -0.009711892, 0.08322932, 0.097861886, 0.087248646, 0.03657147, 0.02723903, -0.10135503, -0.2575392) * inp_1_0_1;
|
|
result1 += M4(-0.008707382, 0.16884588, -0.06652616, 0.08253744, -0.034825716, 0.034717374, 0.03668941, -0.4708626, -0.18865623, 0.13615835, 0.017796587, 0.2501019, 0.20887725, 0.096245155, -0.08776682, -0.020289192) * inp_1_1_1;
|
|
result1 += M4(0.0970189, -0.030351888, -0.00047928322, 0.085058145, 0.027607283, -0.007281266, 0.007417884, 0.019575235, -0.029443549, -0.04141028, -0.039709818, 0.21408917, -0.049271036, -0.05297429, 0.040338963, -1.5085386) * inp_1_2_1;
|
|
result1 += M4(-0.119401, 0.03140251, 0.045685366, 0.24838144, -0.043740734, -0.0737762, -0.009221195, -0.14987206, 0.16932084, 0.019038102, 0.16664478, 0.16489927, -0.04896412, 0.05431783, -0.06988649, 0.13275269) * inp_1_0_2;
|
|
result1 += M4(0.150259, -0.15978405, -0.0844281, -0.06703002, -0.24657513, 0.06366323, -0.069003426, -0.2717428, 0.4409574, -0.037973717, -0.029585278, 0.017423902, 0.09426487, -0.04516054, -0.104453884, 0.21409835) * inp_1_1_2;
|
|
result1 += M4(0.009213807, -0.027094958, 0.07477295, 0.020622253, -0.07857662, -0.04560902, -0.024470983, 0.2686737, 0.09560327, -0.022401039, -0.010302134, 0.19773903, 0.03367946, 0.046936627, 0.10942362, -0.04420304) * inp_1_2_2;
|
|
result2 += M4(0.06776484, 0.031195775, 0.050318476, -0.052922856, 0.12900554, 0.018970301, -0.036268327, -0.048933093, -0.05853769, 0.0007942279, -0.029834637, 0.0031533486, 0.07673135, -0.094641976, 0.06459924, -0.056525134) * inp_1_0_0;
|
|
result2 += M4(-0.11017411, 0.098181106, -0.02243249, 0.012068443, 0.005660485, 0.052767463, -0.035950188, 0.06065624, -0.13324538, 0.026223551, -0.021790896, 0.0048657963, -0.04097028, 0.05637621, 0.027283916, -0.15542829) * inp_1_1_0;
|
|
result2 += M4(-0.013024199, 0.015949627, -0.08571484, -0.054483596, 0.033917774, 0.06274524, -0.050583415, 0.06525622, -0.04354908, 0.025803443, -0.011482242, -5.2527274e-05, 0.03598789, 0.09888249, -0.04873406, -0.06561173) * inp_1_2_0;
|
|
result2 += M4(-0.018317174, -0.03853359, 0.09234778, 0.087074734, -0.26256627, 0.19705102, -0.016328111, -0.1725605, -0.028978063, -0.18006021, 0.17666177, 0.05648208, -0.088303916, 0.40847126, 0.074141726, -0.13249528) * inp_1_0_1;
|
|
result2 += M4(0.06658258, 0.03539948, 0.09751061, -0.08332911, -0.30838192, 0.31569394, 0.03325235, -0.52049816, 0.4147554, -0.06113487, 0.10763208, 0.48811552, 0.2149239, -0.02266595, -0.064685725, -0.12511243) * inp_1_1_1;
|
|
result2 += M4(-0.095920496, 0.0039743925, -0.025515337, 0.07142987, -0.08141908, -0.024632594, 0.0048594745, 0.06391914, -0.02217199, 0.13312626, -0.09880823, 0.02288019, 0.022563223, 0.0145039195, 0.013533599, 0.15826075) * inp_1_2_1;
|
|
result2 += M4(0.010565236, 0.0057190843, 0.006396659, -0.038720515, -0.2800984, 0.18256105, -0.0718135, -0.12158327, 0.015065574, -0.124020405, 0.013299919, -0.1870773, 0.104143724, -0.0570007, 0.015877953, -0.06448135) * inp_1_0_2;
|
|
result2 += M4(0.10921295, -0.09829529, -0.13485469, 0.03348664, -0.25693277, 0.12156001, 0.052303057, -0.18438673, 0.03092165, 0.029485954, -0.0062324405, 0.08378771, -0.06272121, -0.22547439, -0.016985578, -0.28853402) * inp_1_1_2;
|
|
result2 += M4(0.08607484, 0.041633364, -0.0058278185, 0.06025964, 0.0077968123, 0.016047232, -0.010274299, -0.008913052, -0.026840642, 0.090148285, -0.023221692, 0.13393198, 0.049763493, 0.060152527, 0.041689675, -0.017965268) * inp_1_2_2;
|
|
result3 += M4(0.008406257, -0.070935205, -0.007202847, 0.06941878, -0.05537707, -0.18120946, -0.03022563, 0.16825119, 0.010797398, -0.12705854, -0.0070367176, 0.0055851717, -0.03206117, 0.2384432, 0.05903697, 0.013868561) * inp_1_0_0;
|
|
result3 += M4(0.02166264, 0.046648763, 0.021499218, 0.065290526, 0.01560753, -0.17776456, -0.025728986, 0.1073188, 0.0889913, -0.011712805, 0.051837422, -0.10854497, 0.08656417, 0.012107013, -0.0058191423, 0.06694211) * inp_1_1_0;
|
|
result3 += M4(-0.030368745, 0.15441988, -0.047315974, -0.0033873403, 0.0558242, 0.05901615, -0.04364993, 0.18534467, -0.00180104, -0.11714469, -0.021285538, -0.14386547, -0.012022105, -0.06351964, -0.020860594, -0.030060872) * inp_1_2_0;
|
|
result3 += M4(0.03706961, -0.134447, 0.107362546, -0.01044757, -0.014409411, 0.1598686, -0.013746884, -0.09035731, -0.0048773824, -0.1106227, 0.11328745, 0.14612412, -0.019757058, 0.2840584, -0.06905247, 0.044269875) * inp_1_0_1;
|
|
result3 += M4(0.00435332, 0.22003464, 0.054780096, 0.02049974, 0.08106458, -0.23199026, 0.13086085, -0.33637485, 0.042000774, -0.148645, 0.049710836, 0.11972893, 0.12264688, 0.24849172, 0.1564239, 0.13550651) * inp_1_1_1;
|
|
result3 += M4(0.07536812, 0.10604653, -0.036423583, -0.26073584, 0.0397844, -0.045089003, -0.019320367, 0.19621585, 0.10164428, -0.009166188, -0.07304803, -0.015371115, 0.04261303, 0.11308855, -0.032034043, -0.20426352) * inp_1_2_1;
|
|
result3 += M4(-0.015813572, 0.0056332136, -0.012351634, 0.08331286, -0.007511319, -0.6502108, -0.04515211, -0.09339431, 0.027167298, 0.23520617, -0.00018588088, 0.040712796, 0.057343673, 0.114973925, 0.02719836, 0.021863716) * inp_1_0_2;
|
|
result3 += M4(0.037174106, 0.12065753, -0.07257851, 0.090148546, -0.058854315, 0.026826117, 0.030978559, -0.013551632, 0.049525514, 0.13865441, 0.016414402, 0.061311614, -0.014844421, 0.085599475, -0.05553838, -0.011335998) * inp_1_1_2;
|
|
result3 += M4(-0.024182256, -0.03277256, -0.023684012, -0.06914193, 0.03953002, 0.00093374, -0.03862413, -0.0043832413, 0.086972594, 0.017723167, -0.02329716, 0.0585194, -0.028904395, -0.030326229, 0.037591804, -0.06744188) * inp_1_2_2;
|
|
const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.032315515, -0.122047596, 0.059832327, -0.03277833, 0.020007847, 0.13211598, -0.097809285, 0.122699976, 0.13954099, 0.071660824, 0.13365898, 0.035498723, -0.32890487, -0.23000252, 0.18859388, 0.2246585) * inp_2_0_0;
|
|
result0 += M4(-0.09090872, -0.15547122, 0.14182991, -0.03447231, -0.08700012, -0.14764556, 0.06351315, -0.031944465, -0.24570633, -0.10371965, -0.6612911, -0.010604841, 0.30677596, -0.122792654, -0.3476362, -0.3333027) * inp_2_1_0;
|
|
result0 += M4(-0.101048835, -0.28088036, 0.064946696, -0.02987513, -0.053139113, -0.0017336245, 0.021613726, -0.09509941, -0.24492142, -0.027400596, -0.03304482, -0.2676795, -0.17882577, -0.0015913589, 0.07456976, -0.10931344) * inp_2_2_0;
|
|
result0 += M4(0.018682102, 0.03897567, -0.0043925717, -0.12452545, -0.2039322, 0.07347261, -0.077316, 0.11665575, 0.12262391, 0.012757696, -0.033062913, -0.04495245, 0.006626678, 0.18125992, 0.040581536, 0.019702636) * inp_2_0_1;
|
|
result0 += M4(-0.059305314, 0.078166485, -0.19206336, 0.107413165, -0.35976288, 0.20749648, -0.10897632, 0.048565254, -0.28695294, 0.13703996, -0.3667627, 0.13787156, 0.011949593, -0.08204921, -0.20876266, -0.036897074) * inp_2_1_1;
|
|
result0 += M4(-0.0057640765, -0.25577998, -0.27986136, -0.1747803, 0.07495716, 0.16943099, 0.084771186, -0.1751805, 0.048841394, 0.06562859, 0.021907087, -0.24557696, 0.24831182, -0.1436971, 0.56543666, 0.032267977) * inp_2_2_1;
|
|
result0 += M4(0.067160636, -0.010749014, -0.15154673, -0.022950703, 0.11410057, 0.045740303, 0.23280439, 0.10311792, 0.10404408, -0.038766913, 0.101108626, 0.014880373, -0.08169517, 0.03724728, -0.004071181, -0.02176287) * inp_2_0_2;
|
|
result0 += M4(-0.1724258, 0.043157067, 0.023351375, -0.10025255, -0.17951916, 0.047159195, 0.022547374, -0.098868735, 0.07145314, -0.0070737815, 0.035666347, -0.011632881, -0.098744564, 0.014386245, 0.15310442, 0.1590473) * inp_2_1_2;
|
|
result0 += M4(0.03659194, 0.048870873, -0.06802044, -0.06394387, -0.09571226, -0.04181391, -0.02092777, -0.08023691, 0.0043218927, -0.07452731, 0.02718202, -0.032503504, 0.066612795, 0.25409463, -0.29573214, -0.100742854) * inp_2_2_2;
|
|
result1 += M4(-0.06804398, 0.010259403, -0.05606417, -0.45905623, 0.03798285, -0.011380956, -0.0043548928, 0.0049317023, 0.064162396, -0.16467147, -0.13547602, -0.4170852, -0.0946343, 0.12367309, 0.0036930486, 0.2594846) * inp_2_0_0;
|
|
result1 += M4(0.08167365, 0.0076501844, -0.040654324, 0.076445766, 0.061445758, -0.07112302, 0.020587264, -0.094360456, 0.14121495, 0.039353162, 0.07584208, -0.24924478, 0.13394992, -0.15720025, 0.020550527, -0.4719891) * inp_2_1_0;
|
|
result1 += M4(0.24099086, 0.004460851, 0.033801142, 0.028916445, 0.009813314, 0.02370655, 0.019507933, 0.0723726, 0.11482338, -0.0041218535, 0.0060363747, 0.18329585, -0.03605137, -0.016704429, -0.03968744, 0.49401382) * inp_2_2_0;
|
|
result1 += M4(-0.03777468, 0.07079181, -0.086785324, -0.19576482, 0.016638516, 0.06932382, 0.02143979, -0.18267234, -0.25579676, -0.11076712, -0.2595194, 0.11735666, -0.086780265, -0.04006134, 0.049523488, -0.3053417) * inp_2_0_1;
|
|
result1 += M4(-0.19085847, -0.0018226462, -0.09526131, -0.030490309, -0.09358571, -0.1312608, -0.025299216, -0.4467069, -0.40401617, -0.11442126, -0.02932193, -0.55489945, -0.06258919, 0.17412789, -0.13525261, 0.36862898) * inp_2_1_1;
|
|
result1 += M4(-0.1783346, 0.020232402, 0.062352747, -0.2333071, 0.20440717, 0.036881648, 0.011426944, -0.161388, -0.091673456, -0.07425262, -0.007313462, 0.18958011, -0.05012611, 0.14050211, 0.023201272, -0.18373603) * inp_2_2_1;
|
|
result1 += M4(-0.041670453, 0.04761505, -0.07205055, -0.16726835, 0.16217317, 0.031830892, -0.045013987, -0.13016471, 0.038423758, 0.018830528, -0.1379043, 0.07583382, -0.08549732, 0.020179233, -0.05180274, -0.08676527) * inp_2_0_2;
|
|
result1 += M4(-0.1144892, -0.065310515, -0.07988956, -0.28776947, -0.022516059, -0.10576069, -0.09915789, -0.029305372, -0.018182946, 0.051951002, -0.031691708, 0.109498486, -0.010285694, -0.058526095, -0.015336989, 0.28561556) * inp_2_1_2;
|
|
result1 += M4(-0.114991814, -0.0060320315, 0.032515556, -0.088495135, 0.0023830642, -0.015636275, 0.10081914, -0.18555105, 0.0049523246, -0.01522886, 0.031541333, 0.14967763, -0.13520668, -0.00976766, 0.07235242, -0.13526863) * inp_2_2_2;
|
|
result2 += M4(-0.006102216, -0.029172745, 0.09222103, -0.028744753, 0.036760766, 0.02058598, -0.051013798, 0.07528594, -0.19947468, 0.2854517, -0.22350396, 0.07103823, 0.15858191, -0.16111182, 0.10137347, 0.0031016655) * inp_2_0_0;
|
|
result2 += M4(-0.051077593, 0.018110406, -0.085154414, -0.11970748, -0.06248552, 0.090771616, -0.11020889, 0.016226625, -0.30493045, 0.34879982, -0.08281113, -0.2973166, -0.08395863, 0.14489202, 0.10018392, 0.2918778) * inp_2_1_0;
|
|
result2 += M4(0.100762405, -0.14077328, 0.0068873675, -0.012728327, 0.038007546, -0.010162205, 0.016245509, -0.045379486, -0.015239795, 0.044774063, -0.0041919495, -0.039544307, 0.10751771, -0.19274761, -0.13351429, -0.21966757) * inp_2_2_0;
|
|
result2 += M4(0.0014801857, 0.090907656, 0.09321273, 0.19191459, 0.11852471, 0.024387602, 0.13766855, 0.058077153, 0.13642266, 0.10572148, 0.03806547, 0.115434796, -0.13728566, 0.015369699, -0.029896675, 0.1222438) * inp_2_0_1;
|
|
result2 += M4(-0.23657471, 0.01041624, -0.104025744, -0.08028565, -0.27475524, 0.014432225, -0.190358, -0.1933413, 0.06009237, 0.35213372, -0.18603286, 0.15629223, -0.075596966, -0.045948125, -0.069723725, -0.2296189) * inp_2_1_1;
|
|
result2 += M4(-0.114092045, -0.21760537, 0.06743137, -0.16528656, 0.038565528, -0.047547337, 0.016258612, 0.04621514, 0.06486923, 0.21303502, -0.056794677, 0.07416883, -0.27225333, -0.106772475, 0.116170004, 0.03875536) * inp_2_2_1;
|
|
result2 += M4(-0.12807086, -0.050668288, 0.004014784, -0.12517461, 0.05560518, -0.031908523, 0.02181089, 0.054382585, -0.031665996, 0.052780844, 0.0075956094, -0.05371376, 0.0088007385, 0.05524695, 0.049430843, 0.08108161) * inp_2_0_2;
|
|
result2 += M4(0.051009595, -0.1502449, -0.045252007, 0.06836838, 0.14730917, 0.036979046, -0.083785385, 0.06063368, -0.028182903, 0.07504901, 0.055856213, 0.04504785, 0.18590024, -0.045080796, -0.036583446, 0.012569273) * inp_2_1_2;
|
|
result2 += M4(0.09400213, -0.09781128, -0.028674224, -0.04409969, 0.03257724, -0.060171966, 0.018233486, 0.059538074, -0.05567502, 0.027766956, -0.004594858, 0.013047014, 0.118355066, 0.10237973, 0.017876882, -0.0016306012) * inp_2_2_2;
|
|
result3 += M4(-0.060102426, 0.055194635, -0.0050862706, 0.031027785, 0.015334437, 0.05874696, -0.014036861, -0.09948192, -0.022876646, 0.25203875, -0.16902465, -0.1250054, 0.028966324, 0.19289416, 0.06767192, -0.027996095) * inp_2_0_0;
|
|
result3 += M4(-0.073572166, -0.2641532, 0.0045350282, -0.0032056102, -0.018192617, -0.053080373, -0.077567, -0.120395035, 0.05026236, -0.2493179, -0.047281366, -0.4302733, -0.04435675, -0.08884186, -0.081195176, 0.05236053) * inp_2_1_0;
|
|
result3 += M4(0.15846111, 0.033074684, -0.002145444, 0.25345713, -0.016138427, 0.060776513, 0.020433042, 0.054068796, 0.07300529, 0.1499025, 0.01061005, 0.26232153, -0.086272724, 0.19294065, 0.07322928, -0.13114798) * inp_2_2_0;
|
|
result3 += M4(-0.026156582, -0.09473598, 0.01886588, 0.03316904, 0.046567984, 0.05356517, 0.112859644, 0.045939382, -0.014500696, 0.39915556, -0.044323806, 0.08406331, -0.02588856, 0.28643692, -0.054337215, -0.044069048) * inp_2_0_1;
|
|
result3 += M4(0.119249105, -0.06426904, 0.049732402, -0.24069054, 0.053948652, 0.18656449, -0.13553333, -0.09931661, 0.05298581, 0.06414778, -0.01429046, 0.0886309, -0.0169578, 0.19047803, 0.18266238, 0.0035147448) * inp_2_1_1;
|
|
result3 += M4(-0.0056939553, -0.21608217, 0.02736181, -0.036639106, 0.0103363665, 0.029477157, 0.016544899, -0.067491375, 0.07800084, -0.12613757, -0.0043560416, 0.0653098, 0.03597968, 0.0737496, -0.108304635, -0.15834598) * inp_2_2_1;
|
|
result3 += M4(-0.024152312, -0.03797177, 0.024080073, -0.0372, 0.025841238, 0.1450337, 0.003514035, 0.01807124, -0.014084937, 0.20126233, -0.07188575, 0.009998302, 0.014528717, -0.0146696335, 0.06857181, 0.032898333) * inp_2_0_2;
|
|
result3 += M4(-0.0058606123, -0.18987623, 0.0017278753, 0.010762768, -0.06410252, 0.05551138, -0.044577245, -0.07427778, 0.013683679, 0.17193417, 0.08636119, 0.052021578, -0.04665759, 0.009889614, -0.13780324, -0.06077317) * inp_2_1_2;
|
|
result3 += M4(-0.024604267, 0.0576184, -0.0052168164, -0.013863482, 0.0020399143, 0.14541508, 0.021378214, 0.034496024, 0.042570945, 0.10022286, -0.02205846, 0.14285377, 0.033961337, 0.20753984, 0.07047152, -0.2780571) * inp_2_2_2;
|
|
const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.027015617, -0.7996558, 0.18498614, 0.104103, 0.38189876, -0.11344139, -0.0007145073, 0.3342583, 0.031134395, 0.089367434, 0.037405856, 0.015087987, -0.14192465, -0.22596288, 0.029936088, 0.0421774) * inp_3_0_0;
|
|
result0 += M4(0.18507116, -0.13057783, 0.18906131, -0.04625072, 0.4304938, 0.20377009, 0.15997867, -0.01429329, 0.038801454, 0.08704504, 0.21694064, -0.11549993, -0.0045122113, -0.3900684, -3.388382e-05, 0.15001586) * inp_3_1_0;
|
|
result0 += M4(0.060679402, -0.16186985, 0.061862193, -0.00022022054, 0.26157132, -0.043998968, 0.1945971, -0.7142528, 0.09871455, 0.022045933, -0.024860913, 0.004302735, 0.14181943, -0.18637265, -0.00040650467, 0.11827929) * inp_3_2_0;
|
|
result0 += M4(-0.5922311, 0.22684027, -0.11227259, -0.21391045, 0.032060653, -0.053092804, 0.31259525, -0.07501715, -0.10745047, -0.0038042206, -0.10862252, -0.004506264, 0.20306757, 0.25435647, 0.16563417, -0.17325446) * inp_3_0_1;
|
|
result0 += M4(-0.35888693, 0.16700989, 0.1668772, 0.17332852, -0.05505689, -0.11818606, -0.06606742, 0.110823445, 0.03806446, -0.113237, -0.31512976, 0.095162764, -0.46377453, -0.0036337571, -0.16946682, 0.1773919) * inp_3_1_1;
|
|
result0 += M4(-0.12228825, -0.039548386, 0.011141476, -0.1074657, 0.019009113, 0.08645584, 0.2815848, -0.06357786, 0.015157962, 0.09658943, 0.053875048, -0.033454716, -0.0061273533, 0.062192447, 0.07787697, 6.049956e-05) * inp_3_2_1;
|
|
result0 += M4(-0.15141164, -0.5041164, -0.9490496, 0.19432089, 0.02835402, -0.04002404, 0.08439427, -0.010552297, -0.07459201, 0.021660993, 0.032625314, 0.010420403, -0.07382987, -0.07785111, -0.10839164, -0.052428048) * inp_3_0_2;
|
|
result0 += M4(0.05921418, -0.13308622, -0.37330568, 0.17807813, 0.07824402, 0.03588622, -0.12656505, 0.09064255, -0.06055568, -0.024535418, -0.043053485, 0.00056338724, 0.06676607, -0.27000788, -0.29120812, 0.11573923) * inp_3_1_2;
|
|
result0 += M4(-0.028129566, 0.14413905, -0.16586044, 0.017026152, 0.076706395, -0.05394849, 0.09306623, 0.019118821, 0.007054275, 0.08550639, 0.13275635, -0.025670476, -0.0022794658, 0.10155379, -0.02085458, 0.034746658) * inp_3_2_2;
|
|
result1 += M4(-0.12369097, -0.022380395, -0.19092223, -1.0336181, 0.25007322, -0.1082927, -0.1355875, -2.3267636, -0.060449358, 0.047252428, -0.01162158, -0.005936594, 0.11749812, -0.09796796, 0.028199805, 0.1737005) * inp_3_0_0;
|
|
result1 += M4(0.21402386, 0.013675318, -0.030755024, -0.0027503022, -0.2956065, -0.10900944, -0.47659904, -1.3750017, -0.22972079, 0.06601595, 0.013311301, 0.12648493, 0.24865986, -0.0054492513, 0.015514159, -0.035419848) * inp_3_1_0;
|
|
result1 += M4(0.04552766, 0.037142105, 0.0035919624, 0.08431789, -0.3844037, -0.28543824, -0.23054305, -0.94371766, -0.06714347, 0.07686601, -0.0064314655, 0.087116875, 0.05579566, -0.070394486, 0.0021310588, 0.045538615) * inp_3_2_0;
|
|
result1 += M4(-0.37080222, -0.18180142, -0.24774034, -1.7342087, -0.06769111, 0.043335035, 0.35196152, -0.0039335964, 0.044669848, -0.03951889, -0.025922338, -0.048581965, -0.10638707, -0.15603809, 0.017693024, -0.15826935) * inp_3_0_1;
|
|
result1 += M4(-0.15859805, 0.099043906, -0.051725943, -0.2880917, -0.18174146, 0.0665793, 0.54645437, -0.033481486, 0.40742868, -0.3033671, 0.122001864, -0.08608495, -0.2042584, 0.05535326, 0.12946245, -0.4357408) * inp_3_1_1;
|
|
result1 += M4(0.12178875, 0.025560115, 0.028441695, -0.22325999, 0.061120175, -0.025200972, 0.14321159, 0.19563259, -0.022041185, 0.04249189, -0.07945956, 0.11619728, -0.0501297, -0.054087505, -0.011183701, 0.1266374) * inp_3_2_1;
|
|
result1 += M4(-0.6316759, -0.0918079, -0.3422181, -0.2808048, 0.03502315, -0.07254006, 0.021251619, 0.1637716, 0.0266244, 0.014856987, -0.073231086, -0.057996526, -0.07746048, 0.007990325, -0.021293677, 0.14186025) * inp_3_0_2;
|
|
result1 += M4(-0.10816348, -0.19827484, -0.03404185, 0.17547783, 0.071025066, -0.013042217, 0.043203786, 0.06272457, -0.22442414, 0.1362349, 0.14682624, -0.030669976, -0.3585226, 0.027426068, 0.15036522, -0.1341319) * inp_3_1_2;
|
|
result1 += M4(-0.14291224, -0.049865674, 0.025149126, -0.1153935, 0.057290137, -0.05218865, -0.0013165397, 0.027519017, 0.134451, -0.0015477352, -0.062499885, 0.0006934406, -0.030470002, 0.010570013, -0.013825029, -0.04198862) * inp_3_2_2;
|
|
result2 += M4(-0.10711042, 0.032619312, -0.024730971, -0.06444852, 0.4182004, 0.54544973, -0.239182, -1.0138891, 0.027894305, -0.08686301, 0.077421546, -0.046399735, 0.063517295, 0.01817226, -0.09217656, 0.04170022) * inp_3_0_0;
|
|
result2 += M4(0.22543283, -0.109545745, 0.025299635, -0.025161963, 0.7423745, -0.216506, 0.0028229007, -0.73982394, -0.008515469, -0.061802708, 0.32045597, -0.08967708, 0.054728646, 0.01958016, -0.10577295, 0.19468236) * inp_3_1_0;
|
|
result2 += M4(0.05417426, -0.06550254, 0.008215511, 0.08211287, 0.49578804, 0.02951148, -0.051892225, -0.23949671, 0.07806937, -0.035536528, 0.13598205, 0.08785305, 0.079127, 0.070848905, -0.03802217, 0.051596798) * inp_3_2_0;
|
|
result2 += M4(-0.48896796, 0.68660605, -0.16183376, -0.86002463, 0.033824537, -0.08554051, 0.003849123, -0.08059784, -0.029788414, 0.08485035, 0.03887551, -0.05369966, -0.13131899, 0.22066273, -0.10666296, 0.05315469) * inp_3_0_1;
|
|
result2 += M4(-0.167758, 0.29998848, -0.034977388, -0.09690403, 0.13826017, 0.24243373, 0.100414276, 0.17001465, -0.2882979, -0.3450798, 0.18749917, -0.13422024, -0.17123859, 0.09044185, 0.052391898, -0.36916175) * inp_3_1_1;
|
|
result2 += M4(-0.16430867, 0.0064501395, 0.06647598, -0.023820901, 0.08984767, 0.013764179, -0.035873856, 0.032932702, 0.09450842, 0.071256734, -0.040001966, -0.04960062, -0.007343706, 0.049707595, -0.016152225, -0.0027885356) * inp_3_2_1;
|
|
result2 += M4(-0.73847115, 0.5513414, 0.030851787, -0.2581955, -0.0024027135, -0.0036326675, -0.030248651, -0.004381591, -0.02528615, -0.019515892, 0.029741172, -0.003951281, -0.019615853, -0.03865882, 0.003828569, -0.0031004255) * inp_3_0_2;
|
|
result2 += M4(-0.17671159, -0.081465006, -0.07955902, -0.04966974, -0.06376834, -0.023979286, -0.015434015, -0.041929364, 0.120791264, 0.036787927, 0.04004447, 0.09695027, -0.08316816, 0.09391913, 0.049567543, 0.027576985) * inp_3_1_2;
|
|
result2 += M4(0.10369331, -0.024534395, -0.052448582, -0.09192535, 0.027912363, 0.09893594, 0.028118687, 0.06701703, 0.014928166, 0.017885402, -0.03428379, -0.025608117, 0.031982064, 0.003519881, -0.0178403, -0.009937768) * inp_3_2_2;
|
|
result3 += M4(-0.027271697, 0.42789787, -0.049469214, 0.022823947, -0.17875957, -0.9508793, -0.059357125, 0.17309445, -0.008523562, 0.106697135, 0.06968476, 0.011999886, -0.052791767, 0.063847676, -0.10121569, 0.035633165) * inp_3_0_0;
|
|
result3 += M4(0.14716758, -0.07930711, -0.017321438, 0.13599691, 0.07637399, 0.04328131, -0.07516573, 0.43305922, -0.08643355, -0.04845797, 0.016906133, 0.030227428, -0.0071497494, -0.11972896, -0.05875496, 0.10012184) * inp_3_1_0;
|
|
result3 += M4(-0.04849445, 0.020217191, -0.046229035, 0.18088053, -0.22129643, -0.12000977, -0.12619595, 0.1956666, -0.008121098, -0.14342684, 0.10044852, 0.1925586, -0.013161276, 0.0966338, -0.056078617, 0.09678964) * inp_3_2_0;
|
|
result3 += M4(0.08718787, -0.40120795, 0.14511861, -0.74981576, 0.08892279, -0.25088233, 0.1971236, -0.008586428, -0.036342554, -0.003950924, -0.10522428, -0.008721086, -0.051654376, 0.15270655, -0.11333946, -0.1062836) * inp_3_0_1;
|
|
result3 += M4(0.3832336, -0.069977686, -0.014638319, 0.22976342, 0.4424893, -0.68896824, 0.50525415, 0.1229973, 0.2716183, 0.22153828, 0.42152727, -0.04672893, -0.050966874, -0.5484808, 0.05782651, -0.0730039) * inp_3_1_1;
|
|
result3 += M4(0.064079545, 0.11863564, 0.11544964, -0.19748935, 0.32926935, -0.8571917, -0.07095916, 0.053836744, -0.12118399, -0.16282205, -0.03557967, 0.015947886, 0.028216584, 0.09699497, -0.0085840095, -0.0819769) * inp_3_2_1;
|
|
result3 += M4(-0.07120117, -0.63295966, 0.043656614, -0.45833355, 0.024976943, 0.16151588, -0.07555022, -0.009623016, 0.010849734, -0.1750549, -0.021309523, -0.009259946, 7.156295e-06, 0.15191126, -0.018279374, -0.04998776) * inp_3_0_2;
|
|
result3 += M4(0.17812641, 0.07574793, -0.13986841, -0.04758223, 0.039438676, -0.04838939, 0.05826339, 0.058625273, 0.010918394, -0.008298822, 0.13868612, -0.011732604, -0.024179596, 0.02025137, 0.0014702389, -0.072934866) * inp_3_1_2;
|
|
result3 += M4(-0.044331793, -0.021380348, -0.065975286, -0.00053424994, -0.010777215, 0.062775485, -0.026242074, -0.017773766, -0.034250915, 0.114954434, -0.020100623, 0.04363309, -0.029236004, -0.14147997, 0.0040791784, -0.043546647) * inp_3_2_2;
|
|
const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2);
|
|
imageStore(out_image, output_base + ivec2(0, 0), max(result0, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(1, 0), max(result1, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(0, 1), max(result2, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(1, 1), max(result3, V4(0.0)));
|
|
}
|
|
|
|
//!DESC ArtCNN C4F16 DS (Conv2D-4-ReLU)
|
|
//!COMPUTE 24 32 12 16
|
|
//!HOOK LUMA
|
|
//!BIND conv2d_3
|
|
//!SAVE conv2d_4
|
|
//!WIDTH LUMA.w 2.0 *
|
|
//!HEIGHT LUMA.h 2.0 *
|
|
//!COMPONENTS 4
|
|
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
|
|
// Precision selection for this pass: request the explicit float16 arithmetic
// extension and, if its macro is defined, alias the working types to the
// half-precision variants; otherwise fall back to the 32-bit float types.
//   V4 - 4-component vector type (inputs, accumulators, biases)
//   M4 - 4x4 matrix type (weight matrices)
//   F  - scalar type
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
|
|
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
|
|
# define V4 f16vec4
|
|
# define M4 f16mat4
|
|
# define F float16_t
|
|
#else
|
|
# define V4 vec4
|
|
# define M4 mat4
|
|
# define F float
|
|
#endif
|
|
|
|
// Size of the neighbourhood window: hook() reads a 3x3 set of taps
// (offsets +0..+2 in x and y) from the shared tile for every invocation.
const ivec2 ksize = ivec2(3, 3);
|
|
// Half window (integer division, so (1, 1)); hook() subtracts it from the tile
// coordinate when computing the texel position to fetch.
const ivec2 offset = ksize / 2;
|
|
// Work group dimensions as a signed 2D vector.
// NOTE(review): presumably 12x16, taken from the thread counts of the
// COMPUTE directive of this pass - confirm against the mpv shader docs.
const ivec2 wg_size = ivec2(gl_WorkGroupSize);
|
|
// Dimensions of the shared tile: the work group size plus ksize - 1 extra
// entries per axis, so every invocation can index up to +2 in x and y.
const ivec2 isize = wg_size + ksize - 1;
|
|
// Shared input tile. The first index selects one of four planes; hook() fills
// plane 0..3 from the texels at offsets (0,0), (1,0), (0,1) and (1,1) of each
// 2x2 group in conv2d_3_raw before the barrier() call.
shared V4 inp[4][isize.y][isize.x];
|
|
void hook() {
|
|
const uvec2 local_xy = gl_LocalInvocationID.xy;
|
|
ivec2 base = ivec2(gl_WorkGroupID) * wg_size;
|
|
for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
|
|
for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
|
|
const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2);
|
|
inp[0][y][x] = V4(conv2d_3_mul * texelFetch(conv2d_3_raw, input_base + ivec2(0, 0), 0));
|
|
inp[1][y][x] = V4(conv2d_3_mul * texelFetch(conv2d_3_raw, input_base + ivec2(1, 0), 0));
|
|
inp[2][y][x] = V4(conv2d_3_mul * texelFetch(conv2d_3_raw, input_base + ivec2(0, 1), 0));
|
|
inp[3][y][x] = V4(conv2d_3_mul * texelFetch(conv2d_3_raw, input_base + ivec2(1, 1), 0));
|
|
}
|
|
}
|
|
|
|
barrier();
|
|
V4 result0 = V4(-0.006771086, -0.0051601403, 0.020885777, -0.024551088);
|
|
V4 result1 = V4(0.004770867, 0.0010445794, -0.0035112808, -0.0010415314);
|
|
V4 result2 = V4(-0.0118335625, -0.08267318, -0.0066933953, 0.00024997361);
|
|
V4 result3 = V4(-3.5229634e-05, 0.01858382, -0.012685065, -0.008151426);
|
|
const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.0033632028, 0.04721141, 0.03007575, -0.052616846, -0.008006311, 0.065169215, 0.016507383, -0.0044050934, -0.018410988, 0.07643572, 0.03065518, -0.08753873, 0.015026827, 0.16328007, 0.004729976, -0.014612305) * inp_0_0_0;
|
|
result0 += M4(-0.0510358, -0.040047985, -0.15714039, 0.18750839, 0.029424198, 0.063464284, 0.04334012, -0.051018972, 0.044537134, 0.016376844, 0.0021454738, 0.07393311, -0.003398863, -0.07367582, 0.03664616, -0.0471132) * inp_0_1_0;
|
|
result0 += M4(0.033857785, -0.044069454, 0.0029815182, 0.07231605, -0.012478533, -0.014247982, -0.011120171, 0.0064618033, -0.024211371, -0.0057577016, 0.009435734, -0.00255496, 0.00047270884, 0.049642805, 0.010110964, -0.023349527) * inp_0_2_0;
|
|
result0 += M4(0.024893802, -0.06915593, -0.09700506, 0.109927185, -0.022845587, 0.033251625, -0.0370686, 0.04165276, -0.023201028, -0.09822818, -0.0098308325, 0.03426258, 0.038701996, 0.36037982, 0.030453933, -0.045780078) * inp_0_0_1;
|
|
result0 += M4(-0.0022862866, 0.099955656, 0.34152862, -0.3465966, -0.05589964, -0.05136851, -0.068635, 0.09597882, 0.07702414, 0.021747537, 0.12726207, -0.1702579, 0.025863616, -0.06331183, -0.17451076, 0.18619111) * inp_0_1_1;
|
|
result0 += M4(-0.010405649, 0.018087514, -0.15909216, 0.14597063, 0.033948172, 0.013681024, -0.00019566377, 0.013083054, -0.0172498, 0.0056358925, -0.00644117, 0.009749327, -0.010631505, 0.021705946, 0.09615771, -0.044029847) * inp_0_2_1;
|
|
result0 += M4(-0.024415338, -0.18363151, -0.01793161, 0.023585768, -0.075834185, -0.0050683604, 0.018748691, -0.008734701, -0.011380264, -0.002517223, -0.007086535, 0.0369585, -0.013384224, 0.19569841, 0.02990988, 0.0053327563) * inp_0_0_2;
|
|
result0 += M4(-0.040272474, -0.0018999686, -0.036718227, 0.05683352, 0.14068884, -0.059090048, 0.0601508, -0.039334733, -0.14742632, -0.02577353, -0.058017954, 0.057766154, -0.071603104, -0.13627124, -0.042406775, 0.042193558) * inp_0_1_2;
|
|
result0 += M4(-0.03953849, -0.012823383, 0.012647818, -0.07757381, -0.06622981, 0.04571122, 0.02738234, -0.013842928, 0.029961985, 0.015561659, -0.015383494, 0.021000577, 0.057185896, 0.037138227, 0.02095205, -0.047683973) * inp_0_2_2;
|
|
result1 += M4(0.0057615293, -0.045122698, -0.01178126, -0.0726996, -0.0086135715, 0.065157086, 0.0034225588, 0.07258049, -0.03279438, 0.060885813, -0.00041689439, 0.08573385, -0.029732056, -0.031844694, 0.036845736, -0.10094928) * inp_0_0_0;
|
|
result1 += M4(-0.12540935, -0.059108384, 0.003970208, -0.12744214, 0.06369681, 0.028249402, 0.007363609, 0.122652315, 0.0926682, 0.027998177, -0.03176788, -0.16297963, 0.030422911, 0.07676895, -0.0079176305, 0.057718776) * inp_0_1_0;
|
|
result1 += M4(0.030936485, 0.031883776, -0.007541764, 0.009011137, -0.0032886202, 0.036986377, 0.0038485127, 0.07384179, 0.015596031, -0.028106213, -0.0056048124, 0.03516302, -0.03694747, -0.029075751, 0.011198101, -0.039395273) * inp_0_2_0;
|
|
result1 += M4(0.09053454, 0.016422741, -0.03282414, 0.0018926301, 0.045385055, -0.07430422, -0.022797182, -0.052842602, 0.04633681, 0.12838002, 0.015648523, 0.10493874, -0.11721678, 0.102124445, 0.009502804, -0.036585268) * inp_0_0_1;
|
|
result1 += M4(-0.04929572, -0.07120267, 0.11189219, 0.14805685, -0.071155205, 0.0014907664, -0.045666497, -0.2830528, 0.082157016, 0.3569714, -0.08436788, 0.45907578, 0.25751808, 0.007985261, -0.018987287, -0.077328935) * inp_0_1_1;
|
|
result1 += M4(0.001983042, 0.0573755, -0.059997357, 0.12338883, 0.0121495845, -0.04396366, 0.008587791, -0.014707099, 0.02200498, 0.08107376, -0.005465822, 0.068521366, -0.023648549, 0.055134445, 0.024040049, 0.012487843) * inp_0_2_1;
|
|
result1 += M4(-0.03344048, 0.018812401, -0.099432044, -0.092147544, -0.013474613, 0.12958124, -0.026809819, 0.12613833, -0.006871271, -0.004189431, -0.049348816, 0.01963418, 0.0218454, -0.1742686, 0.094456315, -0.057249658) * inp_0_0_2;
|
|
result1 += M4(-0.012384111, -0.17734681, 0.12383786, -0.112375386, 0.0319456, 0.27962422, 0.046854213, 0.398646, -0.034528103, -0.2823273, -0.11815615, 0.04903127, -0.010071655, -0.014617646, -0.20853819, 0.051345073) * inp_0_1_2;
|
|
result1 += M4(-0.017807463, 0.085978284, -0.004161402, 0.018392334, 0.004443354, 0.17043431, 0.0004278643, 0.26888165, -0.009966159, 0.039737593, -0.024603937, -0.06556405, -0.013565388, -0.02318172, 0.03753665, -0.01551806) * inp_0_2_2;
|
|
result2 += M4(-0.00054385135, -0.013860364, 0.037204344, 0.0643842, 0.009427588, 0.050591286, 0.015325365, 0.026667146, 0.019358223, 0.060875133, 0.003089811, -0.026155524, 0.054400604, -0.030613834, -0.0073381513, 0.0010448373) * inp_0_0_0;
|
|
result2 += M4(-0.11140873, -0.077490225, -0.0008244639, -0.111035444, 0.014723916, -0.111481555, -0.0516383, 0.066743694, 0.042972323, 0.01950517, 0.074459195, 0.050886415, 0.11208063, 0.020193007, -0.03352396, 0.055053547) * inp_0_1_0;
|
|
result2 += M4(0.026718343, -0.10349276, -0.10949595, 0.12181208, 0.020611692, 0.07150552, 0.03742183, -0.038923062, 0.021384154, -0.02058103, -0.15753624, 0.012840425, -0.0046102917, -0.09406839, -0.05100171, 0.032406077) * inp_0_2_0;
|
|
result2 += M4(-0.074565805, -0.013674222, 0.01627147, -0.09966034, 0.06198092, 0.03641343, -0.032527886, -0.06883835, 0.015009717, -0.026334036, -0.028027749, 0.06361123, 0.20694518, 0.06313499, -0.031564172, 0.05146381) * inp_0_0_1;
|
|
result2 += M4(-0.0727687, 0.042550147, -0.04598127, 0.3267873, 0.061976686, 0.04911097, 0.081161864, 0.010707485, 0.2691173, -0.043794725, -0.09971116, -0.0033741079, 0.021865325, 0.043241937, 0.18235348, -0.054897718) * inp_0_1_1;
|
|
result2 += M4(-0.00985757, -0.016703412, -0.1516014, 0.11700871, -0.018276513, -0.051278856, -0.24498534, 0.07523947, 0.08632356, -0.020398337, -0.111400016, -0.014939744, -0.04313168, -0.07714612, -0.008079888, 0.020002536) * inp_0_2_1;
|
|
result2 += M4(-0.013238882, -0.102667235, 0.0081237415, 0.035577383, -0.03021197, -0.10485464, 0.011287993, 0.078355186, 0.030628305, 0.05074276, -0.017268093, 0.025273146, 0.09983698, -0.018156344, 0.0009785192, -0.012089172) * inp_0_0_2;
|
|
result2 += M4(0.011669444, 0.018538315, 0.027060565, 0.092130765, -0.05551045, -0.076386236, -0.062335093, -0.15230888, 0.09960692, -0.013955326, 0.0051692463, 0.086296074, 0.03327316, -0.0583193, -0.05385146, 0.018058175) * inp_0_1_2;
|
|
result2 += M4(-0.010289144, -0.040999196, 0.072243415, -0.021734484, 0.031605948, 0.07325174, -0.060475502, -0.012607067, 0.021910584, 0.01122505, 0.054743167, 0.053025585, 0.015383842, -0.042237084, 0.010509368, -0.030073771) * inp_0_2_2;
|
|
result3 += M4(-0.09841719, -0.13165414, 0.045820847, -0.007834082, -0.007131807, -0.046782803, 0.02734628, 0.01637415, -0.119316526, 0.1290529, -0.067837425, -0.17189166, 0.0032192334, 0.116386436, -0.02809339, 0.06934349) * inp_0_0_0;
|
|
result3 += M4(0.09417788, 0.14279406, -0.061828166, 0.06364808, -0.040369745, 0.07213133, -0.031408317, 8.409518e-05, 0.11984481, -0.06191443, 0.07229216, -0.022234676, 0.005406791, 0.16676822, 0.22716026, 0.024176734) * inp_0_1_0;
|
|
result3 += M4(0.05202953, -0.14130098, -0.0026728045, -0.054721516, 0.02505792, 0.06512413, -0.012624254, -0.0026803268, -0.047477644, -0.031517543, -0.032461733, 0.0070039, -0.038684703, 0.12445629, -0.016449094, -0.00015740903) * inp_0_2_0;
|
|
result3 += M4(0.1427558, -0.26331204, 0.23879163, 0.1472501, 0.07586082, -0.19956431, 0.11914642, -0.018028205, 0.042948782, -0.18455848, 0.028981531, 0.06794226, -0.04177945, 0.09921873, 0.20470385, 0.11457745) * inp_0_0_1;
|
|
result3 += M4(0.4082949, 0.20943645, 0.081940874, 0.12100776, -0.002409686, -0.007796975, -0.039054655, 0.033258572, 0.12229252, 0.027062602, -0.3295776, -0.04143495, -0.06493101, -0.28356364, 0.51172125, -0.03805709) * inp_0_1_1;
|
|
result3 += M4(-0.09025853, -0.11847181, -0.15800075, -0.03284227, 0.009739382, -1.3548135e-05, 0.005809967, -0.016361618, -0.007935478, 0.045800693, 0.06297317, -0.014967157, 0.086511806, 0.1897742, -0.066169955, -0.008127524) * inp_0_2_1;
|
|
result3 += M4(0.038830314, -0.13548805, 0.10421148, 0.09081409, -0.07634593, -0.060178645, -0.07031697, 0.054025967, 0.020105308, -0.0027928702, 0.031130863, -0.026592439, 0.014720058, 0.19513103, 0.002097078, 0.18665725) * inp_0_0_2;
|
|
result3 += M4(-0.08901566, 0.105436236, 0.12266256, -0.008518654, 0.11778259, -0.053422395, 0.028649012, -0.040792372, -0.02262042, -0.10925559, -0.07001872, -0.035234697, -0.07897347, 0.08139603, 0.054484908, 0.019872947) * inp_0_1_2;
|
|
result3 += M4(0.030658608, -0.02215864, -0.0070415963, -0.016830377, 0.035668887, -0.068572775, 0.017487563, 0.021502042, -0.0044210944, 0.05756212, -0.020683121, -0.006085243, -0.008396225, 0.021756154, -0.049787946, 0.01207104) * inp_0_2_2;
|
|
const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.03791764, 0.03606116, 0.0045328056, 0.002800669, -0.009491753, 0.075630374, 0.04046336, -0.028276388, 0.04324208, 0.524792, -0.09134197, 0.08187007, 0.029704953, 0.08065349, 0.007941393, -0.026542963) * inp_1_0_0;
|
|
result0 += M4(0.02040871, -0.12661457, 0.16988854, -0.16411963, -0.08627451, -0.08485256, -0.13350333, 0.16505302, -0.03161001, 0.122473575, 0.29984474, -0.2572558, -0.030814288, 0.050678268, -0.008602223, 0.027681787) * inp_1_1_0;
|
|
result0 += M4(-0.006042254, 0.065050334, 0.04664241, -0.037401382, 0.03355854, 0.054504145, 0.047228526, -0.019673886, -0.109151475, -0.0869173, -0.21530285, 0.3827806, 0.020756625, -0.0028767965, 0.00789321, 0.021198263) * inp_1_2_0;
|
|
result0 += M4(-0.0031582236, -0.13864206, -0.049212236, 0.035027854, -0.040887404, -0.053039804, -0.06926417, 0.07521589, -0.0047208094, 0.15803817, -0.009107502, 5.1163806e-05, -0.035306234, -0.034462266, 0.032939598, -0.054286096) * inp_1_0_1;
|
|
result0 += M4(-0.05159398, 0.021360615, -0.12616381, 0.07439948, -0.04818794, 0.11798935, -0.24307126, 0.3104743, 0.16276325, 0.14583969, 0.021263607, -0.058796406, 0.12709579, -0.124082625, -0.06194249, 0.08661151) * inp_1_1_1;
|
|
result0 += M4(0.09366448, -0.0023552156, -0.114171155, 0.11347451, -0.05757338, -0.03278382, -0.039372697, 0.01685123, 0.12495786, 0.20478605, -0.06560785, 0.15574126, -0.04436519, 0.012480755, 0.084948376, -0.104769275) * inp_1_2_1;
|
|
result0 += M4(-0.09293131, -0.06462749, -0.007985044, -0.012933727, -0.04487777, -0.22023484, -0.0037138779, -0.013158153, 0.00241268, 0.30657583, -0.0056309095, -0.011388346, 0.035876844, 0.2619356, -0.0003885763, -0.035041932) * inp_1_0_2;
|
|
result0 += M4(0.18430625, 0.035722308, 0.039618477, -0.051990308, 0.15817565, 0.12178207, 0.024091955, -0.005528151, 0.026752066, 0.0029761952, 0.040815126, -0.018809414, -0.12874056, -0.062183477, -0.0050868164, 0.008158902) * inp_1_1_2;
|
|
result0 += M4(-0.070743024, -0.014308823, -0.00028148116, -0.010128748, -0.051406384, -0.03582649, 0.004809814, -0.015474758, -0.06934015, -0.019315397, 0.0028014528, -0.010465397, 0.03486373, 0.15004137, -0.0046289526, 0.011293259) * inp_1_2_2;
|
|
result1 += M4(0.017685123, 0.0065368274, -0.0015516317, 0.16557714, -0.03033398, 0.025646772, -0.010180449, 0.047718406, 0.11997764, 0.15046796, -0.03974632, 0.026449608, -0.022485612, -0.02235598, -0.0026649777, 0.0047121164) * inp_1_0_0;
|
|
result1 += M4(0.3358178, -0.013722061, 0.035134647, 0.14561415, -0.24687243, -0.07977394, -0.006837952, -0.2426016, -0.18184388, 0.14377251, -0.014832799, 0.22288579, 0.017362798, -0.12165396, 0.003879503, -0.117855765) * inp_1_1_0;
|
|
result1 += M4(0.04104673, 0.08014578, 0.007094119, 0.13034703, -0.021104911, 0.018781923, 0.021670882, -0.037076604, 0.17533164, 0.06820291, 0.07606097, 0.06600506, 0.029399764, 0.033080503, -0.001904172, 0.052032262) * inp_1_2_0;
|
|
result1 += M4(-0.007679661, -0.018380007, -0.016621359, 0.014491733, 0.04202803, 0.04733234, 0.050396703, -0.009298026, -0.008895612, -0.099489, -0.07935202, -0.050699767, -0.08127414, -0.019099122, 0.04923852, -0.06022952) * inp_1_0_1;
|
|
result1 += M4(-0.13803823, 0.13586937, -0.28232625, -0.32198295, -0.25998548, -0.0856141, 0.22959727, -0.005808529, -0.12802751, -0.53791004, 0.17587951, -0.4111663, 0.17720166, 0.059367396, 0.012145319, 0.022847874) * inp_1_1_1;
|
|
result1 += M4(-0.008384057, 0.07714925, -0.03306005, 0.011172002, -0.0613773, -0.14301087, 0.015359433, 0.008426981, -0.008028197, -0.082278445, -0.10858002, 0.08871151, -0.12919591, 0.028668016, -0.022689562, -0.015607325) * inp_1_2_1;
|
|
result1 += M4(0.0020820436, -0.005625805, -0.00054294884, -0.025311854, 0.006265746, 0.0085602505, -0.049143896, -0.024635661, -0.0070366124, 0.016309332, -0.0054611517, 0.07001931, -0.027046647, -0.045871366, 0.074525505, 0.044885844) * inp_1_0_2;
|
|
result1 += M4(0.0038440404, 0.3018618, 0.12885766, 0.16752897, 0.0049168784, 0.008830848, 0.25186682, 0.030004857, 0.0025120769, 0.3281529, 0.13829912, 0.24118833, -0.0017494321, -0.16340025, -0.17641474, -0.050659727) * inp_1_1_2;
|
|
result1 += M4(-0.0058690743, 0.017535014, 0.026015634, 0.0069791945, 0.009430142, 0.011086416, 0.037033264, 0.058812175, -0.0071097505, 0.056450658, 0.033287015, -0.031129349, 0.018289886, -0.019709148, 0.09328557, 0.03190014) * inp_1_2_2;
|
|
result2 += M4(0.06894697, -0.10339465, -0.035631135, 0.022989944, -0.01942999, -0.10801729, 0.0216407, 0.031430345, 0.0035577503, 0.024212655, 0.0071231346, -0.056087773, -0.00088516675, 0.025889328, -0.0022241352, -0.0057471152) * inp_1_0_0;
|
|
result2 += M4(0.2759868, -0.027209675, -0.013602369, 0.08910978, -0.14602913, -0.10279265, -0.06586457, -0.26223028, 0.025394203, -0.027354881, -0.06079269, 0.09261669, 0.069493055, -0.0055911513, 0.00068189483, -0.048568405) * inp_1_1_0;
|
|
result2 += M4(0.15641908, -0.035849106, 0.23181237, 0.054865167, -0.07924946, -0.04749266, -0.27688533, 0.110440694, 0.30449897, 0.07126261, -0.11757854, -0.0020133695, -0.05560786, 0.055558626, 0.018453365, 0.05488586) * inp_1_2_0;
|
|
result2 += M4(0.05669848, -0.00070812786, -0.0056745764, -0.06824219, 0.04319297, -0.06513989, -0.0015118121, -0.081020415, -0.024937613, -0.08211584, 0.008090276, 0.004001641, 0.043160763, -0.008445913, -0.004146166, 0.027650317) * inp_1_0_1;
|
|
result2 += M4(0.28002256, -0.08631263, -0.059070054, 0.019348877, 0.24592939, -0.08331847, -0.14509794, 0.22009538, 0.16204305, 0.04204707, -0.039180923, -0.018922726, 0.061964963, -0.05724076, 0.089339934, -0.16005886) * inp_1_1_1;
|
|
result2 += M4(0.004549639, -0.09183629, 0.1420763, 0.032508075, 0.10194179, -0.10927332, 0.27485055, -0.018509366, 0.06662383, 0.033839677, 0.08738133, 0.09459715, -0.034808353, 0.0061988346, 0.02458268, 0.012714486) * inp_1_2_1;
|
|
result2 += M4(-0.026079418, -0.038056526, 0.002398695, 0.007867446, -0.019024998, -0.087564975, 0.015248439, 0.010014585, -0.013561912, -0.08886625, -0.0134702865, 0.02232676, 0.037407853, 0.053970724, 0.00631281, -0.043232646) * inp_1_0_2;
|
|
result2 += M4(-0.0022799855, -0.0369194, 0.016964918, -0.008896998, 0.0009860508, -0.043279737, -0.0263229, 0.003257031, 0.012866944, -0.047402006, 0.0036670251, -0.017854307, 0.0634053, 0.027829783, -0.0018171946, 0.08086287) * inp_1_1_2;
|
|
result2 += M4(-0.013369, -0.101083554, -0.07694392, -0.020025238, 0.017319538, 0.023366045, -0.01640813, -0.04008224, -0.03418966, -0.017737657, -0.072516754, -0.07620339, 0.017667681, 0.04104676, -0.11862017, -0.022559365) * inp_1_2_2;
|
|
result3 += M4(0.045322407, -0.02171791, -0.061052725, -0.012117548, -0.015696114, -0.07482115, 0.04300487, -0.032141753, 0.0804886, -0.4146384, 0.29769376, -0.05980045, -0.026526874, 0.19134584, -0.1898744, 0.0952924) * inp_1_0_0;
|
|
result3 += M4(-0.113841474, -0.19846705, -0.082990475, -0.013619692, 0.18104905, -0.079608805, -0.041662235, 0.0829167, -0.18034573, 0.8529516, -0.46155354, 0.3802709, 0.041667365, -0.11552713, 0.217227, 0.049550664) * inp_1_1_0;
|
|
result3 += M4(-0.0020833479, 0.102125585, 0.008216506, 0.010471269, -0.06279899, 0.077044636, -0.097625405, -0.0039822925, -0.050997138, 0.7753718, 0.38631454, 0.05260894, -0.042959493, 0.009713864, 0.06273459, 0.0045036124) * inp_1_2_0;
|
|
result3 += M4(0.0746249, -0.027850332, -0.000757232, -0.11083669, 0.053586207, -0.05182651, 0.08614704, 0.15321635, -0.076562144, -0.17570184, 0.11280837, -0.116007045, -0.040575534, 0.065053694, -0.06028298, -0.13140927) * inp_1_0_1;
|
|
result3 += M4(-0.07656796, -0.08148092, -0.16069895, 0.04187618, -0.14711398, -0.24460338, -0.17429228, -0.16669545, 0.13918693, 0.20594399, 0.09800724, 0.18768425, -0.086857975, 0.062444944, 0.16921277, -0.13279808) * inp_1_1_1;
|
|
result3 += M4(-0.074363284, -0.19581212, 0.0012813862, -0.0075201634, 0.0020850392, 0.028007274, 0.023813803, -0.030834958, 0.07274119, 0.28911227, 0.095511995, 0.016141748, 0.109687485, 0.12591973, -0.20956549, 0.010454399) * inp_1_2_1;
|
|
result3 += M4(-0.03802425, -0.057888344, 0.06638698, 0.021309895, -0.040915377, -0.17378275, 0.049781483, -0.0038457427, -0.01987416, -0.075760044, 0.029176561, 0.053127218, 0.05793729, 0.001864354, 0.011282336, -0.11085149) * inp_1_0_2;
|
|
result3 += M4(0.009166591, 0.020393299, -0.013589619, -0.0043293405, 0.022690987, 0.1941181, -0.01498279, 0.018238517, 0.016372807, 0.08158214, -0.038956385, 0.090630956, 0.0064879647, 0.0019467361, 0.061438218, 0.12570952) * inp_1_1_2;
|
|
result3 += M4(-0.036306724, -0.022533497, 0.02743402, 0.009214226, -0.012935099, -0.08962785, 0.029733934, -0.0031940192, -0.016709115, 0.05964771, 0.063720234, 0.022471542, 0.012363867, 0.20758559, 0.011010401, 0.037424065) * inp_1_2_2;
|
|
const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.013779818, 0.21916316, -0.01749961, -0.031653825, 0.019197604, -0.6749215, 0.025190348, 0.009256963, 0.037712105, -0.0427757, -0.0043518688, -0.014002386, 0.0035128384, -0.25496736, 0.005694031, -0.0139859775) * inp_2_0_0;
|
|
result0 += M4(0.058478884, -0.035180025, 0.16013937, -0.15942277, -0.03816243, 0.05733203, -0.06691323, -0.016521277, 0.0045641474, -0.083760835, 0.026783314, -0.031845126, 0.028373491, -0.047164347, 0.08748592, -0.18181978) * inp_2_1_0;
|
|
result0 += M4(-0.029648367, -0.0064177266, -0.007985839, -0.0039937594, 0.049200006, 0.04194013, -0.012315959, -0.058244944, -0.00062385213, 0.03707708, 0.045191098, -0.033270165, -0.029975431, -0.008462107, -0.043282945, -0.019695746) * inp_2_2_0;
|
|
result0 += M4(0.03685474, 0.21790871, 0.016414702, -0.0036660742, -0.02711461, -0.5112229, -0.0076950877, -0.027526189, -0.07460244, 0.1775834, 0.096185096, -0.09191064, 0.06321831, -0.06237055, -0.09763056, 0.07062399) * inp_2_0_1;
|
|
result0 += M4(-0.08479572, -0.14922842, -0.1637572, 0.30417043, -0.035105832, -0.24034815, -0.04410996, -0.046823483, 0.0038008555, -0.08467451, 0.31348646, -0.41025126, 0.12546113, 0.18967205, 0.04131372, 0.04792723) * inp_2_1_1;
|
|
result0 += M4(0.06739356, 0.013132563, 0.0705947, -0.09722243, -0.07827824, -0.034469802, -0.011826323, -0.055389136, -0.009527839, 0.025252374, 0.24526902, -0.20351002, 0.03863696, -0.0358095, -0.08841542, 0.08530016) * inp_2_2_1;
|
|
result0 += M4(0.0024189504, 0.18470794, 0.02421729, -0.026005035, 0.0714845, -0.38582283, -4.2597487e-05, -0.008914323, 0.10920884, 0.29031822, 0.0063368627, -0.009375747, -0.014497603, 0.1282962, -0.022127206, -0.013304615) * inp_2_0_2;
|
|
result0 += M4(-0.030173173, -0.103350885, 0.049853764, -0.067631066, -0.28206545, 0.11199615, -0.030596651, 0.020736802, -0.23197545, -0.08767077, 0.08164665, -0.09930701, 0.39759257, 0.01888446, 0.087139525, -0.11122967) * inp_2_1_2;
|
|
result0 += M4(0.044511873, 0.031926055, 0.013973623, -0.026140142, -0.16958922, 0.03009211, -0.018803382, 0.017162027, 0.037430823, 0.026251895, 0.001828609, 0.0070498935, 0.032491688, -0.06525586, -0.042710643, 0.009969354) * inp_2_2_2;
|
|
result1 += M4(-0.028666057, 0.102332056, 0.029340958, 0.097780496, -0.050370127, 0.024993198, -0.014046047, 0.06757201, 0.0028525956, 0.009097466, 0.0012397132, 0.020048415, 0.034758266, 0.001306835, 0.021822734, 0.040313948) * inp_2_0_0;
|
|
result1 += M4(0.12116667, 0.042435147, -0.011453865, 0.28418344, -0.054431155, -0.041074473, -0.021197021, -0.46425793, 0.024403436, 0.10476772, 0.00850296, 0.17764671, 0.19450468, 0.04881686, -0.017300593, 0.15328363) * inp_2_1_0;
|
|
result1 += M4(-0.014537472, -0.04785431, 0.0057213237, 0.08120087, 0.0027244955, 0.036710683, 0.0018011717, -0.11301808, -0.06367366, -0.014481189, -0.009738734, -0.01731468, 0.0046592485, 0.015897358, -0.0037436956, 0.0090299705) * inp_2_2_0;
|
|
result1 += M4(-0.0897561, -0.077474676, -0.040233955, 0.11166737, 0.012042107, -0.13035285, 0.0069526, -0.029161317, 0.04087425, -0.05005156, -0.02397979, -0.049189534, 0.08019726, 0.0033636147, -0.0046941456, -0.04718199) * inp_2_0_1;
|
|
result1 += M4(-0.019481955, -0.11552646, -0.15306385, 0.11965796, -0.06444712, -0.60503227, -0.03709784, -0.84860533, 0.101685636, 0.041876853, -0.018051691, -0.09969215, -0.117303394, -0.17066102, -0.19214894, -0.06141623) * inp_2_1_1;
|
|
result1 += M4(-0.08162457, 0.016572323, 0.009119889, 0.055340182, -0.013891592, -0.30231586, -0.021164672, -0.49435094, -0.051145483, 0.14773344, 0.07521264, 0.06259801, 0.03906362, -0.13338828, -0.030600214, -0.098313935) * inp_2_2_1;
|
|
result1 += M4(-0.0056195445, 0.28928724, 0.1379861, 0.0032541384, -0.0068257446, 0.011568706, -0.01583596, -0.026307525, -0.015456645, 0.016127326, -0.025889514, 0.050082583, -0.0007289241, -0.010907576, -0.12133015, 0.01162384) * inp_2_0_2;
|
|
result1 += M4(0.028638389, 0.23542702, 0.121689275, -0.23637827, -0.016180132, -0.67607766, -0.08673431, -0.10939401, -0.010357574, -0.08129334, -0.09344, 0.016316568, 0.025065977, 0.3341326, 0.22300017, 0.09743363) * inp_2_1_2;
|
|
result1 += M4(-0.015541606, 0.031042485, 0.051960707, -0.10814859, -0.0031766663, -0.13280825, -0.022083284, -0.06239775, -0.0022412688, -0.11887442, 0.047357682, -0.16150878, 0.010135667, 0.10523319, -0.025714098, 0.03423172) * inp_2_2_2;
|
|
result2 += M4(-0.022993447, -0.10126082, -0.008862645, 0.009558404, -0.054707915, -0.111216895, -0.012196099, 0.07278377, 0.0041023744, -0.019806284, -0.002214899, 0.030154316, 0.07870343, -0.013954103, 0.013901531, -0.0071220016) * inp_2_0_0;
|
|
result2 += M4(0.05095645, -0.042629406, -0.082931906, 0.09913234, -0.041729834, -0.081676796, 0.0075822813, -0.3088201, 0.028836848, -0.082174934, 0.032421783, -0.0060300524, 0.14015388, -0.06350905, 0.13277183, 0.051554084) * inp_2_1_0;
|
|
result2 += M4(-0.021707237, -0.032912396, 0.21629085, 0.014243769, -0.028248513, -0.085443296, -0.3280954, -0.13558732, -0.042475957, -0.04921339, 0.0012851176, 0.032892134, 0.051295515, 0.06387788, 0.021846004, -0.13766341) * inp_2_2_0;
|
|
result2 += M4(0.023979997, -0.053251967, -0.0006032927, 0.090948775, -0.03888054, -0.102491334, 0.01808955, -0.040047757, -0.021477126, -0.07267349, -0.012966651, 0.030831197, 0.10165511, -0.06750721, -0.017767964, -0.09202411) * inp_2_0_1;
|
|
result2 += M4(-0.046669472, -0.013510663, 0.059922248, 0.23835911, -0.07820152, 0.06794421, 0.035604022, -0.1144176, 0.07068062, -0.04091932, 0.05944978, 0.11380249, 0.09088974, -0.1002041, -0.088322915, -0.030037394) * inp_2_1_1;
|
|
result2 += M4(-0.051205207, 0.05612446, 0.07506183, -0.09648897, -0.024628337, -0.016302107, -0.91603076, -0.29067314, -0.04936889, -0.08389894, -0.32963422, -0.07402971, 0.074949786, -0.057981458, 0.5925726, 0.038325448) * inp_2_2_1;
|
|
result2 += M4(-0.045664266, -0.054451637, -0.010032334, 0.034178797, 0.029870953, -0.11087751, -0.008587346, 0.005013674, 0.010290779, -0.10230002, -0.016301338, -0.0010879014, -0.061455965, -0.013092095, -0.0126922345, 0.021366991) * inp_2_0_2;
|
|
result2 += M4(-0.20278515, -0.11891043, -0.047657784, -0.16653648, 0.0005207907, -0.10197919, -0.012389828, 0.039248805, -0.027018683, 0.016838243, 0.10375171, -0.13223636, 0.060682498, 0.010384079, 0.05070221, -0.07521598) * inp_2_1_2;
|
|
result2 += M4(-0.06732721, -0.09149638, -0.01440614, -0.00046416372, -0.024689594, -0.086670674, 0.0056709866, 0.04906158, -0.011234572, -0.00085715187, 0.041561518, -0.011770444, 0.05219869, -0.005724713, 0.07658236, -0.07809252) * inp_2_2_2;
|
|
result3 += M4(-0.043154147, 0.1813734, -0.24296522, -0.18964617, -0.00053656293, 0.073823325, 0.07934629, 0.0077484837, -0.033539794, 0.07983527, -0.04132395, 0.0221986, -0.046767786, -0.2046515, 0.093034185, 0.017791785) * inp_2_0_0;
|
|
result3 += M4(-0.07986458, 0.24019942, 0.2890016, 0.05044439, 0.053371403, -0.68155944, -0.21055256, -0.19590291, 0.04625938, -0.09285042, 0.005184423, -0.024542352, -0.1242854, 0.16712302, 0.25536102, -0.06046315) * inp_2_1_0;
|
|
result3 += M4(0.110498555, 0.07516408, -0.07876151, 0.01883748, -0.114289135, -0.29517758, -0.14721058, 0.022091808, -0.0359855, 0.08394964, -0.019007629, 0.019101847, 0.019339366, -0.11714168, -0.07094147, 0.00451848) * inp_2_2_0;
|
|
result3 += M4(-0.012919992, 0.1369222, -0.2732416, -0.17526399, -0.010347271, -0.01868432, -0.015816586, -0.14344539, 0.018896159, 0.19946118, -0.037967134, -0.10760814, 0.032594014, -0.41689512, 0.058743387, 0.19437246) * inp_2_0_1;
|
|
result3 += M4(0.11102623, 0.1572921, 0.23168686, -0.069944575, -0.1813538, -0.6271781, -0.24745354, -0.17729184, -0.33937684, -0.0861067, -0.002288483, 0.055063516, -0.041452993, 0.5463022, 0.028170375, 0.11677861) * inp_2_1_1;
|
|
result3 += M4(0.08435565, 0.032648746, -0.055590957, 0.0014671348, 0.11301972, -0.3885799, 0.011423688, 0.02565552, 0.18209329, 0.074671805, -0.041186612, 0.025571901, -0.14688689, -0.27317134, -0.03077098, 0.0117346905) * inp_2_2_1;
|
|
result3 += M4(0.04817234, 0.26817775, -0.10161488, 0.040350743, 0.010257333, 0.031333394, 0.002950192, 0.0534698, -0.012836827, 0.11579518, -0.08975087, -0.032125667, -0.15702203, -0.15051942, -0.015707918, -0.03141244) * inp_2_0_2;
|
|
result3 += M4(0.09410685, -0.013702716, 0.17390637, -0.030210355, 0.0147846555, -0.06855739, 0.04796251, 0.009910011, 0.1618562, 0.050896388, -0.0011444244, -0.059056938, 0.08476411, 0.20332877, -0.012074825, -0.0039784717) * inp_2_1_2;
|
|
result3 += M4(-0.033710867, 0.13790971, -0.029413365, 0.041634776, -0.030713236, 0.035875082, 0.028113496, 0.0027906392, -0.03885695, 0.12731767, 0.012205976, 0.0035707527, -0.01529997, -0.15364954, 0.04945567, -0.023066707) * inp_2_2_2;
|
|
const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.038062073, 0.2141279, 0.06481006, -0.12940322, -0.0027764095, 0.08240676, -0.008070355, -0.011360152, -0.018044285, -0.22364011, -0.016826315, 0.04532728, -0.021932011, 0.17901145, -0.04023682, 0.00414411) * inp_3_0_0;
|
|
result0 += M4(0.024813054, 0.053320397, -0.068416364, 0.085439764, 0.019156259, 0.09772809, 0.013872583, 0.003128646, -0.0377687, 0.06855879, -0.023625996, -0.013501507, -0.037352577, -0.12048335, -0.06935224, 0.079339765) * inp_3_1_0;
|
|
result0 += M4(-0.0008948348, -0.05550685, 0.007604002, -0.0158738, 0.00076379447, 0.020687131, 0.015978998, 0.02077568, 0.017053058, -0.004057524, -0.12129772, 0.058062665, 0.053945795, 0.1649665, -0.030655291, 0.05742684) * inp_3_2_0;
|
|
result0 += M4(0.103066035, -0.7300315, 0.085574366, -0.038748726, -0.052253325, -0.27357453, -0.035952266, 0.055022333, 0.024551407, -0.25668055, 0.0007889168, -0.0007591933, -0.0949221, 0.072160475, 0.016339399, -0.09055286) * inp_3_0_1;
|
|
result0 += M4(0.07770222, 0.1035488, 0.11635461, -0.19613256, -0.17844269, -0.10049437, -0.032167118, -0.069166064, 0.22103043, 0.06767758, -0.16874984, 0.17686485, 0.24360006, -0.088182166, 0.06325561, 0.04732989) * inp_3_1_1;
|
|
result0 += M4(0.01281408, -0.052539933, -0.036869086, -0.0019218755, 0.033333823, 0.058557454, -0.047289293, 0.0734493, -0.011906149, -0.09344646, -0.1792974, 0.21100967, -0.12567343, 0.08980369, 0.03961268, -0.0020021321) * inp_3_2_1;
|
|
result0 += M4(0.23851325, -0.2964688, -0.024860052, 0.0076622204, 0.08764992, 0.050132558, -0.0065116147, -0.0029406226, -0.019137815, -0.11900036, 0.011086529, -0.0067911097, -0.28993663, 0.076079234, -0.011847647, -0.100516155) * inp_3_0_2;
|
|
result0 += M4(0.04419162, 0.10790811, 0.050464034, -0.052768122, -0.13191228, 0.01942084, 0.02409991, 0.013205228, -0.215829, 0.010005103, -0.061933685, 0.05324168, -0.4616059, -0.070461564, -0.064849354, 0.11797088) * inp_3_1_2;
|
|
result0 += M4(-0.04070436, -0.09416829, -0.04929169, 0.067432515, -0.0864388, 0.01788289, 0.04142661, -0.042931058, 0.03439083, 0.009610911, 0.0030174495, 0.005878064, 0.07801756, 0.08367224, 0.04520196, -0.06358246) * inp_3_2_2;
|
|
result1 += M4(0.040575773, 0.009816436, 0.00020496556, -0.027174527, 0.0061966665, -0.010212162, -0.007030445, -0.09197709, -0.014679208, -0.11147172, -0.0038630906, -0.059626117, -0.064454876, -0.18848884, -0.026746308, -0.2612213) * inp_3_0_0;
|
|
result1 += M4(0.025270196, -0.008648287, -0.006241156, 0.11304264, 0.002291121, -0.17409967, -0.0030635125, -0.15197915, 0.31160298, -0.18276493, -0.007551214, -0.29215118, 0.032651596, -0.09381023, -0.033310868, -0.30864105) * inp_3_1_0;
|
|
result1 += M4(0.029827468, -0.07212181, -0.0059149708, -0.02832478, -0.0036889983, 0.11503275, -0.0133342985, 0.35067505, 0.069052294, 0.021306505, -0.006124511, -0.052335735, -0.051988155, -0.010415701, 0.0292754, -0.16619146) * inp_3_2_0;
|
|
result1 += M4(-0.017186752, -0.024710158, -0.054002713, 0.09658648, 0.021105714, 0.09553063, 0.005458199, 0.1535918, -0.052033518, 0.05249869, 0.023075325, 0.0017748644, -0.007927314, 0.21851934, 0.076110005, 0.1549553) * inp_3_0_1;
|
|
result1 += M4(-0.24649884, 0.2037065, 0.0050564874, -0.0838121, -0.053290866, 0.24137548, 0.03268435, -0.19582611, 0.2728014, 0.11516617, -0.2776526, 0.29564765, 0.056608714, 0.066695906, 0.039385546, 0.3590516) * inp_3_1_1;
|
|
result1 += M4(0.021940233, 0.09766358, -0.0024492894, 0.056543294, -0.042301793, -0.18701898, -0.04649817, -0.39435723, 0.17607027, -0.14804529, -0.13436085, -0.038023666, 0.017116247, 0.12693964, 0.040699497, 0.32045344) * inp_3_2_1;
|
|
result1 += M4(0.030560251, 0.033483144, 0.06478091, -0.08260862, -0.0003347912, -0.28518027, -0.014617922, -0.10500096, -0.0015620848, -0.038950246, 0.028218228, -0.03802862, -0.01546767, -0.9264438, -0.008355967, -0.5635481) * inp_3_0_2;
|
|
result1 += M4(-0.03922487, -0.115100004, 0.18206236, 0.031616636, 0.010903155, -0.21015875, -0.06844489, 0.08924375, 0.0004909406, -0.19463973, -0.25094545, -0.17086059, 0.022693226, -0.2251329, -0.014191069, -0.22457352) * inp_3_1_2;
|
|
result1 += M4(0.005867922, -0.07154546, -0.041601587, -0.0068623656, -0.001936714, 0.11745422, 0.03150495, -0.093714595, 0.029465571, 0.08250033, -0.1579151, -0.028160485, -0.02360907, 0.0055461796, -0.009449481, -0.07122842) * inp_3_2_2;
|
|
result2 += M4(0.014997094, -0.073501885, 0.0132263275, 0.12340946, 0.018715253, -0.03774917, -0.030047366, -0.13164493, -0.019094057, -0.019109681, -0.008549362, -0.020587744, -0.014902967, -0.09828911, -0.023465153, -0.07441592) * inp_3_0_0;
|
|
result2 += M4(-0.0036458373, -0.077869564, 0.046160117, 0.0006182458, -0.04060173, 0.041398875, 0.017186413, 0.2170062, 0.15190022, 0.00671332, 0.007910316, -0.03564122, 0.07395223, 0.043946113, -0.023532797, -0.09109481) * inp_3_1_0;
|
|
result2 += M4(-0.009333379, -0.07201044, 0.34406257, -0.008108068, 0.017962933, 0.07170349, -0.13883772, -0.097861245, 0.10507732, -0.017253805, 0.22346519, -0.060781628, -0.042994563, -0.034118805, 0.11651774, 0.068822056) * inp_3_2_0;
|
|
result2 += M4(0.107567824, -0.014996469, 0.0468502, -0.03507376, -0.016626038, -0.11044043, 0.012853235, 0.022993937, -0.03965704, -0.013101877, -0.005181674, 0.0720093, -0.04458476, -0.044698216, -0.056906916, -0.15644144) * inp_3_0_1;
|
|
result2 += M4(-0.24450101, -0.05021081, 0.28329143, 0.052905396, 0.03132615, 0.024134472, 0.025940863, -0.31099752, -0.15341069, 0.053456385, 0.11059655, -0.22808413, -0.03157951, 0.02424628, 0.17435497, -0.13484989) * inp_3_1_1;
|
|
result2 += M4(0.038007554, -0.07408078, -0.31231526, -0.046669047, -0.075324856, 0.009619571, -0.30759802, 0.14287001, 0.049111176, 0.04579521, -0.10265535, 0.047160428, 0.014466494, 0.058858827, -0.40376583, -0.01559601) * inp_3_2_1;
|
|
result2 += M4(0.029162662, -0.08115085, 0.0015537582, -0.047456466, -0.025844254, 0.07617664, -0.014251737, 0.024603898, 0.028121594, 0.019586843, 0.0012635064, -0.021388637, -0.043075148, -0.084901094, 0.0054910085, -0.17058848) * inp_3_0_2;
|
|
result2 += M4(0.15652397, -0.023679573, 0.10785547, -0.023840956, -0.03806573, -0.041285273, 0.025743026, 0.1401332, -0.11558319, 0.019962313, -0.035437133, 0.12046317, 0.0058546956, 0.021801556, 0.08483396, 0.16000974) * inp_3_1_2;
|
|
result2 += M4(-0.01603148, 0.0026883425, 0.10843955, 0.04351186, -0.036719613, -0.109948635, -0.061967216, -0.14250335, -0.05838408, 0.0007986963, -0.14958149, 0.03388766, 0.04054899, 0.081719644, 0.05053678, -0.03529535) * inp_3_2_2;
|
|
result3 += M4(-0.085353546, 0.08461731, -0.13915083, -0.036257822, -0.008904922, -0.09930966, 0.13293777, 0.03638514, 0.03201791, 0.1338242, -0.09031037, -0.03228892, 0.034745544, -0.18244584, -0.20624673, -0.08474406) * inp_3_0_0;
|
|
result3 += M4(0.07409441, -0.053643093, -0.07329052, -0.029934444, 0.047175847, 0.043111406, -0.20345508, 0.07938889, -0.016340598, -0.38565966, 0.21107996, -0.04949173, 0.11025018, 0.2827765, 0.27820566, 0.03526538) * inp_3_1_0;
|
|
result3 += M4(0.030461181, -0.07242497, 0.046613485, 0.01554022, -0.021772174, -0.10526904, 0.121392906, -0.01915928, -0.06995953, -0.08755693, -0.1146613, -0.056353655, -0.030497294, 0.38102558, -0.14643161, 0.013574363) * inp_3_2_0;
|
|
result3 += M4(0.02252646, 0.19843449, 0.01796448, -0.114927374, 0.009417218, -0.14562178, 0.009520882, -0.24945049, -0.03411223, 0.019948045, -0.059774544, -0.16049027, -0.160916, 0.2315701, -0.33952394, -0.16819766) * inp_3_0_1;
|
|
result3 += M4(-0.120750055, -0.101981714, -0.6174327, 0.04210056, -0.12963599, -0.26512286, -0.15236779, 0.23088011, 0.17519988, -0.18581973, 0.2061288, 0.1486004, 0.15724893, -0.035897397, 0.27736568, 0.06209648) * inp_3_1_1;
|
|
result3 += M4(-0.043066334, -0.2650026, 0.13015231, -0.021486249, -0.021243062, 0.12762718, -0.16992107, -0.017701978, -0.1198713, -0.14685233, 0.08095718, 0.016187131, 0.027274022, 0.17814746, -0.1705369, 0.037348423) * inp_3_2_1;
|
|
result3 += M4(-0.020911487, 0.012172589, 0.030367784, -0.19994633, 0.004376709, 0.09316046, -0.05491899, -0.12120255, 0.048024613, 0.055273246, -0.004435442, -0.00016166511, -0.22648303, -0.30258325, -0.01430347, -0.21556045) * inp_3_0_2;
|
|
result3 += M4(0.07303654, 0.16753973, -0.03591846, -0.018747568, 0.025375472, -0.26433265, 0.12862511, 0.02518775, -0.14287944, -0.17940018, -0.02466985, 0.11916799, -0.242104, -0.108683094, 0.055211473, -0.13809764) * inp_3_1_2;
|
|
result3 += M4(-0.026391529, -0.11112396, 0.030737549, -0.011259499, 0.023542786, -0.0055174553, -0.049245745, 0.027468752, 0.057162017, -0.12012449, -0.03709937, -0.0076213237, 0.0522986, 0.131559, 0.044747207, -0.006477326) * inp_3_2_2;
|
|
const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2);
|
|
imageStore(out_image, output_base + ivec2(0, 0), max(result0, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(1, 0), max(result1, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(0, 1), max(result2, V4(0.0)));
|
|
imageStore(out_image, output_base + ivec2(1, 1), max(result3, V4(0.0)));
|
|
}
|
|
|
|
//!DESC ArtCNN C4F16 DS (Conv2D-5)
|
|
//!COMPUTE 24 32 12 16
|
|
//!HOOK LUMA
|
|
//!BIND conv2d_4
|
|
//!SAVE conv2d_5
|
|
//!WIDTH LUMA.w 2.0 *
|
|
//!HEIGHT LUMA.h 2.0 *
|
|
//!COMPONENTS 4
|
|
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
|
|
// Numeric type aliases for this pass.
// The float16 explicit-arithmetic-types extension is requested with `enable`.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
|
|
// When the extension macro is defined, V4 / M4 / F name the 16-bit float
// vector, matrix and scalar types; otherwise (see the #else branch) they
// name the built-in 32-bit vec4 / mat4 / float types.
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
|
|
# define V4 f16vec4
|
|
# define M4 f16mat4
|
|
# define F float16_t
|
|
#else
|
|
// Fallback: same alias names, full-precision types.
# define V4 vec4
|
|
# define M4 mat4
|
|
# define F float
|
|
#endif
|
|
|
|
// Compile-time tile geometry for this pass, followed by the shared input tile.
// Each constant depends on the ones declared before it, and the final values
// are used as array dimensions, so the declaration order matters.

// Convolution window: 3 taps in x and 3 taps in y.
const ivec2 ksize = ivec2(3, 3);
|
|
// Integer half-window, i.e. (1, 1); hook() subtracts it from the tile
// coordinate before fetching, so the window is centred on the invocation.
const ivec2 offset = ksize / 2;
|
|
// x/y extent of the workgroup as a signed vector.
// NOTE(review): presumably 12x16, taken from the COMPUTE directive's
// thread-size arguments -- confirm against the mpv user-shader docs.
const ivec2 wg_size = ivec2(gl_WorkGroupSize);
|
|
// Tile extent: one cell per invocation plus ksize - 1 (= 2) extra cells in
// each dimension, so that hook() can read [local + 0 .. local + 2] in x and y.
const ivec2 isize = wg_size + ksize - 1;
|
|
// Workgroup-shared input tile, indexed [plane][y][x].
// hook() fills planes 0..3 from the four texels at offsets (0,0), (1,0),
// (0,1) and (1,1) relative to a base coordinate that is scaled by 2.
shared V4 inp[4][isize.y][isize.x];
|
|
void hook() {
|
|
const uvec2 local_xy = gl_LocalInvocationID.xy;
|
|
ivec2 base = ivec2(gl_WorkGroupID) * wg_size;
|
|
for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
|
|
for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
|
|
const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2);
|
|
inp[0][y][x] = V4(conv2d_4_mul * texelFetch(conv2d_4_raw, input_base + ivec2(0, 0), 0));
|
|
inp[1][y][x] = V4(conv2d_4_mul * texelFetch(conv2d_4_raw, input_base + ivec2(1, 0), 0));
|
|
inp[2][y][x] = V4(conv2d_4_mul * texelFetch(conv2d_4_raw, input_base + ivec2(0, 1), 0));
|
|
inp[3][y][x] = V4(conv2d_4_mul * texelFetch(conv2d_4_raw, input_base + ivec2(1, 1), 0));
|
|
}
|
|
}
|
|
|
|
barrier();
|
|
V4 result0 = V4(0.016554128, 0.0026448318, 0.013652129, -0.013316665);
|
|
V4 result1 = V4(0.0044759065, 0.006209177, -0.013305955, -0.0016899946);
|
|
V4 result2 = V4(0.009802427, 0.011932272, 0.0051619564, 0.0023012678);
|
|
V4 result3 = V4(-0.0024697422, 0.007943644, 0.033936143, -0.01641893);
|
|
const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.08474402, -0.22917373, -0.14206196, -0.010539577, -0.046236325, 0.0013173359, 0.060659897, -0.055962656, 0.08355562, 0.013202813, -0.12521583, 0.08384832, -0.18861641, -0.047398612, 0.15795282, -0.09313713) * inp_0_0_0;
|
|
result0 += M4(-0.461722, -0.94063747, -0.22885977, -0.35344717, 0.072716385, 0.059156727, 0.015389428, -0.030055461, -0.038264215, -0.033173826, 0.033325374, -0.025504412, -0.026117543, 0.051860537, 0.028839415, 0.04346096) * inp_0_1_0;
|
|
result0 += M4(0.5087371, -0.34765637, -0.6182863, -0.3837225, 0.15503408, 0.10887861, 0.10084102, -0.20092858, -0.06793961, -0.07963712, -0.0017726015, 0.00066041463, 0.001742279, 0.047694772, -0.02344845, 0.02737981) * inp_0_2_0;
|
|
result0 += M4(-0.009285821, 0.11000235, -0.031392742, 0.11225282, 0.05141218, 0.12055548, 0.082777604, -0.020320954, -0.06680321, -0.049921807, -0.04921422, -0.03562363, 0.06059489, 0.017469844, -0.09669497, -0.0026134958) * inp_0_0_1;
|
|
result0 += M4(0.14124833, 0.19630684, 0.1194282, 0.26702908, 0.18400134, 0.10256528, 0.4572562, 0.15124409, 0.3166469, -0.6715967, 0.014648809, -0.102525435, -0.5180954, 0.66877353, -0.019614657, -0.002470084) * inp_0_1_1;
|
|
result0 += M4(0.18672962, -0.017508673, -0.16939771, 0.047094487, -0.060122382, 0.29416057, -0.11709289, 0.025598915, -0.14421585, -0.11846913, 0.03920522, 0.10352751, 0.18691705, 0.1590042, 0.02073003, -0.11663061) * inp_0_2_1;
|
|
result0 += M4(0.0056254803, 0.0057388344, 0.0374958, -0.004648934, -0.043297503, 0.0689816, 0.027923174, -0.03876041, -0.06407546, -0.025186531, 0.16828129, 0.0034699684, 0.031381328, 0.023971673, -0.20254338, -0.02430013) * inp_0_0_2;
|
|
result0 += M4(0.15775095, -0.15862629, 0.23583956, 0.049442884, 0.15699472, 0.13332577, -0.2572897, -0.040875558, -0.0002704255, 0.02988306, -0.006920376, 0.03397873, -0.28567457, 0.042112727, -0.3724105, -0.023333224) * inp_0_1_2;
|
|
result0 += M4(-0.014858677, 0.032062486, -0.076343395, -0.00083215634, -0.10055656, 0.09093586, 0.17731445, -0.057983246, 0.12641528, 0.04933168, -0.023279864, 0.028127778, -0.26475126, 0.0478974, -0.015684502, -0.039201133) * inp_0_2_2;
|
|
result1 += M4(0.392307, 0.03353582, 0.07128573, 0.3369635, -0.051541477, -0.04448266, 0.04055149, -0.021615157, 0.048823766, 0.090479605, -0.0072554573, 0.09644649, -0.053131856, -0.1262546, 0.105549574, -0.06123314) * inp_0_0_0;
|
|
result1 += M4(-0.15502161, -0.46141383, -0.09890623, 1.527269, -0.28750673, -0.008933071, -0.15267625, -0.01864867, 0.074853286, -0.028399043, -0.11274298, 0.007967588, 0.031121511, 0.077898726, 0.049051795, 0.053952634) * inp_0_1_0;
|
|
result1 += M4(-0.83457303, -0.31433025, 0.7315325, 0.4259749, -0.19004081, -0.067098886, -0.45684782, -0.0074499478, 0.02546783, -0.028332973, -0.045866985, -0.005292344, -0.017153805, 0.010074401, 0.016775275, 0.041146822) * inp_0_2_0;
|
|
result1 += M4(0.09953403, -0.0014643228, 0.10400452, 0.0056565623, -0.18444373, 0.04270818, -0.011462894, -0.0616828, 0.03475264, -0.065023266, 0.11439611, -0.0103913285, -0.093443975, -0.04991801, -0.17465092, 0.060669404) * inp_0_0_1;
|
|
result1 += M4(0.205716, -0.007968987, -0.21627533, -0.040425707, -0.09754452, 0.018082334, -0.00956709, 0.0848001, 0.43502063, 0.15083931, -0.052782185, -0.07282074, -0.3702116, -0.16925444, 0.0008385632, 0.088931836) * inp_0_1_1;
|
|
result1 += M4(0.09588992, -0.20940185, 0.10019888, 0.08854741, -0.40017736, -0.10545165, 0.14159274, -0.13410604, -0.01952819, -0.10284345, -0.22501552, 0.039318863, -0.010106364, 0.1133827, 0.22189315, 0.088626176) * inp_0_2_1;
|
|
result1 += M4(0.009421535, 0.02838454, 0.029579297, 0.014318628, -0.10293239, -0.018695444, 0.07494177, -0.1269828, 0.013530136, 0.067722216, 0.12684286, -0.022022089, 0.05124597, -0.13029617, -0.14270568, -0.061728578) * inp_0_0_2;
|
|
result1 += M4(-0.050228037, 0.06668443, 0.14026366, 0.26039708, -0.13540196, -0.13174525, -0.24864364, -0.22513062, 0.02472696, -0.14155997, -0.08290799, -0.020995213, 0.007919788, -0.03275966, 0.023754396, -0.28459167) * inp_0_1_2;
|
|
result1 += M4(0.023433143, -0.012505106, -0.039584465, -0.079250574, -0.06416037, -0.038407087, 0.11237008, -0.1196359, -0.026753819, -0.028015738, 0.0019825825, -0.07621049, 0.095602036, -0.0055057956, -0.12222531, -0.142331) * inp_0_2_2;
|
|
result2 += M4(0.21173902, -0.13234012, -0.4541014, 0.061877493, 0.0068946453, 0.022941818, -0.056972146, 0.01937574, 0.011626747, -0.020704554, 0.014839189, 0.020399058, 0.064279646, 0.008993624, -0.119472414, 0.011998723) * inp_0_0_0;
|
|
result2 += M4(-0.58199537, 0.36287612, -1.4199176, -0.39885208, -0.22821763, -0.007374069, 0.086480334, -0.016935853, 0.019518664, -0.015480163, -0.08576964, -0.024676455, -0.010108951, 0.037544332, 0.14300823, 0.06268861) * inp_0_1_0;
|
|
result2 += M4(-0.50271744, 0.025554385, -0.5727786, -0.17825632, -0.37914005, 0.051398177, 0.15490708, 0.046360016, 0.10120417, 0.02269559, -0.009419459, -0.03541369, -0.07406952, 0.002039037, -0.0006407997, -0.009875078) * inp_0_2_0;
|
|
result2 += M4(0.03687174, -0.01882471, 0.03676522, 0.021757437, -0.079064965, -0.037517067, 0.0642965, 0.014232467, 0.08768756, 0.018420901, -0.020334335, -0.0012831561, -0.114168204, -0.01205998, -0.11075036, 0.029242856) * inp_0_0_1;
|
|
result2 += M4(-0.047860306, -0.09711567, -0.17504969, -0.15081553, -0.3049216, -0.12750636, 0.02584121, 0.2795914, 0.4429374, -0.13278486, -0.17154583, -0.021857448, -0.5705778, 0.24980383, -0.042743698, 0.31541702) * inp_0_1_1;
|
|
result2 += M4(-0.19484288, -0.038754307, -0.07002575, 0.0040455586, -0.29587415, -0.044193868, -0.23622102, 0.029706014, 0.013432993, 0.083246335, 0.04279412, -0.08268909, -0.08376078, -0.07014374, -0.07676366, 0.18538836) * inp_0_2_1;
|
|
result2 += M4(0.010501982, -0.0005035494, 0.0004140859, 0.011096164, -0.039264694, -0.0066616437, 0.00040288252, -0.05843721, 0.047616445, -0.011402459, 0.082378514, 0.1362359, -0.022662984, -0.057075776, -0.071417384, -0.14695801) * inp_0_0_2;
|
|
result2 += M4(0.050130617, 0.113135085, 0.058322534, -0.13720304, -0.14341824, -0.051400222, 0.0505717, -0.15685362, 0.045417894, 0.07490942, -0.0060528195, 0.10542394, -0.12937509, -0.13544343, -0.11515363, -0.15704823) * inp_0_1_2;
|
|
result2 += M4(0.027637873, -0.02083409, -0.06730315, 0.015425108, -0.18975328, -0.003320729, 0.17947742, -0.04795124, -0.10879897, 0.005358679, -0.039596763, -0.061216958, 0.10180654, -0.03081845, 0.0511258, 0.14519742) * inp_0_2_2;
|
|
result3 += M4(0.014618818, 0.47584814, -0.17367727, 0.07147966, 0.034681022, 0.04903415, -0.02405715, -0.056848254, 0.05098864, -0.010156212, -0.04675307, 0.072642356, -0.036260676, -0.046609793, 0.053831514, 0.052008834) * inp_0_0_0;
|
|
result3 += M4(-0.20621313, 0.83789265, -0.13280529, -0.06215082, 0.18894519, 0.21933381, -0.09273566, -0.10503467, 0.14807668, -0.032840773, 0.029420458, -0.021397702, -0.29949743, -0.0031396316, -0.0962584, 0.04149515) * inp_0_1_0;
|
|
result3 += M4(-0.1368257, 0.52912736, 0.2355702, -0.123453274, -0.121077135, 0.0942226, 0.038726307, -0.17363513, 0.052847683, 0.07187939, -0.025024164, -0.008812916, -0.0051011033, 0.0018807795, -0.007282722, 0.023519143) * inp_0_2_0;
|
|
result3 += M4(-0.025657542, -0.002634332, -0.05383891, 0.14213623, -0.017881516, 0.11927832, 0.045409627, -0.05097621, 0.11048047, -0.29666448, 0.0103217615, 0.021556247, -0.06984235, 0.20101684, -0.007980527, 0.09391604) * inp_0_0_1;
|
|
result3 += M4(0.15439488, -0.20458573, -0.26687387, -0.054746483, -0.103493385, 0.60655016, -0.005280907, -0.12099173, 0.42942765, 0.13243759, -0.077581435, 0.23282799, -0.452252, -0.14954813, 0.15053895, -0.13070299) * inp_0_1_1;
|
|
result3 += M4(-0.1478404, 0.003927139, -0.017289838, 0.14824122, 0.26184613, 0.33273512, -0.0047554015, -0.20846145, 0.11270909, 0.09684709, -0.118098505, 0.20708843, -0.27642372, -0.13249397, 0.17980523, -0.16313562) * inp_0_2_1;
|
|
result3 += M4(0.008574112, 0.05413337, 0.01967447, -0.011820304, 0.08030224, 0.09292828, 0.006041947, -0.039917428, 0.15336455, 0.048226394, 0.026484624, 0.010617635, -0.14265074, -0.08177329, -0.074674845, 0.03954346) * inp_0_0_2;
|
|
result3 += M4(-0.035967097, -0.051198352, -0.0147744715, -0.006058455, 0.018199014, 0.20261578, 0.02218917, 0.02864353, 0.20123912, -0.20021546, -0.10024832, 0.1425623, -0.01632511, 0.19673872, 0.11388972, -0.09187605) * inp_0_1_2;
|
|
result3 += M4(-0.051516704, -0.037184518, -9.7836884e-05, -0.033820845, 0.2095811, 0.2926858, -0.07219224, -0.043460887, 0.055939294, -0.018073456, -0.010785669, 0.07900805, -0.032115173, -0.0025410457, -0.030606363, -0.12241656) * inp_0_2_2;
|
|
const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.0073166075, 0.045552894, -0.007951411, -0.023334121, 0.025722718, 0.064874984, -0.10358456, 0.04715832, 0.017686395, -0.041548744, -0.081770375, 0.039507207, 0.06641115, 0.023399614, -0.06840236, -0.0043447604) * inp_1_0_0;
|
|
result0 += M4(0.20941794, 0.09051338, -0.14984657, -0.074408896, -0.18479207, 0.33516723, 0.3928294, 0.14506263, -0.1711134, -0.17116591, 0.13062029, 0.07105616, 0.14903083, 0.049338274, -0.24173051, 0.004623255) * inp_1_1_0;
|
|
result0 += M4(0.07846061, 0.10122042, 0.0720976, 0.047867063, 0.14466134, 0.1670706, -0.08569032, -0.065952614, -0.18422887, -0.17921495, -0.14734626, 0.059171673, 0.03520425, -0.07958087, -0.019477837, -0.011459567) * inp_1_2_0;
|
|
result0 += M4(0.0014838969, 0.06830755, 0.06123817, 0.021657676, -0.024793208, 0.09558475, -0.031950496, 0.025265368, 0.03680227, -0.038717628, -0.1164916, -0.0011885002, -0.11559365, -0.093189925, -0.08363963, -0.04834404) * inp_1_0_1;
|
|
result0 += M4(0.40580162, 0.22511373, 0.0917264, 0.085031815, 0.36542302, 0.070314296, 0.27281147, 0.056904998, 0.3947338, -0.24021517, 0.30139968, -0.07091092, 0.6364601, -0.4326636, -0.09677784, 0.21916817) * inp_1_1_1;
|
|
result0 += M4(0.06285387, 0.063603476, 0.01333808, -0.11350428, -0.09640258, -0.01745298, 0.016524717, -0.084454134, -0.044837467, -0.05721756, 0.043622483, 0.046772335, -0.06767603, -0.054249935, 0.05514638, -0.031198056) * inp_1_2_1;
|
|
result0 += M4(0.0037593278, 0.058119446, 0.007772395, -0.045545187, -0.02386189, 0.033547785, 0.012052209, -0.014283093, -0.027987773, -0.008577758, 0.026769858, 0.01840433, -0.0116568385, -0.10390512, 0.18343972, -0.05923133) * inp_1_0_2;
|
|
result0 += M4(0.15633376, -0.024260672, 0.13842222, 0.09513615, 0.22078265, -0.086084336, 0.11120577, 0.02944383, 0.08267881, 0.040523216, 0.18060447, -0.055403788, -0.049887385, -0.15995465, 0.051882185, 0.027126146) * inp_1_1_2;
|
|
result0 += M4(0.08325832, -0.03183006, 0.017627114, 0.0028026348, 0.066393, -0.03619683, 0.08243465, 8.244323e-05, 0.00065900537, -0.042117447, 0.029968508, 0.022710547, 0.032688536, 0.0069056484, -0.10295506, -0.03543248) * inp_1_2_2;
|
|
result1 += M4(-0.057469558, 0.0059905495, -0.079491735, -0.03306272, 0.22499478, 0.122497976, -0.19126062, 0.06376063, 0.19904874, 0.107790366, -0.025252093, 0.021401089, -0.012737553, 0.05863985, -0.11861096, 0.05267514) * inp_1_0_0;
|
|
result1 += M4(-0.048021965, 0.009858718, 0.14495559, -0.15178134, -0.028742641, -0.1255706, -0.23628315, -0.099888876, 0.2575705, 0.158808, -0.14856035, -0.063091286, -0.063021936, -0.008114058, 0.19375907, -0.10753101) * inp_1_1_0;
|
|
result1 += M4(-0.11135972, -0.036761303, -0.0040923324, -0.089887746, -0.08558438, 0.1262421, 0.23975913, -0.08594608, 0.1670396, 0.06534481, 0.10865409, -0.103865735, 0.07964133, 0.008221018, 0.04624729, 0.017136093) * inp_1_2_0;
|
|
result1 += M4(-0.13777937, 0.076541856, 0.068616554, 0.054874852, 0.00012002991, 0.059381004, 0.0116826305, -0.06327681, 0.06994238, 0.03399867, -0.075316906, 0.07877732, 0.0399726, -0.0532057, 0.13116238, -0.09443346) * inp_1_0_1;
|
|
result1 += M4(-0.3907863, -0.08430246, -0.11643456, 0.2000658, 0.045306966, 0.17947012, -0.18730734, 0.055325676, 0.07598895, 0.40362227, 0.3625906, 0.3149084, -0.14925346, 0.33247408, 0.2575065, -0.30229175) * inp_1_1_1;
|
|
result1 += M4(-0.11553566, -0.23831218, -0.09813154, -0.06359181, 0.0940421, 0.07896055, 0.16879584, 0.21001212, 0.04249928, 0.12710789, 0.18103158, 0.119974695, -0.073143214, -0.22413246, -0.21876724, 0.1720369) * inp_1_2_1;
|
|
result1 += M4(-0.07137546, -0.03094973, 0.003292551, -0.041568562, -0.031684592, -0.016679576, 0.044499274, -0.052121527, 0.0154636195, 0.023929989, 0.0068467464, 0.0066959793, -0.0034232545, 0.07880061, 0.08291338, 0.064238854) * inp_1_0_2;
|
|
result1 += M4(-0.014572152, -0.013946868, -0.019180208, 0.11636705, -0.030346757, 0.12613934, 0.03705543, 0.0452622, -0.023375073, 0.08974235, -0.008709684, -0.044204757, -0.18795705, -0.18832888, 0.1379181, 0.13087544) * inp_1_1_2;
|
|
result1 += M4(-0.057705898, -0.031566918, 0.0013814529, 0.09302121, 0.008427629, 0.020180352, 0.05593306, 0.1297789, 0.008376333, -0.005297203, 0.021587873, 0.04491888, -0.0134414965, 0.0054050824, -0.057093758, -0.22655176) * inp_1_2_2;
|
|
result2 += M4(-0.080743715, 0.017110262, 0.085858025, -0.049398597, 0.14745821, 0.048592985, 0.12289114, 0.009377926, 0.1671251, -0.11968603, 0.055208463, -0.030808775, -0.052694857, -0.050542895, 0.108538054, -0.03228694) * inp_1_0_0;
|
|
result2 += M4(0.115866296, -0.07366202, 0.042411987, 0.08331246, -0.25421214, 0.24341306, 0.010557693, -0.102649584, 0.10870931, -0.12877654, -0.04819396, -0.34243214, 0.11449533, -0.12116078, -0.03230502, -0.02296135) * inp_1_1_0;
|
|
result2 += M4(-0.16150624, -0.058298904, 0.069828205, -0.0060394495, -0.05517869, -0.04788711, 0.08831893, -0.047414172, 0.20191658, 0.10358992, -0.0038464211, -0.121002756, 0.031794656, -0.010044565, -0.08703155, 0.015908001) * inp_1_2_0;
|
|
result2 += M4(-0.012656746, 0.081062734, 0.093692675, 0.1958319, 0.1272179, -0.012527262, 0.10199331, 0.18368192, 0.043714743, 0.018916927, 0.027921028, -0.11653393, 0.060847517, -0.062236406, -0.019688752, 0.0077735567) * inp_1_0_1;
|
|
result2 += M4(0.17710909, -0.07107738, 0.08273162, 0.0046328073, -0.08467732, 0.10024806, 0.047003683, -0.123511486, -0.015360459, 0.102026045, -0.08240464, 0.029658364, 0.4172425, -0.14342386, 0.1480498, -0.18634751) * inp_1_1_1;
|
|
result2 += M4(-0.12391618, -0.05871014, 0.040260173, -0.025818344, 0.012931218, -0.04979273, -0.001939683, -0.03158152, 0.056002043, -0.016798038, -0.050334938, 0.03402695, -0.079693966, 0.025994321, 0.020700725, 0.10635983) * inp_1_2_1;
|
|
result2 += M4(-0.026964232, -0.022047423, 0.06918982, 0.048541494, -0.038205724, 0.008799015, -0.0026270822, -0.026919778, -0.010267494, 0.013637323, 0.014320703, 0.04809512, 0.026248137, -0.032727137, 0.054396283, 0.028740866) * inp_1_0_2;
|
|
result2 += M4(-0.029081121, 0.04486759, 0.09537123, -0.04666429, -0.0030022755, 0.01980827, 0.11681267, 0.10332237, -0.05755216, -0.023530334, 0.10857501, -0.014354214, 0.021850962, 0.10997835, 0.096964404, -0.21275812) * inp_1_1_2;
|
|
result2 += M4(-0.021180212, 0.018354837, -0.014735963, -0.06836179, 0.032596614, 0.0017078082, -0.007971494, -0.026358705, 0.047004454, 0.0073685586, 0.05836899, -0.0055675255, 0.004838257, 0.0485349, -0.048474196, -0.04883732) * inp_1_2_2;
|
|
result3 += M4(0.03244459, -0.034479953, -0.0064135827, -0.08737613, -0.089818925, 0.04632484, -0.10045086, 0.008881872, -0.13907796, -0.062879585, -0.012990814, 0.15110466, 0.038008627, 0.04984243, 0.044478714, 0.009779906) * inp_1_0_0;
|
|
result3 += M4(0.1842782, -0.008685026, -0.042807627, -0.08820507, -0.50758076, 0.2103546, 0.11072308, 0.06511318, -0.3334045, 0.20163654, -0.029536664, 0.011301137, 0.4022046, -0.04273883, -0.026847176, -0.039757486) * inp_1_1_0;
|
|
result3 += M4(0.009022588, -0.023068365, -0.027991936, 0.00427787, -0.32369635, -0.11838789, -0.06324538, -0.16311373, -0.08104427, -0.026818093, -0.101193525, 0.1561913, 0.0131432135, -0.059175897, 0.03514937, 0.025877465) * inp_1_2_0;
|
|
result3 += M4(0.20467058, 0.112527, 0.011012977, -0.028550608, -0.08784855, -0.062379327, -0.021806065, -0.011393705, -0.16238952, -0.26693502, 0.06391084, 0.06478358, 0.1172006, -0.24499363, 0.039030664, 0.05566198) * inp_1_0_1;
|
|
result3 += M4(0.6883158, -0.3521105, -0.096033536, -0.011642683, -0.68677497, 0.17748499, -0.16916241, 0.038282525, -0.26609233, -0.10134399, 0.25453985, 0.06322925, 0.80599284, -0.20447275, 0.096553475, -0.02597967) * inp_1_1_1;
|
|
result3 += M4(0.38438025, 0.14472944, 0.086811066, -0.13257246, 0.028960865, 0.0317766, 0.045688264, -0.23838268, -0.12198645, -0.036164884, -0.041628513, -0.010525341, 0.2980953, 0.18289775, 0.092270166, 0.09970921) * inp_1_2_1;
|
|
result3 += M4(0.046299033, -0.0038731368, -0.00625676, -0.009460825, 0.00062204845, 0.047890205, -0.0041801203, -0.028481482, -0.0037050957, 0.039239805, -0.008649513, -0.03477906, 0.12597682, 0.08933818, 0.042497773, -0.06470036) * inp_1_0_2;
|
|
result3 += M4(0.14617918, 0.005229593, -0.0501567, 0.069478005, -0.31181592, -0.10519494, -0.029231973, 0.06010956, -0.13888495, 0.12517981, -0.00579049, -0.09823023, 0.6077278, 0.055234488, 0.078748465, 0.022114666) * inp_1_1_2;
|
|
result3 += M4(0.05200068, 0.055454627, 0.02337076, 0.011462933, -0.006175603, 0.057744995, 0.012468431, 0.05055063, -0.082297675, 0.006953254, -0.024784455, 0.013913281, -0.041024067, -0.06821284, -0.005379725, 0.025868118) * inp_1_2_2;
|
|
const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.03519386, 0.020771546, -0.06092822, 0.06694439, 0.009966364, 0.08845796, -0.087970845, -0.036957614, -0.22353762, 0.054692257, 0.32780975, 0.0006839374, -0.01862221, 0.11244745, 0.049653698, -0.040946584) * inp_2_0_0;
|
|
result0 += M4(-3.3792534e-05, -0.037827175, -0.0021955103, 0.038683914, -0.027731497, -0.008400333, -0.039806634, -0.062449142, -0.16357751, 0.046085265, -0.077466585, 0.17157899, 0.16051912, -0.009569097, -0.15054378, 0.07224473) * inp_2_1_0;
|
|
result0 += M4(-0.12121477, 0.00741661, 0.049274296, 0.0367287, -0.046093628, 0.040898, 0.05136005, 0.08081049, 0.012140579, -0.037353013, -0.07112558, -0.034909766, 0.16167422, 0.03786511, -0.015378625, -0.01624157) * inp_2_2_0;
|
|
result0 += M4(-0.0663365, -0.06115137, 0.09096096, -0.031260535, -0.025968976, -0.010863611, -0.016588423, 0.012816596, -0.736851, 0.15209731, 0.10394599, -0.672264, 0.15394175, -0.23876093, -0.00424517, -0.08416222) * inp_2_0_1;
|
|
result0 += M4(-0.040628262, -0.18910486, 0.24239047, -0.02539226, -0.032604747, -0.0074777473, 0.021508304, 0.010910954, -0.07856097, 0.11829924, -0.09963228, 0.20279798, -0.11184747, 0.20169383, -0.12306833, 0.0010044349) * inp_2_1_1;
|
|
result0 += M4(0.06593747, -0.021727772, -0.078049734, -0.027246097, 0.042461947, 0.020668674, -0.07633065, 0.008510407, -0.059274238, 0.0015911838, 0.119982466, 0.009135035, 0.109331414, 0.22766785, 0.09203822, 0.021122389) * inp_2_2_1;
|
|
result0 += M4(0.02380947, 0.061284587, -0.05441463, 0.0042443695, 0.0030761897, -0.015208306, 0.078272484, 0.07259532, -0.69980097, 0.053883918, -0.66474944, -0.28192675, -0.0073506925, -0.02169072, -0.0030826845, 0.056019828) * inp_2_0_2;
|
|
result0 += M4(-0.09168379, 0.1576644, -0.21850447, -0.02639354, -0.057879586, 0.040835056, 0.04107616, -0.051199373, 0.14163044, -0.041191746, 0.00058892305, 0.18929687, 0.15363866, 0.0118714785, 0.045324255, 0.059029303) * inp_2_1_2;
|
|
result0 += M4(-0.05409983, 0.035369724, -0.054201934, -0.029860638, 0.07611687, 0.03037094, -0.017978035, -0.02643063, 0.054626707, -0.053077925, -0.07135389, 0.021989368, 0.27177697, -0.120982274, 0.05465457, -0.0055884663) * inp_2_2_2;
|
|
result1 += M4(0.03493821, 0.043080125, 0.0035047126, 0.03849063, 0.06393502, 0.080511086, 0.016567344, -0.04203288, -0.13447708, -0.18894845, -0.019703919, -0.38417652, -0.23795354, -0.05671812, -0.116186336, -0.19162156) * inp_2_0_0;
|
|
result1 += M4(0.11228363, 0.03397793, -0.07902017, 0.058452863, -0.06233874, 0.006367689, -0.015540989, -0.04156123, 0.08450284, 0.062186997, 0.11030411, 0.25390634, 0.07090044, -0.031088028, 0.26935562, -0.20305486) * inp_2_1_0;
|
|
result1 += M4(0.0050216704, 0.007666345, -0.09408847, -0.027508035, 0.07744078, -0.0858829, -0.027486075, 0.08274661, 0.006676095, -0.013881031, 0.04249068, 0.069553934, -0.041518852, -0.005108007, 0.036308933, 0.0030829133) * inp_2_2_0;
|
|
result1 += M4(0.033124767, 0.02691598, 0.12422652, 0.022402922, 0.003461032, 0.0834087, 0.0049670525, -0.08223503, 0.19833384, -0.39163148, -0.0032815367, -1.0814372, 0.12159805, -0.010273577, -0.23912533, 0.37317404) * inp_2_0_1;
|
|
result1 += M4(-0.08536951, -0.17612375, 0.05276799, -0.20962986, -0.08263536, -0.052930966, 0.00743057, -0.029009147, 0.10561041, 0.2818868, -0.29869884, -0.27491766, 0.05908428, 0.30368373, -0.26572517, 0.9404553) * inp_2_1_1;
|
|
result1 += M4(0.0016916043, 0.028522108, 0.07193734, 0.056363445, -0.046301708, -0.092516795, -0.08134731, 0.077689245, 0.040148728, 0.035003696, -0.07245444, 0.033462606, -0.0028245777, 0.053184345, -0.09853116, -0.10275217) * inp_2_2_1;
|
|
result1 += M4(-0.10386956, 0.036102064, -0.04801716, 0.024403296, 0.03979467, -0.010715334, -0.092453316, 0.009072639, -0.14853695, -0.4095759, -0.13726044, -1.181292, 0.12713557, -0.049974903, -0.0067785005, -0.025925767) * inp_2_0_2;
|
|
result1 += M4(0.17170316, -0.13200247, -0.10278675, -0.21036159, 0.010567157, -0.06862603, 0.044727184, -0.06545584, 0.062359527, 0.089575015, 0.020037299, 0.030505616, -0.14575754, 0.23602366, -0.0005095541, 0.34895372) * inp_2_1_2;
|
|
result1 += M4(0.043262783, 0.0039280364, -0.03025543, -0.03344463, -0.04560934, -0.034239992, 0.004803988, -0.03988103, -0.015801929, -0.03631273, 0.07549837, -0.050647344, -0.19037472, -0.051564816, 0.18334927, 0.317741) * inp_2_2_2;
|
|
result2 += M4(0.017939113, 0.033808067, 0.013337525, 0.013338571, -0.026104536, 0.0021523843, 0.063422084, -0.008435395, 0.0014796632, -0.01593725, 0.3626783, 0.053695522, -0.23224196, -0.02838407, 0.19632407, -0.10586231) * inp_2_0_0;
|
|
result2 += M4(0.07115929, -0.10127339, -0.08562357, 0.014140073, -0.08980039, -0.05160459, 0.07157348, 0.068261, 0.023636142, 0.06479512, 0.12661268, 0.021240188, 0.06230698, -0.04328473, -0.22837892, -0.001120956) * inp_2_1_0;
|
|
result2 += M4(0.035445914, 0.044792846, 0.043199282, -0.058268394, -0.025570352, -0.07725152, 0.06447988, 0.08822611, -0.01431678, -0.0058982414, -0.09265362, 0.008680186, -0.13813685, -0.031577934, -0.07259829, 0.031110931) * inp_2_2_0;
|
|
result2 += M4(0.046934675, -0.046772964, -0.079469174, -0.041171886, 0.08315339, -0.030660095, -0.046969607, -0.044603206, 0.1088084, -0.3271513, 0.4827227, -0.00073664036, -0.11883226, -0.04861558, -0.107335076, 0.019172063) * inp_2_0_1;
|
|
result2 += M4(-0.06326496, 0.17220448, 0.18118903, -0.07713064, 0.05416083, 0.027322814, 0.09561443, 0.083409004, -0.15603146, -0.080915086, -0.32056457, 0.29618284, 0.6779934, 0.23570892, 0.05733165, -0.0550815) * inp_2_1_1;
|
|
result2 += M4(-0.09341537, -0.012737399, -0.11717243, 0.13527763, -0.03188427, 0.081400484, -0.08873576, -0.08301504, 0.059625685, 0.020269057, 0.010163222, 0.01648694, -0.023158077, -0.10110675, -0.03397466, -0.03787835) * inp_2_2_1;
|
|
result2 += M4(0.009182551, 0.020346174, 0.022581046, -0.058744755, 0.024768678, -0.057315476, 0.002723283, 0.02727885, -0.10255867, -0.24883124, 0.11087838, 0.20040388, -0.022830166, -0.020115368, -0.038202837, 0.094623245) * inp_2_0_2;
|
|
result2 += M4(0.028593393, -0.009744626, -0.15439138, 0.18110561, -0.058348615, 0.08616409, -0.04128328, 0.043372136, -0.19122921, -0.026798584, 0.0474954, -0.2528174, -0.101200946, -0.062434033, 0.03201408, 0.057487406) * inp_2_1_2;
|
|
result2 += M4(-0.0013166185, -0.018694969, -0.06734548, 0.06859655, 0.08430593, -0.01940095, 0.041605387, -0.055890627, -0.026176976, 0.011013619, 0.008539067, -0.019987924, -0.05475008, 0.027200852, 0.06283524, -0.2617577) * inp_2_2_2;
|
|
result3 += M4(0.013725179, -0.061932757, 0.045672245, 0.050379913, 0.0788672, -0.07998998, -0.016853781, -0.083918825, -0.15966989, 0.019984895, 0.0062870965, 0.28606316, 0.04915436, -0.011288526, 0.014948044, -0.07217862) * inp_2_0_0;
|
|
result3 += M4(0.099728964, 0.019777179, -0.05474517, 0.07847718, 0.08612672, -0.0842426, -0.060081817, 0.042335063, -0.08624977, 0.019552974, -0.09990619, 0.13993576, 0.2796346, -0.19697501, -0.035683982, -0.064266436) * inp_2_1_0;
|
|
result3 += M4(0.04046714, 0.044116072, -0.02477698, 0.046863623, -0.023625096, -0.05061273, 0.097901285, 0.04918668, 0.05531759, -0.0047716047, 0.03271952, 0.012900423, 0.010711064, -0.06245105, 0.038325533, -0.013503033) * inp_2_2_0;
|
|
result3 += M4(-0.016706655, -0.025500009, -0.002248052, -0.033774734, -0.079098985, -0.0051736236, -0.03546944, 0.07374924, -0.808723, -0.54684496, 0.05268168, 0.43765536, -0.08996461, 0.09093547, -0.0009905121, -0.0023711629) * inp_2_0_1;
|
|
result3 += M4(-0.14838111, -0.09827265, 0.2247343, -0.07767093, -0.08234698, 0.017499017, 0.048514254, 0.041478686, 0.20503302, -0.06761944, -0.26098138, 0.22514617, 0.19708426, 0.43080166, -0.17789333, -0.005553808) * inp_2_1_1;
|
|
result3 += M4(-0.09141383, -0.05565759, 0.052848406, 0.07877255, 0.092179105, 0.024564177, -0.010492621, 0.03952363, -0.011052075, -0.020905806, -0.014796553, -0.010458158, 0.16574955, 0.017756697, -0.112289995, -0.19061698) * inp_2_2_1;
|
|
result3 += M4(-0.012942791, -0.09701864, 0.039916683, 0.014046996, 0.02276102, -0.038104624, -0.05454621, -0.017268764, 0.14468248, 0.22772491, -0.13311438, 0.021296846, 0.018167509, 0.07592232, -0.043419812, 0.070708245) * inp_2_0_2;
|
|
result3 += M4(0.027744744, -0.04099133, -0.047819644, 0.0120844925, -0.033083685, -0.08534956, -0.07539677, -0.057747744, -0.0737084, -0.09107881, -0.0014842128, 0.4171075, -0.2807641, -0.14693853, 0.020089995, 0.12872282) * inp_2_1_2;
|
|
result3 += M4(-0.02645917, -0.036230065, 0.0013453881, -0.053119272, -0.04390945, -0.07620047, -0.061788194, 0.100186884, -0.021604344, 0.018383984, -0.006580493, 0.07360969, 0.06294429, 0.114074826, 0.0922581, 0.10860966) * inp_2_2_2;
|
|
const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.19773234, 0.0040025134, -0.19928102, 0.05982445, 0.10238352, -0.04136469, -0.10717808, 0.11060676, -0.024479374, 0.006888121, -0.049817488, -0.032475132, 0.0005060402, 0.027150108, -0.01389521, 0.03011166) * inp_3_0_0;
|
|
result0 += M4(-0.10382486, 0.016261084, 0.10074889, -0.036058605, -0.28665686, -0.22176702, 0.16188395, 0.072151855, 0.16958801, 0.19100334, -0.07586979, -0.008691376, -0.15168019, -0.076181546, 0.10785355, -0.044232473) * inp_3_1_0;
|
|
result0 += M4(-0.034232676, 0.12174497, 0.12540244, -0.00615272, -0.104418986, -0.1441321, -0.061950747, -0.039816953, -0.020058908, 0.09946141, 0.06271718, 0.03362542, -0.081220925, -0.07135675, -0.09202891, 0.031732865) * inp_3_2_0;
|
|
result0 += M4(-0.019257972, 0.23448941, 0.2296378, -0.013084976, 0.14259407, -0.23327808, 0.18332508, -0.0981734, -0.13848236, 0.21527474, -0.18854348, 0.03302416, -0.11364098, -0.062779635, -0.0003776771, 0.056423634) * inp_3_0_1;
|
|
result0 += M4(0.39262342, -0.60916656, 0.11890576, 0.008154147, -0.25903428, -0.6944411, -0.30703035, -0.26949775, 0.33802876, 0.5117939, 0.30353963, 0.22217596, -0.3185454, -0.19438665, -0.38905796, 0.20958135) * inp_3_1_1;
|
|
result0 += M4(-0.31000933, -0.095929846, -0.09873588, 0.0526352, -0.13051024, -0.06996045, 0.05097093, 0.12195579, 0.16819753, 0.06406409, -0.037765402, -0.05460069, -0.025167787, -0.6610153, -0.1280548, -0.18461606) * inp_3_2_1;
|
|
result0 += M4(-0.07626507, -0.027585149, 0.16430154, -0.068786874, -0.11812134, -0.20239244, 0.20651695, 0.117651924, 0.103023574, 0.06551238, -0.17495835, -0.053802766, 0.02019847, 0.042211883, -0.0792073, 0.04661087) * inp_3_0_2;
|
|
result0 += M4(0.18774892, -0.22588675, 0.11697573, -0.02098111, -0.098473765, -0.12450782, -0.2092941, -0.0026268938, 0.19874147, 0.13118155, 0.1781314, -0.00842974, -0.031992726, -0.05670131, 0.20423128, -0.00074609526) * inp_3_1_2;
|
|
result0 += M4(-0.06835169, 0.016473854, 0.029923515, -0.008443641, 0.23226224, -0.08359965, 0.1304451, 0.040005654, -0.050561354, 0.048547532, 0.04456618, 0.016158111, 0.062703654, 0.0092108315, -0.10469197, -0.06267775) * inp_3_2_2;
|
|
result1 += M4(0.15233582, 0.19007146, -0.041311294, 0.11373788, 0.11148112, 0.04161408, 0.022334483, 0.092816405, 0.028468959, 0.026755488, 0.008363913, 0.06473331, 0.0011780987, -0.019047763, 0.09523404, -0.08244701) * inp_3_0_0;
|
|
result1 += M4(-0.0098125795, -0.04201503, -0.1326031, -0.08045925, 0.20360228, 0.026760735, -0.26419288, 0.15499987, 0.056399953, 0.14250176, 0.028358502, -0.033754945, 0.12579347, -0.052264683, 0.2556933, 0.18257244) * inp_3_1_0;
|
|
result1 += M4(-0.095059596, 0.021393813, -0.026516682, -0.007583113, 0.048850205, -0.046467386, 0.030013174, 0.10140715, 0.05695302, 0.089559756, 0.021871507, -0.014836824, 0.1737422, 0.13004734, 0.20477152, 0.23618454) * inp_3_2_0;
|
|
result1 += M4(0.05689886, 0.1384205, 0.055440158, -0.2634709, 0.40263698, -0.018188346, -0.19718939, -0.021893239, -0.25167263, -0.0302215, 0.1494233, -0.060639318, 0.17440826, -0.005995121, 0.1752686, 0.08358246) * inp_3_0_1;
|
|
result1 += M4(-0.11837638, -0.27371556, 0.20988613, -0.92515045, 0.7948933, 0.19478232, 0.08756208, 0.09321127, -0.6920545, -0.11605966, -0.0602716, -0.47996294, 0.5221947, -0.036128834, -0.085451365, -0.52814066) * inp_3_1_1;
|
|
result1 += M4(0.09521452, -0.08006706, -0.08897774, -0.21125159, -0.03079481, 0.046952967, -0.026647106, -0.047232367, -0.124124646, 0.033768218, 0.052450426, -0.12203157, 0.47032583, -0.22275928, -0.102736816, 0.026622942) * inp_3_2_1;
|
|
result1 += M4(-0.07079185, 0.14451918, 0.17062652, 0.025385572, 0.22244516, 0.034709886, 0.17516658, -0.033176485, -0.19369905, -0.020304324, -0.14346293, 0.16571403, 0.049146105, 0.019382618, -0.058748543, 0.12871866) * inp_3_0_2;
|
|
result1 += M4(0.102248736, -0.0337236, 0.0071350574, -0.03248206, 0.06347463, -0.041544788, -0.1049145, 0.010971773, -0.085705966, 0.18427896, 0.17459114, 0.3247006, 0.20611103, 0.119550996, 0.08841569, 0.15119329) * inp_3_1_2;
|
|
result1 += M4(0.17931168, 0.16252843, -0.020840246, -0.08419524, -0.12440272, -0.006738672, 0.14970261, 0.157197, 0.039103635, 0.099136084, -0.07311536, 0.09651158, 0.05364095, -0.15176328, -0.13609104, -0.0833755) * inp_3_2_2;
|
|
result2 += M4(0.090288036, -0.055971667, 0.1480006, 0.04578793, 0.09695163, -0.03401928, -0.05294258, 0.07264915, 0.034997385, 0.033377692, -0.012243337, 0.006176055, 0.0420061, 0.0017590974, 0.059313867, 0.055868845) * inp_3_0_0;
|
|
result2 += M4(0.12215243, 0.049981244, 0.078155525, -0.03721692, 0.00081549905, 0.070513256, 0.023916868, -0.068173625, 0.21320094, -0.0793969, 0.12491556, 0.06683648, 0.12349867, 0.09697064, -0.19347167, 0.16261415) * inp_3_1_0;
|
|
result2 += M4(0.0036620256, -0.008251636, 0.16044019, 0.0038811674, 0.17711705, 0.056581743, -0.059190765, 0.00294404, 0.024791382, 0.013117927, 0.1321051, -0.023086015, 0.21306372, -0.060495336, -0.06985338, 0.12514174) * inp_3_2_0;
|
|
result2 += M4(0.0312112, -0.11488951, 0.31389415, -0.11953462, -0.068046354, -0.14420296, 0.12760502, -0.09679173, 0.08755888, 0.07094847, -0.095602505, -0.016698444, 0.012757536, 0.045577306, -0.10753733, -0.042271823) * inp_3_0_1;
|
|
result2 += M4(-0.49250764, -0.12012084, 0.2342404, -0.6256464, 0.367146, 0.14924204, 0.6591491, 0.048179522, -0.35884148, -0.1678469, -0.3900169, -0.30488762, -0.22384477, 0.004431448, -0.00562058, -0.24071592) * inp_3_1_1;
|
|
result2 += M4(0.12204024, 0.074117884, 0.12283045, -0.07810565, 0.13246378, 0.08325109, 0.22934341, -0.2973156, -0.043851215, -0.085699715, -0.25161895, 0.020704068, 0.21119992, 0.04622388, -0.055380527, 0.14471737) * inp_3_2_1;
|
|
result2 += M4(-0.074895434, -0.06270641, 0.060614184, 0.13333398, -0.035534456, -0.018466184, -0.10737744, 0.14484845, 0.12699716, 0.07270939, 0.10774756, 4.7753732e-05, 0.14590403, 0.03653374, -0.01690645, 0.05341992) * inp_3_0_2;
|
|
result2 += M4(0.165294, 0.12768765, 0.16723107, -0.12056439, 0.12686896, -0.030246196, -0.15657917, 0.07142477, -0.0709559, -0.008950173, -0.036407184, -0.078462794, 0.22401553, 0.11724086, -0.09732008, 0.26752585) * inp_3_1_2;
|
|
result2 += M4(0.05091784, -0.0036859773, -0.025390401, 0.22719525, -0.06461589, 0.036899187, 0.099762656, -0.20599271, 0.054967627, -0.008055024, -0.031206395, 0.106771916, 0.056380093, 0.058899213, -0.121471696, -0.055402637) * inp_3_2_2;
|
|
result3 += M4(-0.026850104, 0.032930214, -0.07506575, 0.029188028, 0.028521763, 0.099263564, -0.015998442, 0.15630326, 0.006329006, -0.053651378, 0.022603864, -0.027682748, 0.012513269, -0.03688965, 0.009188487, 0.066707715) * inp_3_0_0;
|
|
result3 += M4(0.13048565, 0.1046288, 0.059150916, 0.046943232, -0.12111611, -0.058344387, 0.06613853, 0.15171036, -0.026610931, 0.059549157, -0.0065440265, -0.030438675, -0.3000826, -0.10964842, -0.019215975, 0.042909347) * inp_3_1_0;
|
|
result3 += M4(-0.14147408, 0.020621499, -0.061839666, -0.0786287, 0.05444145, 0.09064469, 0.051645663, 0.02046913, -0.12140889, -0.06441972, -0.14962567, -0.07454032, -0.21723782, -0.12441716, 0.0063856477, 0.07751481) * inp_3_2_0;
|
|
result3 += M4(-0.15052302, 0.12364792, -0.07333968, -0.106281765, -0.26465237, 0.18124165, -0.15878557, -0.0863266, 0.28953597, -0.104721405, 0.010609605, 0.00013846748, 0.023336895, -0.09811715, -0.062712066, 0.15814303) * inp_3_0_1;
|
|
result3 += M4(-0.1505338, -0.25127456, -0.010185877, 0.10018608, -0.46148112, 0.7820113, -0.2994022, 0.16474903, 0.393985, -0.53803116, 0.2856222, -0.1782298, 0.10528467, -0.16190316, 0.018561272, 0.008876788) * inp_3_1_1;
|
|
result3 += M4(0.16577095, 0.07553432, 0.074692585, 0.05206232, -0.087388374, 0.090719454, -0.25296533, 0.09053648, 0.20446649, 0.0515907, 0.13263685, -0.122028425, -0.04356239, -0.15438107, 0.17627592, 0.03334808) * inp_3_2_1;
|
|
result3 += M4(0.038740914, 0.18525128, 0.04935017, -0.058124084, 0.054568756, 0.21678539, -0.08873143, 0.01656806, 0.026606297, -0.4565078, 0.09821042, 0.04551522, -0.059372384, -0.22013853, 0.015601125, 0.093543395) * inp_3_0_2;
|
|
result3 += M4(-0.0804898, 0.0008990494, -0.030963572, 0.12500906, -0.104006045, -0.014657448, 0.022204293, 0.12654208, 0.06328144, -0.09054795, 0.096949615, -0.06529515, 0.06216197, -0.20189077, -0.014465873, -0.061704617) * inp_3_1_2;
|
|
result3 += M4(-0.16651894, -0.15619473, -0.038332544, 0.05948654, 0.04223932, 0.110509776, 0.015117032, 0.13600627, -0.161015, -0.1257106, -0.037504923, -0.059075683, 0.09092152, -0.022672275, 0.017996153, -0.10931976) * inp_3_2_2;
|
|
const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2);
|
|
imageStore(out_image, output_base + ivec2(0, 0), result0);
|
|
imageStore(out_image, output_base + ivec2(1, 0), result1);
|
|
imageStore(out_image, output_base + ivec2(0, 1), result2);
|
|
imageStore(out_image, output_base + ivec2(1, 1), result3);
|
|
}
|
|
|
|
//!DESC ArtCNN C4F16 DS (Conv2D-6)
|
|
//!COMPUTE 12 16 12 16
|
|
//!HOOK LUMA
|
|
//!BIND conv2d
|
|
//!BIND conv2d_5
|
|
//!SAVE conv2d_6
|
|
//!WIDTH LUMA.w 1.0 *
|
|
//!HEIGHT LUMA.h 1.0 *
|
|
//!COMPONENTS 4
|
|
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
|
|
// Precision selection for this pass. The extension is requested with "enable";
// if its macro ends up defined, V4 / M4 / F name the 16-bit float vector,
// matrix and scalar types, otherwise they fall back to the 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
|
|
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
|
|
# define V4 f16vec4
|
|
# define M4 f16mat4
|
|
# define F float16_t
|
|
#else
|
|
# define V4 vec4
|
|
# define M4 mat4
|
|
# define F float
|
|
#endif
|
|
|
|
// Footprint of the convolution kernel applied by hook(): 3 columns by 3 rows.
const ivec2 ksize = ivec2(3, 3);
|
|
// Distance from the kernel centre to its edge; integer division gives (1, 1).
const ivec2 offset = ksize / 2;
|
|
// Workgroup dimensions in x and y. Presumably these come from the COMPUTE
// directive of this pass (host-defined) - confirm against the host docs.
const ivec2 wg_size = ivec2(gl_WorkGroupSize);
|
|
// Size of the tile one workgroup has to load: its own cells plus a border of
// `offset` cells on every side (wg_size + 2 in each dimension).
const ivec2 isize = wg_size + ksize - 1;
|
|
// Workgroup-shared input tile: 4 planes of isize.y rows by isize.x columns,
// one V4 per cell. Filled and read by hook().
shared V4 inp[4][isize.y][isize.x];
|
|
// Conv2D-6 pass entry point: each invocation produces one 4-component output
// texel as a 3x3 convolution over four shared-memory input planes.
//
// Stage 1 (load): the workgroup cooperatively fills inp[0..3] with a tile of
//   isize.x by isize.y cells. Each cell maps to one 2x2 block of the bound
//   textures; the four texels of that block go to planes 0..3 in the order
//   (0,0), (1,0), (0,1), (1,1). Every stored value is the sum of the conv2d_5
//   and conv2d texels at the same coordinate, each scaled by its *_mul factor.
// Stage 2 (convolve): after the barrier, each invocation reads the 3x3
//   neighbourhood of its own cell from every plane (4 * 9 = 36 taps) and
//   accumulates M4 * tap into result0, in the fixed order written below.
// Stage 3 (store): result0 is written to out_image at the invocation's global
//   coordinate.
//
// conv2d_raw, conv2d_5_raw, conv2d_mul, conv2d_5_mul and out_image are not
// declared in this part of the file; presumably the host injects them for the
// BIND / COMPUTE directives of this pass - confirm against the host docs.
// The numeric constants are generated weights; do not edit them by hand.
void hook() {
|
|
const uvec2 local_xy = gl_LocalInvocationID.xy;
|
|
// Origin of this workgroup's cells, in output-texel coordinates.
ivec2 base = ivec2(gl_WorkGroupID) * wg_size;
|
|
// Strided fill: every invocation starts at its own local id and steps by the
// workgroup size, so together the invocations cover the larger isize tile.
for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
|
|
for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
|
|
// Top-left texel of the 2x2 source block for tile cell (x, y). Subtracting
// `offset` shifts the tile so the border cells surround the workgroup's own.
// NOTE(review): the coordinates are not clamped, so workgroups at the image
// edge fetch outside the texture (e.g. negative coordinates); the result
// depends on the API's out-of-range texelFetch behaviour - confirm.
const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2);
|
|
inp[0][y][x] = V4(conv2d_5_mul * texelFetch(conv2d_5_raw, input_base + ivec2(0, 0), 0) + conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(0, 0), 0));
|
|
inp[1][y][x] = V4(conv2d_5_mul * texelFetch(conv2d_5_raw, input_base + ivec2(1, 0), 0) + conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(1, 0), 0));
|
|
inp[2][y][x] = V4(conv2d_5_mul * texelFetch(conv2d_5_raw, input_base + ivec2(0, 1), 0) + conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(0, 1), 0));
|
|
inp[3][y][x] = V4(conv2d_5_mul * texelFetch(conv2d_5_raw, input_base + ivec2(1, 1), 0) + conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(1, 1), 0));
|
|
}
|
|
}
|
|
|
|
// Wait for the whole workgroup before reading cells that other invocations
// loaded.
barrier();
|
|
// Accumulator with its constant starting value (presumably the layer bias).
V4 result0 = V4(0.07942048, 0.07821269, 0.082289174, 0.07892917);
|
|
// Tap naming: inp_P_X_Y is plane P at column offset X and row offset Y from
// local_xy inside the tile; X = Y = 1 is this invocation's own cell.
// Plane 0 holds texel (0, 0) of each 2x2 source block.
const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.11636982, -0.059329264, -0.032452207, -0.05000699, 0.0027298967, -0.014784354, -0.018898953, -0.026387399, -0.04866522, -0.0034517667, -0.036040183, -0.015461949, 0.15003136, 0.1010024, 0.06595967, 0.08420603) * inp_0_0_0;
|
|
result0 += M4(-0.17095192, -0.23162094, -0.0027919277, 0.018584667, 0.0009346624, 0.006145241, -0.06807188, 0.005982439, -0.10259334, -0.23113409, 0.05056581, 0.015863553, 0.021340631, 0.13316575, 0.05575838, 0.008457389) * inp_0_1_0;
|
|
result0 += M4(-0.008553572, -0.054848403, -0.01347387, -0.04445713, -0.08472665, -0.08376867, -0.077503465, -0.10499659, -0.056032624, -0.013359254, -0.030728646, 0.043963596, 0.08870928, 0.045146573, 0.05971686, 0.03278427) * inp_0_2_0;
|
|
result0 += M4(0.19588293, -0.04648465, 0.027297234, 0.05453258, 0.06473069, -0.043464746, 0.223268, -0.02648231, 0.0062739355, 0.024977092, 0.08271271, -0.005075647, 0.17125973, 0.10962147, 0.1618092, 0.11265603) * inp_0_0_1;
|
|
result0 += M4(0.098922394, 0.26493427, -0.08755517, -0.13988777, -0.24570113, 0.14201127, -0.18969332, 0.32599792, -0.06624849, 0.08593866, -0.022810698, -0.05292953, 0.19407418, 0.19186307, 0.14618841, 0.22404681) * inp_0_1_1;
|
|
result0 += M4(0.0024095443, 0.09542953, 0.05893327, 0.04963878, 0.043981288, -0.09458787, 0.048440147, -0.13017173, 0.010817999, 0.009894588, -0.007898175, 0.11123364, 0.15274785, 0.18122944, 0.14603429, 0.21482544) * inp_0_2_1;
|
|
result0 += M4(-0.015920863, 0.0045611686, 0.030535664, -0.0011332423, -0.005275443, -0.0005943754, -0.025727643, -0.0031081287, 0.04642552, 0.018361727, 0.058243725, 0.029665668, 0.10241001, 0.02798229, 0.18251711, 0.004523951) * inp_0_0_2;
|
|
result0 += M4(-0.043556556, -0.065940246, 0.09670369, 0.07387089, 0.046474013, -0.00025676662, 0.04237812, -0.08965837, 0.037509654, 0.017891284, -0.09904582, 0.07562159, 0.038349077, 0.067137405, 0.06425105, 0.1743836) * inp_0_1_2;
|
|
result0 += M4(-0.019333778, -0.01060451, -0.0059877997, 0.06399235, 0.09297337, 0.07869079, 0.11191077, 0.041706722, 0.010740478, 0.011300985, -0.030533317, -0.109226234, 0.061926227, 0.124412335, 0.073686674, 0.10737882) * inp_0_2_2;
|
|
// Plane 1 holds texel (1, 0) of each 2x2 source block.
const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.07330701, -0.026571156, -0.043166053, -0.057155345, -0.06023793, -0.08201396, -0.017985104, 0.00259785, -0.046082836, 0.025884246, 0.015084661, -0.012299801, 0.07279219, 0.08398021, 0.01444768, 0.013628334) * inp_1_0_0;
|
|
result0 += M4(0.028485537, -0.13664272, -0.14637338, -0.03216143, 0.15768725, 0.17180555, 0.22409278, 0.07486649, 0.054554522, -0.06476858, 0.029637309, 0.03151017, -0.031520635, -0.13686366, 0.0027829686, -0.0009537584) * inp_1_1_0;
|
|
result0 += M4(-0.07279511, 0.047922287, -0.039518423, -0.03210193, -0.001693885, 0.10270714, 0.011595117, 0.141228, -0.041493673, 0.068810634, -0.011981957, 0.05599499, 0.02523943, -0.058428146, 0.016682947, -0.020516759) * inp_1_2_0;
|
|
result0 += M4(-0.011101113, -0.09268024, 0.078101635, -0.06144573, -0.20039465, -0.10002031, -0.27066922, -0.23258542, 0.13703087, 0.015634691, -0.00731571, 0.06905305, 0.007901865, 0.06765415, 0.1434028, 0.09154159) * inp_1_0_1;
|
|
result0 += M4(0.20163926, 0.020502245, 0.22196925, 0.04834591, 0.14234507, 0.12385057, -0.14618975, 0.05025507, -0.08665632, 0.028551226, -0.03954753, -0.07490453, 0.08381951, -0.0992167, -0.122530974, 0.047114052) * inp_1_1_1;
|
|
result0 += M4(0.050744314, 0.12697436, 0.014369487, 0.062857516, -0.03313285, 0.013857357, -0.08860406, -0.14726935, 0.01688374, -0.078021325, 0.011625158, 0.01023778, 0.027463386, 0.046657313, 0.023117632, -0.013085234) * inp_1_2_1;
|
|
result0 += M4(-0.009447039, -0.0062994813, -0.030561144, 0.015915044, -0.09330408, -0.0530166, -0.12407174, -0.015852062, 0.0051147086, -0.007351941, 0.002542378, -0.008233577, -0.014825013, -0.0015592017, -0.05729564, -0.017423365) * inp_1_0_2;
|
|
result0 += M4(-0.004105685, 0.07282294, 0.069418795, -0.05498573, -0.00208156, -0.040487334, 0.1445179, 0.051521864, -0.025368946, -0.013219188, -0.006728517, -0.07625849, -0.024420908, -0.0044598207, -0.015377865, -0.13657852) * inp_1_1_2;
|
|
result0 += M4(0.03599884, -0.0105478745, 0.038746145, 0.0507682, -0.025521498, -0.024232486, 0.011020209, 0.10047172, 0.05457663, 0.006208522, 0.055378985, -0.08648434, -0.0041283113, -0.01650629, 0.004291274, 0.023595793) * inp_1_2_2;
|
|
// Plane 2 holds texel (0, 1) of each 2x2 source block.
const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(0.02625175, 0.044448208, 0.025820026, 0.042625286, -0.14325926, -0.07368913, -0.035370324, -0.0483055, 0.022411536, 0.07837778, 0.014711007, 0.0089601325, 0.14614141, -0.013843195, -0.00183343, 0.030992392) * inp_2_0_0;
|
|
result0 += M4(-0.13943392, -0.027668366, -0.00605077, -0.010792682, -0.13432038, -0.06360804, 0.023869146, 0.018617587, 0.010982281, -0.12519816, -0.01816795, -0.012343567, -0.14264211, 0.0608064, -0.057635155, -0.12452771) * inp_2_1_0;
|
|
result0 += M4(-0.0023510873, -0.10876355, -0.020073038, -0.06581549, 0.024724813, 0.02788125, 0.012666816, 0.0075549004, 0.05302175, 0.062297385, 0.05456386, 0.035653763, -0.023079487, 0.04906416, 0.003889142, 0.048161693) * inp_2_2_0;
|
|
result0 += M4(0.013490228, 0.07731822, 0.024770379, 0.07787996, -0.24148642, -0.060349345, -0.287201, -0.058063217, 0.022927651, 0.07323395, 0.052245375, 0.14675435, 0.13006328, -0.0066324794, 0.13043395, -0.049508587) * inp_2_0_1;
|
|
result0 += M4(-0.06425463, -0.22909608, -0.30965263, -0.26137868, -0.34838507, -0.29899263, -0.1757177, -0.14641738, 0.04428377, -0.120601766, -0.072301604, -0.20429672, -0.029560901, -0.09850866, -0.010453421, 0.10752421) * inp_2_1_1;
|
|
result0 += M4(-0.12946495, -0.026556134, -0.054903407, -0.053088266, 0.4175077, -0.0046152826, 0.4301187, 0.08127058, -0.07252503, -0.1153844, -0.047318444, -0.099470355, -0.041942548, -0.054621693, 0.0133567965, 0.042263526) * inp_2_2_1;
|
|
result0 += M4(0.025980724, 0.014801285, 0.072112806, 0.0075275926, -0.0875942, -0.0022423542, -0.13813959, 0.011640581, 0.0011581844, 0.010904438, 0.009565574, 0.025610453, -0.006016893, 0.008746938, 0.007246336, -0.0147160785) * inp_2_0_2;
|
|
result0 += M4(0.0027364742, -0.043662306, 0.12877765, 0.017689677, 0.068794884, 0.019460868, -0.15110509, -0.13969134, 0.029709235, 0.078996494, 0.09302569, -0.1334812, 0.0063821794, -0.04602352, -0.024196027, -0.041869137) * inp_2_1_2;
|
|
result0 += M4(0.0033705158, 0.023012036, 0.00072865334, -0.013672455, 0.122846015, 0.15923437, 0.16657215, 0.10410814, 0.004356331, 0.010504295, 0.022496942, 0.07418264, -0.012007829, 0.032746863, 0.0027330485, 0.046688467) * inp_2_2_2;
|
|
// Plane 3 holds texel (1, 1) of each 2x2 source block.
const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0];
|
|
const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1];
|
|
const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2];
|
|
const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0];
|
|
const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1];
|
|
const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2];
|
|
const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0];
|
|
const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1];
|
|
const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2];
|
|
result0 += M4(-0.051054563, -0.00456595, -0.042562753, -0.024147434, -0.00081743393, 0.013612098, 0.01936531, 0.002067961, 0.024665983, 0.012183512, 0.021107063, 0.009794894, -0.117395766, -0.0810702, -0.04262109, -0.062447358) * inp_3_0_0;
|
|
result0 += M4(-0.03957509, -0.09165404, 0.007803734, -0.0041818433, 0.03165929, 0.027758801, -0.029112441, -0.0097638285, -0.03506947, -0.05936886, -0.05541801, -0.042584933, -0.059623193, -0.078187235, 0.024642546, 0.025905272) * inp_3_1_0;
|
|
result0 += M4(0.020659385, 0.05987362, 0.019810732, 0.054049026, -0.11781507, 0.03852306, -0.06292107, 0.0017821233, 0.040627886, 0.060671348, 0.02914991, 0.040991165, -0.07832766, -0.089677714, -0.055762652, -0.051040996) * inp_3_2_0;
|
|
result0 += M4(-0.040154513, -0.05843724, 0.06427843, -0.017596226, -0.051048756, 0.02946234, -0.07376165, 0.0671994, 0.035830647, -0.025981605, 0.022482721, 0.029612, -0.060820963, 0.005068918, -0.095560856, 0.030644644) * inp_3_0_1;
|
|
result0 += M4(0.24542196, 0.28494206, -0.21151958, -0.045248456, 0.14401782, -0.25568277, 0.1352637, -0.14852664, -0.48348254, -0.38713354, -0.39508206, -0.36013454, 0.35612994, 0.3228882, 0.34563643, 0.2669279) * inp_3_1_1;
|
|
result0 += M4(0.019076044, -0.0065221814, 0.03210429, -0.044332933, -0.06425407, 0.15088789, -0.10005643, 0.058653727, 0.026831932, -0.028788581, -0.019654373, -0.021367684, -0.11187411, -0.13136683, -0.10140025, -0.14363958) * inp_3_2_1;
|
|
result0 += M4(-0.03955165, -0.01919648, -0.06769437, -0.015274202, -0.012194205, 0.019330177, -0.0036170697, -0.0025878681, 0.1370301, 0.068571426, 0.18895936, 0.022519762, -0.032886382, 0.019316927, -0.05880236, 0.016897488) * inp_3_0_2;
|
|
result0 += M4(0.0019176262, -0.017462652, 0.126458, 0.06268182, 0.033145472, -0.025241157, 0.03292383, -0.102716744, 0.17617765, 0.16449314, -0.025039112, 0.07926759, 0.14934446, 0.055328105, 0.08959302, -0.0035233765) * inp_3_1_2;
|
|
result0 += M4(-0.03253326, -0.031951077, -0.018886896, -0.010784618, 0.009762149, 0.025690984, 0.034363244, 0.12997335, 0.10610043, 0.17722273, 0.09206027, 0.11330327, -0.018846225, 0.0020707906, -0.032037612, -0.03225054) * inp_3_2_2;
|
|
// One output texel per invocation; the multiply by ivec2(1, 1) leaves the
// coordinate unchanged.
const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(1, 1);
|
|
imageStore(out_image, output_base + ivec2(0, 0), result0);
|
|
}
|
|
|
|
//!DESC ArtCNN C4F16 DS (Depth-To-Space)
|
|
//!COMPUTE 12 16 12 16
|
|
//!HOOK LUMA
|
|
//!BIND conv2d_6
|
|
//!WIDTH LUMA.w 2.0 *
|
|
//!HEIGHT LUMA.h 2.0 *
|
|
//!COMPONENTS 4
|
|
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
|
|
// Precision selection: with the float16 extension macro defined, V4 / M4 / F
// name the 16-bit float vector, matrix and scalar types; otherwise the 32-bit
// ones. The hook() that follows in this pass is written with plain vec4 /
// vec2 / ivec2 and does not reference these macros.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
|
|
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
|
|
# define V4 f16vec4
|
|
# define M4 f16mat4
|
|
# define F float16_t
|
|
#else
|
|
# define V4 vec4
|
|
# define M4 mat4
|
|
# define F float
|
|
#endif
|
|
|
|
// Depth-to-space entry point: each invocation writes one output pixel whose
// value is a single component of the conv2d_6 texel that covers it.
//
// The output is addressed at a finer grid than conv2d_6, so the fractional
// position inside the source texel tells which half (0 or 1) of the texel the
// pixel falls into along each axis. Those two bits pick the component
// (index = 2 * row_half + column_half). The texture is sampled at the source
// texel's centre, so every pixel inside a texel reads the same four values.
//
// conv2d_6_pos, conv2d_6_size, conv2d_6_pt, conv2d_6_tex and out_image are not
// declared in this part of the file; presumably the host injects them for the
// BIND / COMPUTE directives - confirm against the host documentation.
void hook() {
    // Position of this pixel inside its source texel, each axis in [0, 1).
    vec2 sub_pos = fract(conv2d_6_pos * conv2d_6_size);

    // Half of the texel the pixel lies in, per axis, and the matching component.
    ivec2 half_idx = ivec2(sub_pos * vec2(2.0));
    int component = half_idx.y * 2 + half_idx.x;

    // Move the sampling position onto the centre of the source texel.
    vec2 texel_center = (vec2(0.5) - sub_pos) * conv2d_6_pt + conv2d_6_pos;
    vec4 src_texel = conv2d_6_tex(texel_center);

    // Only the first channel carries data; alpha stays at 1.
    vec4 pixel = vec4(src_texel[component], 0.0, 0.0, 1.0);
    imageStore(out_image, ivec2(gl_GlobalInvocationID), clamp(pixel, 0.0, 1.0));
}
|