astc_decoder: Combine FastReplicate functions to work around new NV driver bug
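The new Nvidia drivers have a bug where the FastReplicateTo6 function ends up performing its lookup in the REPLICATE_*_BIT_TO_8_TABLE tables rather than the REPLICATE_*_BIT_TO_6_TABLE tables. This seems to be an optimization gone wrong. Combining the logic of the two FastReplicate functions into a single FastReplicate(value, num_bits, to_bit) function, which FastReplicateTo6 and FastReplicateTo8 now call, seems to work around the bug.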
This commit is contained in:
parent
480b03b645
commit
a5bff8e9b3
|
@ -155,9 +155,6 @@ uint SwizzleOffset(uvec2 pos) {
|
||||||
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
|
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
|
||||||
// is the same as [(num_bits - 1):0] and repeats all the way down.
|
// is the same as [(num_bits - 1):0] and repeats all the way down.
|
||||||
uint Replicate(uint val, uint num_bits, uint to_bit) {
|
uint Replicate(uint val, uint num_bits, uint to_bit) {
|
||||||
if (num_bits == 0 || to_bit == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
const uint v = val & uint((1 << num_bits) - 1);
|
const uint v = val & uint((1 << num_bits) - 1);
|
||||||
uint res = v;
|
uint res = v;
|
||||||
uint reslen = num_bits;
|
uint reslen = num_bits;
|
||||||
|
@ -187,7 +184,29 @@ uint ReplicateBitTo9(uint value) {
|
||||||
return REPLICATE_1_BIT_TO_9_TABLE[value];
|
return REPLICATE_1_BIT_TO_9_TABLE[value];
|
||||||
}
|
}
|
||||||
|
|
||||||
uint FastReplicateTo8(uint value, uint num_bits) {
|
uint FastReplicate(uint value, uint num_bits, uint to_bit) {
|
||||||
|
if (num_bits == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (num_bits == to_bit) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
if (to_bit == 6) {
|
||||||
|
switch (num_bits) {
|
||||||
|
case 1:
|
||||||
|
return REPLICATE_1_BIT_TO_6_TABLE[value];
|
||||||
|
case 2:
|
||||||
|
return REPLICATE_2_BIT_TO_6_TABLE[value];
|
||||||
|
case 3:
|
||||||
|
return REPLICATE_3_BIT_TO_6_TABLE[value];
|
||||||
|
case 4:
|
||||||
|
return REPLICATE_4_BIT_TO_6_TABLE[value];
|
||||||
|
case 5:
|
||||||
|
return REPLICATE_5_BIT_TO_6_TABLE[value];
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else { /* if (to_bit == 8) */
|
||||||
switch (num_bits) {
|
switch (num_bits) {
|
||||||
case 1:
|
case 1:
|
||||||
return REPLICATE_1_BIT_TO_8_TABLE[value];
|
return REPLICATE_1_BIT_TO_8_TABLE[value];
|
||||||
|
@ -203,26 +222,19 @@ uint FastReplicateTo8(uint value, uint num_bits) {
|
||||||
return REPLICATE_6_BIT_TO_8_TABLE[value];
|
return REPLICATE_6_BIT_TO_8_TABLE[value];
|
||||||
case 7:
|
case 7:
|
||||||
return REPLICATE_7_BIT_TO_8_TABLE[value];
|
return REPLICATE_7_BIT_TO_8_TABLE[value];
|
||||||
case 8:
|
default:
|
||||||
return value;
|
break;
|
||||||
}
|
}
|
||||||
return Replicate(value, num_bits, 8);
|
}
|
||||||
|
return Replicate(value, num_bits, to_bit);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint FastReplicateTo8(uint value, uint num_bits) {
|
||||||
|
return FastReplicate(value, num_bits, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint FastReplicateTo6(uint value, uint num_bits) {
|
uint FastReplicateTo6(uint value, uint num_bits) {
|
||||||
switch (num_bits) {
|
return FastReplicate(value, num_bits, 6);
|
||||||
case 1:
|
|
||||||
return REPLICATE_1_BIT_TO_6_TABLE[value];
|
|
||||||
case 2:
|
|
||||||
return REPLICATE_2_BIT_TO_6_TABLE[value];
|
|
||||||
case 3:
|
|
||||||
return REPLICATE_3_BIT_TO_6_TABLE[value];
|
|
||||||
case 4:
|
|
||||||
return REPLICATE_4_BIT_TO_6_TABLE[value];
|
|
||||||
case 5:
|
|
||||||
return REPLICATE_5_BIT_TO_6_TABLE[value];
|
|
||||||
}
|
|
||||||
return Replicate(value, num_bits, 6);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint Div3Floor(uint v) {
|
uint Div3Floor(uint v) {
|
||||||
|
|
Reference in New Issue