Merge pull request #2423 from FernandoS27/half-correct

Corrections on Half Float operations: HADD2 HMUL2 and HFMA2
2024-12-20 00:23:26 +00:00 · 2019-04-28 22:24:22 -04:00 · 2019-04-28 22:24:22 -04:00 · 9a3737120d
commit 9a3737120d
parent 78574e7a47 623b2e4b8f
2 changed files with 16 additions and 15 deletions
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@ -9,6 +9,7 @@

 namespace VideoCommon::Shader {

+using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
        }
    }
-    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");

    const bool negate_a =
        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);

-    Node op_b = [&]() {
+    auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HMUL2_C:
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::HADD2_R:
        case OpCode::Id::HMUL2_R:
-            return GetRegister(instr.gpr20);
+            return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
        default:
            UNREACHABLE();
-            return Immediate(0);
+            return {HalfType::F32, Immediate(0)};
        }
    }();
-    op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b);
-    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
+    op_b = UnpackHalfFloat(op_b, type_b);
+    // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
+    Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);

    Node value = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HADD2_R:
-            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
+            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
        case OpCode::Id::HMUL2_C:
        case OpCode::Id::HMUL2_R:
-            return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
+            return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
        default:
            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
            return Immediate(0);
        }
    }();
+    value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);

    SetRegister(bb, instr.gpr0, value);
@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
        case OpCode::Id::HFMA2_CR:
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
-            return {instr.hfma2.saturate, instr.hfma2.type_b,
+            return {instr.hfma2.saturate, HalfType::F32,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_RC:
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
-                    instr.hfma2.type_b,
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+                    HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::HFMA2_RR:
            neg_b = instr.hfma2.rr.negate_b;
            neg_c = instr.hfma2.rr.negate_c;
@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
            return {false, identity, Immediate(0), identity, Immediate(0)};
        }
    }();
-    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");

    const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
    op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
    op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);

    Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
+    value = GetSaturatedHalfFloat(value, saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, value);
@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader