From 68d075d1e8af66c3f8044b162344bffc943168a8 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Mon, 31 May 2021 14:17:00 -0400
Subject: [PATCH] glsl: Fix atomic SSBO offsets

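Index atomic SSBO accesses with the consumed offset value shifted right
by two (`offset>>2`) instead of the raw immediate `offset.U32()`.

Also implement the IsHelperInvocation and LaneId getters.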
---
 .../backend/glsl/emit_glsl_atomic.cpp         | 133 +++++++++---------
 .../backend/glsl/emit_glsl_instructions.h     |   2 +-
 .../glsl/emit_glsl_not_implemented.cpp        |   4 +-
 .../backend/glsl/emit_glsl_warp.cpp           |   2 +-
 4 files changed, 74 insertions(+), 67 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
index 9714ffe33..5394f4a8c 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -27,16 +27,16 @@ void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset
 void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                      const IR::Value& offset, std::string_view value, std::string_view function) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
-    const std::string ssbo{
-        fmt::format("{}_ssbo{}[{}]", ctx.stage_name, binding.U32(), offset.U32())};
+    const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
+                                       ctx.var_alloc.Consume(offset))};
     ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret);
 }
 
 void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                         const IR::Value& offset, std::string_view value,
                         std::string_view function) {
-    const std::string ssbo{
-        fmt::format("{}_ssbo{}[{}]", ctx.stage_name, binding.U32(), offset.U32())};
+    const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
+                                       ctx.var_alloc.Consume(offset))};
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
     ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret);
     ctx.AddF32("{}=uintBitsToFloat({});", inst, ret);
@@ -111,8 +111,8 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi
 
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
-    ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(),
-               value);
+    ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+               ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -123,8 +123,8 @@ void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value&
 
 void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
-    ctx.AddU32("{}=atomicMin({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(),
-               value);
+    ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+               ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -135,8 +135,8 @@ void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value&
 
 void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
-    ctx.AddU32("{}=atomicMax({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(),
-               value);
+    ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+               ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -151,116 +151,123 @@ void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
 
 void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             const IR::Value& offset, std::string_view value) {
-    ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(),
-               value);
+    ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+               ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                            const IR::Value& offset, std::string_view value) {
-    ctx.AddU32("{}=atomicOr({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(),
-               value);
+    ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+               ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             const IR::Value& offset, std::string_view value) {
-    ctx.AddU32("{}=atomicXor({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(),
-               value);
+    ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+               ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                  const IR::Value& offset, std::string_view value) {
-    ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(),
-               offset.U32(), value);
+    ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+               ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     // LOG_WARNING(..., "Op falling to non-atomic");
-    ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name,
-               binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1);
-    ctx.Add("{}_ssbo{}[{}]+=unpackUint2x32({}).x;{}_ssbo{}[{}]+=unpackUint2x32({}).y;",
-            ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(),
-            offset.U32() + 1, value);
+    ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+               ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+               binding.U32(), ctx.var_alloc.Consume(offset));
+    ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     // LOG_WARNING(..., "Op falling to non-atomic");
-    ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name,
-               binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1);
+    ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+               ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+               binding.U32(), ctx.var_alloc.Consume(offset));
     ctx.Add("for(int i=0;i<2;++i){{ "
-            "{}_ssbo{}[{}+i]=uint(min(int({}_ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}",
-            ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(),
-            offset.U32(), value);
+            "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
+            ");}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     // LOG_WARNING(..., "Op falling to non-atomic");
-    ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name,
-               binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1);
+    ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+               ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+               binding.U32(), ctx.var_alloc.Consume(offset));
     ctx.Add("for(int i=0;i<2;++i){{ "
-            "{}_ssbo{}[{}+i]=min({}_ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}",
-            ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(),
-            offset.U32(), value);
+            "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     // LOG_WARNING(..., "Op falling to non-atomic");
-    ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name,
-               binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1);
+    ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+               ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+               binding.U32(), ctx.var_alloc.Consume(offset));
     ctx.Add("for(int i=0;i<2;++i){{ "
-            "{}_ssbo{}[{}+i]=uint(max(int({}_ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}",
-            ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(),
-            offset.U32(), value);
+            "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
+            ");}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     // LOG_WARNING(..., "Op falling to non-atomic");
-    ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name,
-               binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1);
+
+    ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+               ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+               binding.U32(), ctx.var_alloc.Consume(offset));
     ctx.Add("for(int "
-            "i=0;i<2;++i){{{}_ssbo{}[{}+i]=max({}_ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}",
-            ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(),
-            offset.U32(), value);
+            "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}"
+            "))[i]);}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             const IR::Value& offset, std::string_view value) {
-    ctx.AddU64("{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}],unpackUint2x32({}).x),atomicAnd({}_"
-               "ssbo{}[{}],"
-               "unpackUint2x32({}).y)));",
-               inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name,
-               binding.U32(), offset.U32() + 1, value);
+    ctx.AddU64(
+        "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_"
+        "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+        inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+        binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                            const IR::Value& offset, std::string_view value) {
-    ctx.AddU64(
-        "{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}],unpackUint2x32({}).x),atomicOr({}_ssbo{}[{}],"
-        "unpackUint2x32({}).y)));",
-        inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(),
-        offset.U32() + 1, value);
+    ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_"
+               "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+               inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+               ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             const IR::Value& offset, std::string_view value) {
-    ctx.AddU64("{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}],unpackUint2x32({}).x),atomicXor({}_"
-               "ssbo{}[{}],"
-               "unpackUint2x32({}).y)));",
-               inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name,
-               binding.U32(), offset.U32() + 1, value);
+    ctx.AddU64(
+        "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_"
+        "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+        inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+        binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                  const IR::Value& offset, std::string_view value) {
-    ctx.AddU64(
-        "{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}],unpackUint2x32({}).x),atomicExchange("
-        "{}_ssbo{}[{}],unpackUint2x32({}).y)));",
-        inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(),
-        offset.U32() + 1, value);
+    ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x),"
+               "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+               inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+               ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 5e0195b0f..24c36bbda 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -92,7 +92,7 @@ void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
 void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
 void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
 void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
-void EmitIsHelperInvocation(EmitContext& ctx);
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
 void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
 void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
 void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
index 9e5715605..3ed4e04d3 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
@@ -211,8 +211,8 @@ void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
     NotImplemented();
 }
 
-void EmitIsHelperInvocation(EmitContext& ctx) {
-    NotImplemented();
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
+    ctx.AddU1("{}=gl_HelperInvocation;", inst);
 }
 
 void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index fa11c656f..38c49b164 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -38,7 +38,7 @@ std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp,
 } // namespace
 
 void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
-    NotImplemented();
+    ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
 }
 
 void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {