From f4799e8fa15b92d8d5607dc5dfca4974901ee06c Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 2 Jun 2021 00:33:03 -0400
Subject: [PATCH] glsl: Implement transform feedback

---
 .../backend/glsl/emit_context.cpp             | 53 +++++++++++++++----
 .../backend/glsl/emit_context.h               |  8 +++
 .../glsl/emit_glsl_context_get_set.cpp        | 15 ++++--
 .../renderer_opengl/gl_shader_cache.cpp       | 18 +++++--
 4 files changed, 76 insertions(+), 18 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 6f10002fe..58355d5e3 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -37,7 +37,6 @@ bool StoresPerVertexAttributes(Stage stage) {
     case Stage::VertexA:
     case Stage::VertexB:
     case Stage::Geometry:
-    case Stage::TessellationControl:
     case Stage::TessellationEval:
         return true;
     default:
@@ -154,9 +153,7 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) {
         return;
     }
     header += "out gl_PerVertex{";
-    if (info.stores_position) {
-        header += "vec4 gl_Position;";
-    }
+    header += "vec4 gl_Position;";
     if (info.stores_point_size) {
         header += "float gl_PointSize;";
     }
@@ -236,10 +233,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
     }
     for (size_t index = 0; index < info.stores_generics.size(); ++index) {
         // TODO: Properly resolve attribute issues
-        const auto declaration{fmt::format("layout(location={}) out vec4 out_attr{}{};", index,
-                                           index, OutputDecorator(stage, program.invocations))};
         if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) {
-            header += declaration;
+            DefineGenericOutput(index, program.invocations);
         }
     }
     header += "\n";
@@ -312,13 +307,53 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) {
     }
 }
 
+// Appends the GLSL declaration(s) of generic output `index` to header, one variable per
+// transform feedback varying (or per remaining span), and records each in output_generics.
+void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
+    static constexpr std::string_view swizzle{"xyzw"};
+    const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
+    u32 element{0};
+    while (element < 4) {
+        const u32 remainder{4 - element};
+        const TransformFeedbackVarying* xfb_varying{};
+        if (base_index + element < runtime_info.xfb_varyings.size() &&
+            runtime_info.xfb_varyings[base_index + element].components > 0) {
+            xfb_varying = &runtime_info.xfb_varyings[base_index + element];
+        }
+        // Clamp to the components left so fill_n below cannot overrun output_generics[index].
+        const u32 num_components{
+            xfb_varying ? std::min<u32>(xfb_varying->components, remainder) : remainder};
+        std::string definition{fmt::format("layout(location={}", index)};
+        if (element > 0) {
+            definition += fmt::format(",component={}", element);
+        }
+        if (xfb_varying) {
+            definition +=
+                fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer,
+                            xfb_varying->stride, xfb_varying->offset);
+        }
+        std::string name{fmt::format("out_attr{}", index)};
+        if (num_components < 4 || element > 0) {
+            name += fmt::format("_{}", swizzle.substr(element, num_components));
+        }
+        const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)};
+        definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations));
+        header += definition;
+        const GenericElementInfo element_info{
+            .name = name, .first_element = element, .num_components = num_components};
+        std::fill_n(output_generics[index].begin() + element, num_components, element_info);
+        element += num_components;
+    }
+    header += "\n";
+}
+
 void EmitContext::DefineHelperFunctions() {
     if (info.uses_global_increment || info.uses_shared_increment) {
         header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n";
     }
     if (info.uses_global_decrement || info.uses_shared_decrement) {
-        header +=
-            "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n";
+        header += "uint CasDecrement(uint op_a,uint "
+                  "op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n";
     }
     if (info.uses_atomic_f32_add) {
         header += "uint CasFloatAdd(uint op_a,float op_b){return "
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h
index 48786a2c7..5d48675e6 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.h
+++ b/src/shader_recompiler/backend/glsl/emit_context.h
@@ -30,6 +30,12 @@ struct Program;
 
 namespace Shader::Backend::GLSL {
 
+struct GenericElementInfo { // One component's view of the output variable that holds it
+    std::string name{};     // GLSL identifier of the declared output variable
+    u32 first_element{};    // Component index (0..3) at which that variable starts
+    u32 num_components{};   // Number of components in that variable
+};
+
 class EmitContext {
 public:
     explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
@@ -149,6 +155,7 @@ public:
     std::vector<u32> image_buffer_bindings;
     std::vector<u32> texture_bindings;
     std::vector<u32> image_bindings;
+    std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
 
     bool uses_y_direction{};
     bool uses_cc_carry{};
@@ -157,6 +164,7 @@ private:
     void SetupExtensions(std::string& header);
     void DefineConstantBuffers(Bindings& bindings);
     void DefineStorageBuffers(Bindings& bindings);
+    void DefineGenericOutput(size_t index, u32 invocations);
     void DefineHelperFunctions();
     void SetupImages(Bindings& bindings);
 };
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 0cf31329d..c48492a17 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -200,13 +200,21 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
 
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
                       [[maybe_unused]] std::string_view vertex) {
-    const u32 element{static_cast<u32>(attr) % 4};
-    const char swizzle{"xyzw"[element]};
     if (IR::IsGeneric(attr)) {
         const u32 index{IR::GenericAttributeIndex(attr)};
-        ctx.Add("out_attr{}{}.{}={};", index, OutputVertexIndex(ctx, vertex), swizzle, value);
+        const u32 element{IR::GenericAttributeElement(attr)};
+        const GenericElementInfo& info{ctx.output_generics.at(index).at(element)};
+        const auto output_decorator{OutputVertexIndex(ctx, vertex)};
+        if (info.num_components == 1) {
+            ctx.Add("{}{}={};", info.name, output_decorator, value);
+        } else {
+            const u32 index_element{element - info.first_element};
+            ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value);
+        }
         return;
     }
+    const u32 element{static_cast<u32>(attr) % 4};
+    const char swizzle{"xyzw"[element]};
     switch (attr) {
     case IR::Attribute::PointSize:
         ctx.Add("gl_PointSize={};", value);
@@ -233,7 +241,6 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
         break;
     }
     default:
-        fmt::print("Set attribute {}", attr);
         throw NotImplementedException("Set attribute {}", attr);
     }
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index cd11ff653..0a1ba363b 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -92,9 +92,15 @@ GLenum AssemblyStage(size_t stage_index) {
 
 Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
                                     const Shader::IR::Program& program,
-                                    bool glasm_use_storage_buffers) {
+                                    bool glasm_use_storage_buffers, bool use_assembly_shaders) {
     Shader::RuntimeInfo info;
     switch (program.stage) {
+    case Shader::Stage::VertexB:
+    case Shader::Stage::Geometry:
+        if (!use_assembly_shaders && key.xfb_enabled != 0) {
+            info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+        }
+        break;
     case Shader::Stage::TessellationEval:
         info.tess_clockwise = key.tessellation_clockwise != 0;
         info.tess_primitive = [&key] {
@@ -420,7 +426,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
     OGLProgram source_program;
     std::array<OGLAssemblyProgram, 5> assembly_programs;
     Shader::Backend::Bindings binding;
-    if (!device.UseAssemblyShaders()) {
+    const bool use_glasm{device.UseAssemblyShaders()};
+    if (!use_glasm) {
         source_program.handle = glCreateProgram();
     }
     const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
@@ -434,8 +441,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
         const size_t stage_index{index - 1};
         infos[stage_index] = &program.info;
 
-        const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)};
-        if (device.UseAssemblyShaders()) {
+        const auto runtime_info{
+            MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)};
+        if (use_glasm) {
             const std::string code{EmitGLASM(profile, runtime_info, program, binding)};
             assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index));
         } else {
@@ -443,7 +451,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
             AttachShader(Stage(stage_index), source_program.handle, code);
         }
     }
-    if (!device.UseAssemblyShaders()) {
+    if (!use_glasm) {
         LinkProgram(source_program.handle);
     }
     return std::make_unique<GraphicsPipeline>(