glasm: Catch more register leaks
Add support for null registers. These are used when an instruction has no usages. This comes in handy when an instruction is only used for its CC value, with the caveat of having to invalidate all pseudo-instructions before defining the instruction itself in the register allocator; this commit changes the emitters to do so. Also work around a bug in Nvidia's compiler with conditional execution on condition codes by using branches instead.
This commit is contained in:
parent 9fbfe7d676
commit ca05a13c62

8 changed files with 114 additions and 41 deletions
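In short: RegAlloc::Define only hands out a real register when the defining instruction has at least one usage; otherwise it marks the definition as null, and a null id later formats as a throwaway register, so writes to it cannot leak an allocation. Below is a condensed, hypothetical sketch of that mechanism with simplified stand-in types (Id, Inst, RegAlloc and the index counter here are illustrative, not the backend's real declarations):

#include <cstdint>

// Simplified stand-ins for the backend's Id / IR::Inst / RegAlloc types.
struct Id {
    uint32_t is_valid : 1;
    uint32_t is_long : 1;
    uint32_t is_null : 1;   // definition that nothing will ever read
    uint32_t index : 29;
};

struct Inst {
    int num_uses{};
    Id definition{};
    bool HasUses() const { return num_uses > 0; }
};

struct RegAlloc {
    uint32_t next_free{};

    Id Define(Inst& inst, bool is_long) {
        Id id{};
        id.is_long = is_long ? 1u : 0u;
        if (inst.HasUses()) {
            // The result is consumed somewhere: allocate a real register.
            id.is_valid = 1;
            id.index = next_free++;
        } else {
            // No consumers: mark the id as null; it is later printed as a
            // scratch register instead of holding on to an allocation.
            id.is_null = 1;
        }
        inst.definition = id;
        return id;
    }
};

The rest of the diff adapts the emitters to that rule; for example, EmitConditionRef fakes one usage so a condition that is only read through CC still gets a real register.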
@@ -203,7 +203,13 @@ void Precolor(EmitContext& ctx, const IR::Program& program) {
             for (size_t i = 0; i < num_args; ++i) {
                 IR::Block& phi_block{*phi.PhiBlock(i)};
                 auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
-                IR::IREmitter{phi_block, it}.PhiMove(phi, phi.Arg(i));
+                IR::IREmitter ir{phi_block, it};
+                const IR::Value arg{phi.Arg(i)};
+                if (arg.IsImmediate()) {
+                    ir.PhiMove(phi, arg);
+                } else {
+                    ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
+                }
             }
             for (size_t i = 0; i < num_args; ++i) {
                 IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
@@ -23,7 +23,13 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
 }
 
 void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
-    ctx.Add("MOV.S {},{};", inst, ScalarS32{ctx.reg_alloc.Consume(value)});
+    // Fake one usage to get a real register out of the condition
+    inst.DestructiveAddUsage(1);
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    const ScalarS32 input{ctx.reg_alloc.Consume(value)};
+    if (ret != input) {
+        ctx.Add("MOV.S {},{};", ret, input);
+    }
 }
 
 void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
@@ -52,7 +52,9 @@ void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, Objec
         // The input composite is not aliased with the return value so we have to copy it before
         // hand. But the insert object is not aliased with the return value, so we don't have to
         // worry about that
-        ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object);
+        ctx.Add("MOV.{} {},{};"
+                "MOV.{} {}.{},{};",
+                type, ret, composite, type, ret, swizzle, object);
     } else {
         // The return value is alised so we can just insert the object, it doesn't matter if it's
         // aliased
@@ -181,7 +181,6 @@ void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
     ctx.Add("MOV.S {},-1;"
             "MOV.S {}(NONRESIDENT),0;",
             sparse_ret, sparse_ret);
-    sparse_inst->Invalidate();
 }
 
 std::string_view FormatStorage(ImageFormat format) {
@@ -215,12 +214,20 @@ void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Regis
     const Register ret{ctx.reg_alloc.Define(inst)};
     ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
 }
+
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    if (sparse_inst) {
+        sparse_inst->Invalidate();
+    }
+    return sparse_inst;
+}
 } // Anonymous namespace
 
 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                                 const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
     const std::string_view type{TextureType(info)};
@@ -259,7 +266,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu
 void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                                 const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -288,7 +295,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
     }
     const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
     const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -393,7 +400,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
     }
     const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
     const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -436,7 +443,7 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     const char comp{"xyzw"[info.gather_component]};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -462,7 +469,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
     // Allocate offsets early so they don't overwrite any consumed register
     const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -500,7 +507,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -547,7 +554,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
         dpdx = ScopedRegister{ctx.reg_alloc};
         dpdy = ScopedRegister{ctx.reg_alloc};
     }
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -581,7 +588,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
 
 void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view format{FormatStorage(info.image_format)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
@@ -9,6 +9,17 @@
 namespace Shader::Backend::GLASM {
 
 void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+    const std::array flags{
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+    };
+    for (IR::Inst* const flag_inst : flags) {
+        if (flag_inst) {
+            flag_inst->Invalidate();
+        }
+    }
     const bool cc{inst.HasAssociatedPseudoOperation()};
     const std::string_view cc_mod{cc ? ".CC" : ""};
     if (cc) {
@@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
     if (!cc) {
         return;
     }
-    static constexpr std::array<std::string_view, 4> masks{"EQ", "SF", "CF", "OF"};
-    const std::array flags{
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
-    };
-    for (size_t i = 0; i < flags.size(); ++i) {
-        if (flags[i]) {
-            const auto flag_ret{ctx.reg_alloc.Define(*flags[i])};
-            ctx.Add("MOV.S {},0;"
-                    "MOV.S {}({}.x),-1;",
-                    flag_ret, flag_ret, masks[i]);
-            flags[i]->Invalidate();
+    static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+    for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+        if (!flags[flag_index]) {
+            continue;
+        }
+        const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+        if (flag_index == 0) {
+            ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+        } else {
+            // We could use conditional execution here, but it's broken on Nvidia's compiler
+            ctx.Add("IF {}.x;"
+                    "MOV.S {}.x,-1;"
+                    "ELSE;"
+                    "MOV.S {}.x,0;"
+                    "ENDIF;",
+                    masks[flag_index], flag_ret, flag_ret);
         }
     }
 }
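Regarding the "broken on Nvidia's compiler" comment: the old path guarded the flag write with a CC-predicated MOV, while the new path emits an explicit branch. A purely illustrative comparison of the two GLASM shapes for the sign flag (the register name R1 is made up; the instruction text mirrors the format strings in the hunk above):

#include <cstdio>

int main() {
    // Previous shape: conditional execution on the SF condition-code mask,
    // which the commit works around as miscompiled by Nvidia's GLASM assembler.
    std::printf("MOV.S R1,0;\n"
                "MOV.S R1(SF.x),-1;\n");
    // New shape: an explicit branch produces the same -1/0 result.
    std::printf("IF SF.x;\n"
                "MOV.S R1.x,-1;\n"
                "ELSE;\n"
                "MOV.S R1.x,0;\n"
                "ENDIF;\n");
    return 0;
}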
@@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal
 
 void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
                           ScalarU32 count) {
+    const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+    const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+    if (zero) {
+        zero->Invalidate();
+    }
+    if (sign) {
+        sign->Invalidate();
+    }
+    if (zero || sign) {
+        ctx.reg_alloc.InvalidateConditionCodes();
+    }
     const Register ret{ctx.reg_alloc.Define(inst)};
     if (count.type != Type::Register && offset.type != Type::Register) {
         ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
@@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal
                 "BFE.U {},RC,{};",
                 count, offset, ret, base);
     }
-    if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) {
+    if (zero) {
         ctx.Add("SEQ.S {},{},0;", *zero, ret);
-        zero->Invalidate();
     }
-    if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) {
+    if (sign) {
         ctx.Add("SLT.S {},{},0;", *sign, ret);
-        sign->Invalidate();
     }
 }
 
@@ -51,6 +51,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
 static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
                     const IR::Value& clamp, const IR::Value& segmentation_mask,
                     std::string_view op) {
+    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+    if (in_bounds) {
+        in_bounds->Invalidate();
+    }
     std::string mask;
     if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
         mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
@@ -61,13 +65,11 @@ static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32
                 ScalarU32{ctx.reg_alloc.Consume(clamp)});
     }
     const Register value_ret{ctx.reg_alloc.Define(inst)};
-    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
     if (in_bounds) {
         const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
         ctx.Add("SHF{}.U {},{},{},{};"
                 "MOV.U {}.x,{}.y;",
                 op, bounds_ret, value, index, mask, value_ret, bounds_ret);
-        in_bounds->Invalidate();
     } else {
         ctx.Add("SHF{}.U {},{},{},{};"
                 "MOV.U {}.x,{}.y;",
@@ -22,11 +22,19 @@ Register RegAlloc::LongDefine(IR::Inst& inst) {
 }
 
 Value RegAlloc::Peek(const IR::Value& value) {
-    return value.IsImmediate() ? MakeImm(value) : PeekInst(*value.InstRecursive());
+    if (value.IsImmediate()) {
+        return MakeImm(value);
+    } else {
+        return PeekInst(*value.Inst());
+    }
 }
 
 Value RegAlloc::Consume(const IR::Value& value) {
-    return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive());
+    if (value.IsImmediate()) {
+        return MakeImm(value);
+    } else {
+        return ConsumeInst(*value.Inst());
+    }
 }
 
 void RegAlloc::Unref(IR::Inst& inst) {
@@ -88,7 +96,14 @@ Value RegAlloc::MakeImm(const IR::Value& value) {
 }
 
 Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
-    inst.SetDefinition<Id>(Alloc(is_long));
+    if (inst.HasUses()) {
+        inst.SetDefinition<Id>(Alloc(is_long));
+    } else {
+        Id id{};
+        id.is_long.Assign(is_long ? 1 : 0);
+        id.is_null.Assign(1);
+        inst.SetDefinition<Id>(id);
+    }
     return Register{PeekInst(inst)};
 }
 
|
@ -115,10 +130,12 @@ Id RegAlloc::Alloc(bool is_long) {
|
||||||
num_regs = std::max(num_regs, reg + 1);
|
num_regs = std::max(num_regs, reg + 1);
|
||||||
use[reg] = true;
|
use[reg] = true;
|
||||||
Id ret{};
|
Id ret{};
|
||||||
ret.index.Assign(static_cast<u32>(reg));
|
ret.is_valid.Assign(1);
|
||||||
ret.is_long.Assign(is_long ? 1 : 0);
|
ret.is_long.Assign(is_long ? 1 : 0);
|
||||||
ret.is_spill.Assign(0);
|
ret.is_spill.Assign(0);
|
||||||
ret.is_condition_code.Assign(0);
|
ret.is_condition_code.Assign(0);
|
||||||
|
ret.is_null.Assign(0);
|
||||||
|
ret.index.Assign(static_cast<u32>(reg));
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -126,6 +143,9 @@ Id RegAlloc::Alloc(bool is_long) {
 }
 
 void RegAlloc::Free(Id id) {
+    if (id.is_valid == 0) {
+        throw LogicError("Freeing invalid register");
+    }
     if (id.is_spill != 0) {
         throw NotImplementedException("Free spill");
     }
@@ -35,10 +35,12 @@ enum class Type : u32 {
 struct Id {
     union {
         u32 raw;
-        BitField<0, 29, u32> index;
-        BitField<29, 1, u32> is_long;
-        BitField<30, 1, u32> is_spill;
-        BitField<31, 1, u32> is_condition_code;
+        BitField<0, 1, u32> is_valid;
+        BitField<1, 1, u32> is_long;
+        BitField<2, 1, u32> is_spill;
+        BitField<3, 1, u32> is_condition_code;
+        BitField<4, 1, u32> is_null;
+        BitField<5, 27, u32> index;
     };
 
     bool operator==(Id rhs) const noexcept {
@@ -164,12 +166,18 @@ auto FormatTo(FormatContext& ctx, Id id) {
         throw NotImplementedException("Spill emission");
     }
     if constexpr (scalar) {
+        if (id.is_null != 0) {
+            return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
+        }
         if (id.is_long != 0) {
             return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
         } else {
             return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
         }
     } else {
+        if (id.is_null != 0) {
+            return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
+        }
         if (id.is_long != 0) {
             return fmt::format_to(ctx.out(), "D{}", id.index.Value());
         } else {