mirror of
https://github.com/AsahiLinux/m1n1
synced 2024-11-10 01:34:12 +00:00
387cc2ecec
Signed-off-by: Hector Martin <marcan@marcan.st>
681 lines
21 KiB
C
681 lines
21 KiB
C
/* SPDX-License-Identifier: MIT */
|
|
|
|
#include "kboot.h"
|
|
#include "adt.h"
|
|
#include "assert.h"
|
|
#include "firmware.h"
|
|
#include "math.h"
|
|
#include "pmgr.h"
|
|
#include "soc.h"
|
|
#include "utils.h"
|
|
|
|
#include "libfdt/libfdt.h"
|
|
|
|
#define bail(...) \
|
|
do { \
|
|
printf(__VA_ARGS__); \
|
|
return -1; \
|
|
} while (0)
|
|
|
|
#define MAX_PSTATES 16
|
|
#define MAX_CLUSTERS 8
|
|
#define MAX_DIES 2
|
|
|
|
struct perf_state {
|
|
u32 freq;
|
|
u32 volt;
|
|
};
|
|
|
|
struct aux_perf_state {
|
|
u64 volt;
|
|
u64 freq;
|
|
};
|
|
|
|
struct aux_perf_states {
|
|
u64 dies;
|
|
u64 count;
|
|
struct aux_perf_state states[];
|
|
};
|
|
|
|
static int get_core_counts(u32 *count, u32 nclusters, u32 ncores)
|
|
{
|
|
u64 base;
|
|
pmgr_adt_power_enable("/arm-io/sgx");
|
|
|
|
int adt_sgx_path[8];
|
|
if (adt_path_offset_trace(adt, "/arm-io/sgx", adt_sgx_path) < 0)
|
|
bail("ADT: GPU: Failed to get sgx\n");
|
|
|
|
if (adt_get_reg(adt, adt_sgx_path, "reg", 0, &base, NULL) < 0)
|
|
bail("ADT: GPU: Failed to get sgx reg 0\n");
|
|
|
|
u32 cores[3] = {0, 0, 0};
|
|
|
|
switch (chip_id) {
|
|
case T6002:
|
|
cores[1] = read32(base + 0xd01514);
|
|
/* fallthrough */
|
|
case T8103:
|
|
case T8112:
|
|
case T6000:
|
|
case T6001:
|
|
cores[0] = read32(base + 0xd01500);
|
|
break;
|
|
case T6020:
|
|
case T6021:
|
|
case T6022:
|
|
cores[0] = read32(base + 0xe01500);
|
|
cores[1] = read32(base + 0xe01504);
|
|
cores[2] = read32(base + 0xe01508);
|
|
break;
|
|
}
|
|
|
|
for (u32 i = 0; i < nclusters; i++) {
|
|
count[i] = __builtin_popcount(cores[0] & MASK(ncores));
|
|
|
|
for (u32 j = 0; j < ARRAY_SIZE(cores); j++) {
|
|
cores[j] >>= ncores;
|
|
if (j < (ARRAY_SIZE(cores) - 1))
|
|
cores[j] |= cores[j + 1] << (32 - ncores);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void adjust_leakage(float *val, u32 clusters, u32 *cores, u32 max, float uncore_fraction)
|
|
{
|
|
for (u32 i = 0; i < clusters; i++) {
|
|
float uncore = val[i] * uncore_fraction;
|
|
float core = val[i] - uncore;
|
|
|
|
val[i] = uncore + (cores[i] / (float)max) * core;
|
|
}
|
|
}
|
|
|
|
static void load_fuses(float *out, u32 count, u64 base, u32 start, u32 width, float scale,
|
|
float offset, bool flip)
|
|
{
|
|
for (u32 i = 0; i < count; i++) {
|
|
base += (start / 32) * 4;
|
|
start &= 31;
|
|
|
|
u32 low = read32(base);
|
|
u32 high = read32(base + 4);
|
|
u32 val = (((((u64)high) << 32) | low) >> start) & MASK(width);
|
|
|
|
float fval = (float)val * scale + offset;
|
|
|
|
if (flip)
|
|
out[count - i - 1] = fval;
|
|
else
|
|
out[i] = fval;
|
|
|
|
start += width;
|
|
}
|
|
}
|
|
|
|
static u32 t8103_pwr_scale[] = {0, 63, 80, 108, 150, 198, 210};
|
|
|
|
static int calc_power_t8103(u32 count, u32 table_count, const struct perf_state *core,
|
|
const struct perf_state *sram, const struct aux_perf_states *cs,
|
|
u32 *max_pwr, float *core_leak, float *sram_leak, float *cs_leak,
|
|
float *afr_leak)
|
|
{
|
|
UNUSED(sram);
|
|
UNUSED(cs);
|
|
UNUSED(core_leak);
|
|
UNUSED(sram_leak);
|
|
UNUSED(cs_leak);
|
|
UNUSED(afr_leak);
|
|
u32 *pwr_scale;
|
|
u32 pwr_scale_count;
|
|
u32 core_count;
|
|
u32 max_cores;
|
|
|
|
switch (chip_id) {
|
|
case T8103:
|
|
pwr_scale = t8103_pwr_scale;
|
|
pwr_scale_count = ARRAY_SIZE(t8103_pwr_scale);
|
|
max_cores = 8;
|
|
break;
|
|
default:
|
|
bail("ADT: GPU: Unsupported chip\n");
|
|
}
|
|
|
|
if (get_core_counts(&core_count, 1, max_cores))
|
|
return -1;
|
|
|
|
if (table_count != 1)
|
|
bail("ADT: GPU: expected 1 perf state table but got %d\n", table_count);
|
|
|
|
if (count != pwr_scale_count)
|
|
bail("ADT: GPU: expected %d perf states but got %d\n", pwr_scale_count, count);
|
|
|
|
for (u32 i = 0; i < pwr_scale_count; i++)
|
|
max_pwr[i] = (u32)core[i].volt * (u32)pwr_scale[i] * 100;
|
|
|
|
core_leak[0] = 1000.0;
|
|
sram_leak[0] = 45.0;
|
|
|
|
adjust_leakage(core_leak, 1, &core_count, max_cores, 0.12);
|
|
adjust_leakage(sram_leak, 1, &core_count, max_cores, 0.2);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int calc_power_t600x(u32 count, u32 table_count, const struct perf_state *core,
|
|
const struct perf_state *sram, const struct aux_perf_states *cs,
|
|
u32 *max_pwr, float *core_leak, float *sram_leak, float *cs_leak,
|
|
float *afr_leak)
|
|
{
|
|
float s_sram, k_sram, s_core, k_core, s_cs, k_cs;
|
|
float dk_core, dk_sram = 0, dk_cs = 0;
|
|
float imax = 1000;
|
|
|
|
u32 ndies = 1;
|
|
u32 nclusters = 0;
|
|
u32 ncores = 0;
|
|
u32 core_count[MAX_CLUSTERS];
|
|
|
|
bool simple_exps = false;
|
|
bool adjust_leakages = true;
|
|
bool has_cs = false;
|
|
|
|
switch (chip_id) {
|
|
case T6002:
|
|
ndies = 2;
|
|
nclusters += 4;
|
|
load_fuses(core_leak + 4, 4, 0x22922bc1b8, 25, 13, 2, 2, true);
|
|
load_fuses(sram_leak + 4, 4, 0x22922bc1cc, 4, 9, 1, 1, true);
|
|
// fallthrough
|
|
case T6001:
|
|
nclusters += 2;
|
|
case T6000:
|
|
nclusters += 2;
|
|
load_fuses(core_leak + 0, min(4, nclusters), 0x2922bc1b8, 25, 13, 2, 2, false);
|
|
load_fuses(sram_leak + 0, min(4, nclusters), 0x2922bc1cc, 4, 9, 1, 1, false);
|
|
|
|
s_sram = 4.3547606;
|
|
k_sram = 0.024927923;
|
|
// macOS difference: macOS uses a misbehaved piecewise function here
|
|
// Since it's obviously wrong, let's just use only the first component
|
|
s_core = 1.48461742;
|
|
k_core = 0.39013552;
|
|
dk_core = 1.06975;
|
|
dk_sram = 0.00625;
|
|
|
|
ncores = 8;
|
|
adjust_leakages = true;
|
|
imax = 26.0;
|
|
break;
|
|
case T8112:
|
|
nclusters = 1;
|
|
load_fuses(core_leak, 1, 0x23d2c84dc, 30, 13, 2, 2, false);
|
|
load_fuses(sram_leak, 1, 0x23d2c84b0, 15, 9, 1, 1, false);
|
|
|
|
s_sram = 3.61619841;
|
|
k_sram = 0.0529281;
|
|
// macOS difference: macOS uses a misbehaved piecewise function here
|
|
// Since it's obviously wrong, let's just use only the first component
|
|
s_core = 1.21356187;
|
|
k_core = 0.43328839;
|
|
dk_core = 0.983196;
|
|
dk_sram = 0.007828;
|
|
|
|
simple_exps = true;
|
|
ncores = 10;
|
|
adjust_leakages = false; // pre-adjusted?
|
|
imax = 24.0;
|
|
break;
|
|
case T6022:
|
|
ndies = 2;
|
|
nclusters += 4;
|
|
load_fuses(core_leak + 4, min(4, nclusters), 0x229e2cc1f8, 4, 13, 2, 2, true);
|
|
load_fuses(sram_leak + 4, min(4, nclusters), 0x229e2cc208, 19, 9, 1, 1, true);
|
|
load_fuses(cs_leak + 1, 1, 0x229e2cc204, 8, 12, 1, 1, false);
|
|
load_fuses(afr_leak + 1, 1, 0x229e2cc210, 0, 12, 1, 1, false);
|
|
|
|
// For some reason, this one is different on T6022...
|
|
dk_cs = 6.7;
|
|
// fallthrough
|
|
case T6021:
|
|
if (!dk_cs)
|
|
dk_cs = 4.492;
|
|
|
|
nclusters += 4;
|
|
s_sram = 5.808;
|
|
k_sram = 0.00707;
|
|
// macOS difference: macOS uses a misbehaved piecewise function here
|
|
// Since it's obviously wrong, let's just use only the first component
|
|
s_core = 1.24554153;
|
|
k_core = 0.56203084;
|
|
|
|
s_cs = 1.87;
|
|
k_cs = 0.162;
|
|
|
|
goto t602x;
|
|
|
|
case T6020:
|
|
nclusters = 2;
|
|
s_sram = 5.02191218;
|
|
k_sram = 0.0145621013;
|
|
// macOS difference: macOS uses a misbehaved piecewise function here
|
|
// Since it's obviously wrong, let's just use only the first component
|
|
s_core = 1.21006932;
|
|
k_core = 0.52776378;
|
|
|
|
s_cs = 1.8;
|
|
k_cs = 0.162;
|
|
dk_cs = 1.889;
|
|
|
|
t602x:
|
|
dk_core = 1.00075;
|
|
dk_sram = 0.00785;
|
|
load_fuses(core_leak + 0, min(4, nclusters), 0x29e2cc1f8, 4, 13, 2, 2, false);
|
|
load_fuses(sram_leak + 0, min(4, nclusters), 0x29e2cc208, 19, 9, 1, 1, false);
|
|
load_fuses(cs_leak + 0, 1, 0x29e2cc204, 8, 12, 1, 1, false);
|
|
load_fuses(afr_leak + 0, 1, 0x29e2cc210, 0, 12, 1, 1, false);
|
|
|
|
simple_exps = true;
|
|
ncores = 10;
|
|
adjust_leakages = false; // pre-adjusted?
|
|
imax = 33.0;
|
|
has_cs = true;
|
|
break;
|
|
|
|
default:
|
|
bail("ADT: GPU: Unsupported chip\n");
|
|
}
|
|
|
|
if (get_core_counts(core_count, nclusters, ncores))
|
|
return -1;
|
|
|
|
printf("FDT: GPU: Core counts: ");
|
|
for (u32 i = 0; i < nclusters; i++) {
|
|
printf("%d ", core_count[i]);
|
|
}
|
|
printf("\n");
|
|
|
|
if (adjust_leakages) {
|
|
adjust_leakage(core_leak, nclusters, core_count, ncores, 0.0825);
|
|
adjust_leakage(sram_leak, nclusters, core_count, ncores, 0.2247);
|
|
}
|
|
|
|
if (table_count != nclusters)
|
|
bail("ADT: GPU: expected %d perf state tables but got %d\n", nclusters, table_count);
|
|
|
|
if (has_cs && (!cs || !cs_leak)) {
|
|
bail("ADT: GPU: expected CS perf table, but not found\n");
|
|
}
|
|
|
|
max_pwr[0] = 0;
|
|
|
|
for (u32 i = 1; i < count; i++) {
|
|
u32 total_mw = 0;
|
|
|
|
for (u32 j = 0; j < nclusters; j++) {
|
|
// macOS difference: macOS truncates Hz to integer MHz before doing this math.
|
|
// That's probably wrong, so let's not do that.
|
|
|
|
float mw = 0;
|
|
size_t idx = j * count + i;
|
|
|
|
mw += sram[idx].volt / 1000.f * sram_leak[j] * k_sram *
|
|
expf(sram[idx].volt / 1000.f * s_sram);
|
|
mw += core[idx].volt / 1000.f * core_leak[j] * k_core *
|
|
expf(core[idx].volt / 1000.f * s_core);
|
|
|
|
float sbase = sram[idx].volt / 750.f;
|
|
float sram_v_p;
|
|
if (simple_exps)
|
|
sram_v_p = sbase * sbase; // v ^ 2
|
|
else
|
|
sram_v_p = sbase * sbase * sbase; // v ^ 3
|
|
mw += dk_sram * core_count[j] * (sram[idx].freq / 1000000.f) * sram_v_p;
|
|
|
|
float cbase = core[idx].volt / 750.f;
|
|
float core_v_p;
|
|
if (simple_exps || core[idx].volt < 750)
|
|
core_v_p = cbase * cbase; // v ^ 2
|
|
else
|
|
core_v_p = cbase * cbase * cbase; // v ^ 3
|
|
mw += dk_core * core_count[j] * (core[idx].freq / 1000000.f) * core_v_p;
|
|
|
|
if (mw > imax * core[idx].volt)
|
|
mw = imax * core[idx].volt;
|
|
|
|
total_mw += mw;
|
|
}
|
|
|
|
// CS gets added after the imax limit
|
|
|
|
if (has_cs) {
|
|
for (u32 j = 0; j < ndies; j++) {
|
|
float mw = 0;
|
|
|
|
int csi = j * cs->count + min(i, cs->count - 1);
|
|
u32 cs_mv = cs->states[csi].volt / 1000;
|
|
u32 cs_hz = cs->states[csi].freq;
|
|
|
|
mw += cs_mv / 1000.f * cs_leak[j] * k_cs * expf(cs_mv / 1000.f * s_cs);
|
|
float csbase = cs_mv / 750.f;
|
|
float cs_v_p = powf(csbase, 1.8);
|
|
mw += dk_cs * (cs_hz / 1000000.f) * cs_v_p;
|
|
|
|
total_mw += mw;
|
|
}
|
|
}
|
|
|
|
max_pwr[i] = total_mw * 1000;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int dt_set_region(void *dt, int sgx, const char *name, const char *path)
|
|
{
|
|
u64 base, size;
|
|
char prop[64];
|
|
|
|
snprintf(prop, sizeof(prop), "%s-base", name);
|
|
if (ADT_GETPROP(adt, sgx, prop, &base) < 0 || !base)
|
|
bail("ADT: GPU: failed to find %s property\n", prop);
|
|
|
|
snprintf(prop, sizeof(prop), "%s-size", name);
|
|
if (ADT_GETPROP(adt, sgx, prop, &size) < 0 || !base)
|
|
bail("ADT: GPU: failed to find %s property\n", prop);
|
|
|
|
int node = fdt_path_offset(dt, path);
|
|
if (node < 0)
|
|
bail("FDT: GPU: failed to find %s node\n", path);
|
|
|
|
fdt64_t reg[2];
|
|
|
|
fdt64_st(®[0], base);
|
|
fdt64_st(®[1], size);
|
|
|
|
if (fdt_setprop_inplace(dt, node, "reg", reg, sizeof(reg)))
|
|
bail("FDT: GPU: failed to set reg prop for %s\n", path);
|
|
|
|
if (fdt_setprop_empty(dt, node, "no-map"))
|
|
bail("FDT: GPU: failed to set no-map prop for %s\n", path);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int fdt_set_float_array(void *dt, int node, const char *name, float *val, int count)
|
|
{
|
|
fdt32_t data[MAX_CLUSTERS];
|
|
|
|
if (count > MAX_CLUSTERS)
|
|
bail("FDT: GPU: fdt_set_float_array() with too many values\n");
|
|
|
|
memcpy(data, val, sizeof(float) * count);
|
|
for (int i = 0; i < count; i++) {
|
|
data[i] = cpu_to_fdt32(data[i]);
|
|
}
|
|
|
|
if (fdt_setprop_inplace(dt, node, name, data, sizeof(u32) * count))
|
|
bail("FDT: GPU: Failed to set %s\n", name);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int fdt_set_aux_opp(void *dt, int gpu, const char *prop, const struct aux_perf_states *ps,
|
|
u32 dies)
|
|
{
|
|
int len;
|
|
const fdt32_t *opps_ph = fdt_getprop(dt, gpu, prop, &len);
|
|
if (!opps_ph || len != 4)
|
|
bail("FDT: GPU: %s not found\n", prop);
|
|
|
|
int opps = fdt_node_offset_by_phandle(dt, fdt32_ld(opps_ph));
|
|
if (opps < 0)
|
|
bail("FDT: GPU: node for phandle %u not found\n", fdt32_ld(opps_ph));
|
|
|
|
u32 count = ps->count;
|
|
|
|
u32 i = 0;
|
|
int opp;
|
|
fdt_for_each_subnode(opp, dt, opps)
|
|
{
|
|
fdt32_t volts[MAX_DIES];
|
|
|
|
for (u32 j = 0; j < dies; j++) {
|
|
volts[j] = cpu_to_fdt32(ps->states[i + j * ps->count].volt);
|
|
}
|
|
|
|
if (i >= count)
|
|
bail("FDT: GPU: Expected %d operating points, but found more\n", count);
|
|
|
|
if (fdt_setprop_inplace(dt, opp, "opp-microvolt", &volts, sizeof(u32) * dies))
|
|
bail("FDT: GPU: Failed to set opp-microvolt for aux PS %d\n", i);
|
|
|
|
if (fdt_setprop_inplace_u64(dt, opp, "opp-hz", ps->states[i].freq))
|
|
bail("FDT: GPU: Failed to set opp-hz for PS %d\n", i);
|
|
|
|
i++;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int dt_set_gpu(void *dt)
|
|
{
|
|
bool has_cs_afr = false;
|
|
int (*calc_power)(u32 count, u32 table_count, const struct perf_state *core,
|
|
const struct perf_state *sram, const struct aux_perf_states *cs, u32 *max_pwr,
|
|
float *core_leak, float *sram_leak, float *cs_leak, float *afr_leak);
|
|
|
|
u32 dies = 1;
|
|
|
|
printf("FDT: GPU: Initializing GPU info\n");
|
|
|
|
switch (chip_id) {
|
|
case T8103:
|
|
calc_power = calc_power_t8103;
|
|
break;
|
|
case T6022:
|
|
dies = 2;
|
|
// fallthrough
|
|
case T6021:
|
|
case T6020:
|
|
has_cs_afr = true;
|
|
calc_power = calc_power_t600x;
|
|
break;
|
|
case T6002:
|
|
dies = 2;
|
|
// fallthrough
|
|
case T6001:
|
|
case T6000:
|
|
case T8112:
|
|
calc_power = calc_power_t600x;
|
|
break;
|
|
default:
|
|
printf("ADT: GPU: unsupported chip!\n");
|
|
return 0;
|
|
}
|
|
|
|
int gpu = fdt_path_offset(dt, "gpu");
|
|
if (gpu < 0) {
|
|
printf("FDT: GPU: gpu alias not found in device tree\n");
|
|
return 0;
|
|
}
|
|
|
|
int len;
|
|
const fdt32_t *opps_ph = fdt_getprop(dt, gpu, "operating-points-v2", &len);
|
|
if (!opps_ph || len != 4)
|
|
bail("FDT: GPU: operating-points-v2 not found\n");
|
|
|
|
int opps = fdt_node_offset_by_phandle(dt, fdt32_ld(opps_ph));
|
|
if (opps < 0)
|
|
bail("FDT: GPU: node for phandle %u not found\n", fdt32_ld(opps_ph));
|
|
|
|
int sgx = adt_path_offset(adt, "/arm-io/sgx");
|
|
if (sgx < 0)
|
|
bail("ADT: GPU: /arm-io/sgx node not found\n");
|
|
|
|
u32 perf_state_count;
|
|
if (ADT_GETPROP(adt, sgx, "perf-state-count", &perf_state_count) < 0 || !perf_state_count)
|
|
bail("ADT: GPU: missing perf-state-count\n");
|
|
|
|
u32 perf_state_table_count;
|
|
if (ADT_GETPROP(adt, sgx, "perf-state-table-count", &perf_state_table_count) < 0 ||
|
|
!perf_state_table_count)
|
|
bail("ADT: GPU: missing perf-state-table-count\n");
|
|
|
|
if (perf_state_count > MAX_PSTATES)
|
|
bail("ADT: GPU: perf-state-count too large\n");
|
|
|
|
if (perf_state_table_count > MAX_CLUSTERS)
|
|
bail("ADT: GPU: perf-state-table-count too large\n");
|
|
|
|
u32 perf_states_len;
|
|
const struct perf_state *perf_states, *perf_states_sram;
|
|
const struct aux_perf_states *perf_states_afr, *perf_states_cs;
|
|
|
|
perf_states = adt_getprop(adt, sgx, "perf-states", &perf_states_len);
|
|
if (!perf_states ||
|
|
perf_states_len != sizeof(*perf_states) * perf_state_count * perf_state_table_count)
|
|
bail("ADT: GPU: invalid perf-states length\n");
|
|
|
|
perf_states_sram = adt_getprop(adt, sgx, "perf-states-sram", &perf_states_len);
|
|
if (perf_states_sram &&
|
|
perf_states_len != sizeof(*perf_states) * perf_state_count * perf_state_table_count)
|
|
bail("ADT: GPU: invalid perf-states-sram length\n");
|
|
|
|
perf_states_afr = adt_getprop(adt, sgx, "afr-perf-states", NULL);
|
|
perf_states_cs = adt_getprop(adt, sgx, "cs-perf-states", NULL);
|
|
|
|
if (has_cs_afr && !perf_states_cs)
|
|
bail("ADT: GPU: cs-perf-states not found\n");
|
|
|
|
if (has_cs_afr && !perf_states_afr)
|
|
bail("ADT: GPU: afr-perf-states not found\n");
|
|
|
|
u32 max_pwr[MAX_PSTATES];
|
|
float core_leak[MAX_CLUSTERS];
|
|
float sram_leak[MAX_CLUSTERS];
|
|
float cs_leak[MAX_DIES];
|
|
float afr_leak[MAX_DIES];
|
|
|
|
if (calc_power(perf_state_count, perf_state_table_count, perf_states, perf_states_sram,
|
|
perf_states_cs, max_pwr, core_leak, sram_leak, cs_leak, afr_leak))
|
|
return -1;
|
|
|
|
printf("FDT: GPU: Max power table: ");
|
|
for (u32 i = 0; i < perf_state_count; i++) {
|
|
printf("%d ", max_pwr[i]);
|
|
}
|
|
printf("\nFDT: GPU: Core leakage table: ");
|
|
for (u32 i = 0; i < perf_state_table_count; i++) {
|
|
printf("%d.%03d ", (int)core_leak[i], ((int)(core_leak[i] * 1000) % 1000));
|
|
}
|
|
printf("\nFDT: GPU: SRAM leakage table: ");
|
|
for (u32 i = 0; i < perf_state_table_count; i++) {
|
|
printf("%d.%03d ", (int)sram_leak[i], ((int)(sram_leak[i] * 1000) % 1000));
|
|
}
|
|
printf("\n");
|
|
|
|
if (fdt_set_float_array(dt, gpu, "apple,core-leak-coef", core_leak, perf_state_table_count))
|
|
return -1;
|
|
|
|
if (fdt_set_float_array(dt, gpu, "apple,sram-leak-coef", sram_leak, perf_state_table_count))
|
|
return -1;
|
|
|
|
u32 i = 0;
|
|
int opp;
|
|
fdt_for_each_subnode(opp, dt, opps)
|
|
{
|
|
fdt32_t volts[MAX_CLUSTERS];
|
|
|
|
for (u32 j = 0; j < perf_state_table_count; j++) {
|
|
volts[j] = cpu_to_fdt32(perf_states[i + j * perf_state_count].volt * 1000);
|
|
}
|
|
|
|
if (i >= perf_state_count)
|
|
bail("FDT: GPU: Expected %d operating points, but found more\n", perf_state_count);
|
|
|
|
if (fdt_setprop_inplace(dt, opp, "opp-microvolt", &volts,
|
|
sizeof(u32) * perf_state_table_count))
|
|
bail("FDT: GPU: Failed to set opp-microvolt for PS %d\n", i);
|
|
|
|
if (fdt_setprop_inplace_u64(dt, opp, "opp-hz", perf_states[i].freq))
|
|
bail("FDT: GPU: Failed to set opp-hz for PS %d\n", i);
|
|
|
|
if (fdt_setprop_inplace_u32(dt, opp, "opp-microwatt", max_pwr[i]))
|
|
bail("FDT: GPU: Failed to set opp-microwatt for PS %d\n", i);
|
|
|
|
i++;
|
|
}
|
|
|
|
if (i != perf_state_count)
|
|
bail("FDT: GPU: Expected %d operating points, but found %d\n", perf_state_count, i);
|
|
|
|
if (has_cs_afr) {
|
|
int ret = fdt_set_aux_opp(dt, gpu, "apple,cs-opp", perf_states_cs, dies);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (fdt_set_float_array(dt, gpu, "apple,cs-leak-coef", cs_leak, dies))
|
|
return -1;
|
|
|
|
printf("FDT: GPU: CS leakage table: ");
|
|
for (u32 i = 0; i < dies; i++) {
|
|
printf("%d.%03d ", (int)cs_leak[i], ((int)(cs_leak[i] * 1000) % 1000));
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
if (has_cs_afr) {
|
|
int ret = fdt_set_aux_opp(dt, gpu, "apple,afr-opp", perf_states_afr, dies);
|
|
if (ret)
|
|
return ret;
|
|
if (fdt_set_float_array(dt, gpu, "apple,afr-leak-coef", afr_leak, dies))
|
|
return -1;
|
|
|
|
printf("FDT: GPU: AFR leakage table: ");
|
|
for (u32 i = 0; i < dies; i++) {
|
|
printf("%d.%03d ", (int)afr_leak[i], ((int)(afr_leak[i] * 1000) % 1000));
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
if (dt_set_region(dt, sgx, "gfx-handoff", "/reserved-memory/uat-handoff"))
|
|
return -1;
|
|
if (dt_set_region(dt, sgx, "gfx-shared-region", "/reserved-memory/uat-pagetables"))
|
|
return -1;
|
|
if (dt_set_region(dt, sgx, "gpu-region", "/reserved-memory/uat-ttbs"))
|
|
return -1;
|
|
|
|
// refresh gpu dt node offset after modifying the dt in dt_set_region()
|
|
gpu = fdt_path_offset(dt, "gpu");
|
|
if (gpu < 0) {
|
|
printf("FDT: GPU: gpu alias not found in device tree\n");
|
|
return 0;
|
|
}
|
|
|
|
if (firmware_set_fdt(dt, gpu, "apple,firmware-version", &os_firmware))
|
|
return -1;
|
|
|
|
const struct fw_version_info *compat;
|
|
|
|
switch (os_firmware.version) {
|
|
case V12_3_1:
|
|
compat = &fw_versions[V12_3];
|
|
break;
|
|
case V13_5B4:
|
|
case V13_6_2:
|
|
compat = &fw_versions[V13_5];
|
|
break;
|
|
default:
|
|
compat = &os_firmware;
|
|
break;
|
|
}
|
|
|
|
if (firmware_set_fdt(dt, gpu, "apple,firmware-compat", compat))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|