mirror of
https://github.com/AsahiLinux/u-boot
synced 2025-01-11 20:59:01 +00:00
61674a17bc
This Octeon 3 DDR driver is ported from the 2013 Cavium / Marvell U-Boot repository. It currently supports DDR4 on Octeon 3. It can later be extended to also support DDR3 and Octeon 2 platforms. Part 2 includes the very complex Octeon 3 DDR4 configuration. Signed-off-by: Aaron Williams <awilliams@marvell.com> Signed-off-by: Stefan Roese <sr@denx.de>
11030 lines
326 KiB
C
11030 lines
326 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
||
/*
|
||
* Copyright (C) 2020 Marvell International Ltd.
|
||
*/
|
||
|
||
#include <command.h>
|
||
#include <dm.h>
|
||
#include <hang.h>
|
||
#include <i2c.h>
|
||
#include <ram.h>
|
||
#include <time.h>
|
||
|
||
#include <linux/bitops.h>
|
||
#include <linux/io.h>
|
||
|
||
#include <mach/octeon_ddr.h>
|
||
|
||
/* Random number generator stuff */
|
||
|
||
#define CVMX_RNM_CTL_STATUS 0x0001180040000000
|
||
#define CVMX_OCT_DID_RNG 8ULL
|
||
|
||
/*
 * Compose a 64-bit I/O address from a major and a sub device ID.
 *
 * Bit 48 is always set, major_did is placed starting at bit 43 and
 * sub_did starting at bit 40. Neither argument is masked here, so a
 * value wider than its slot spills into the neighbouring bits.
 */
static u64 cvmx_build_io_address(u64 major_did, u64 sub_did)
{
	u64 io_addr = 0x1ull << 48;

	io_addr |= major_did << 43;
	io_addr |= sub_did << 40;

	return io_addr;
}
|
||
|
||
static u64 cvmx_rng_get_random64(void)
|
||
{
|
||
return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0));
|
||
}
|
||
|
||
static void cvmx_rng_enable(void)
|
||
{
|
||
u64 val;
|
||
|
||
val = csr_rd(CVMX_RNM_CTL_STATUS);
|
||
val |= BIT(0) | BIT(1);
|
||
csr_wr(CVMX_RNM_CTL_STATUS, val);
|
||
}
|
||
|
||
#define RLEVEL_PRINTALL_DEFAULT 1
|
||
#define WLEVEL_PRINTALL_DEFAULT 1
|
||
|
||
/*
|
||
* Define how many HW WL samples to take for majority voting.
|
||
* MUST BE odd!!
|
||
* Assume there should only be 2 possible values that will show up,
|
||
* so treat ties as a problem!!!
|
||
* NOTE: Do not change this without checking the code!!!
|
||
*/
|
||
#define WLEVEL_LOOPS_DEFAULT 5
|
||
|
||
#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1
|
||
#define SW_WLEVEL_HW_DEFAULT 1
|
||
#define DEFAULT_BEST_RANK_SCORE 9999999
|
||
#define MAX_RANK_SCORE_LIMIT 99
|
||
|
||
/*
|
||
* Define how many HW RL samples per rank to take multiple samples will
|
||
* allow looking for the best sample score
|
||
*/
|
||
#define RLEVEL_SAMPLES_DEFAULT 3
|
||
|
||
#define ddr_seq_print(format, ...) do {} while (0)
|
||
|
||
/* Holds four integer counters in bitcnt[0..3] */
struct wlevel_bitcnt {
	int bitcnt[4];
};
|
||
|
||
static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
|
||
int ecc_ena, int *settings, char *title);
|
||
|
||
static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
|
||
int dac_value, int byte);
|
||
|
||
/* "mode" arg */
|
||
#define DBTRAIN_TEST 0
|
||
#define DBTRAIN_DBI 1
|
||
#define DBTRAIN_LFSR 2
|
||
|
||
static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
|
||
int mode, u64 *xor_data);
|
||
|
||
#define LMC_DDR3_RESET_ASSERT 0
|
||
#define LMC_DDR3_RESET_DEASSERT 1
|
||
|
||
static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset)
|
||
{
|
||
union cvmx_lmcx_reset_ctl reset_ctl;
|
||
|
||
/*
|
||
* 4. Deassert DDRn_RESET_L pin by writing
|
||
* LMC(0..3)_RESET_CTL[DDR3RST] = 1
|
||
* without modifying any other LMC(0..3)_RESET_CTL fields.
|
||
* 5. Read LMC(0..3)_RESET_CTL and wait for the result.
|
||
* 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
|
||
* delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE*
|
||
* assertion.
|
||
*/
|
||
debug("LMC%d %s DDR_RESET_L\n", if_num,
|
||
(reset ==
|
||
LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");
|
||
|
||
reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
|
||
reset_ctl.cn78xx.ddr3rst = reset;
|
||
lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64);
|
||
|
||
lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
|
||
|
||
udelay(500);
|
||
}
|
||
|
||
static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num)
|
||
{
|
||
/*
|
||
* 5.9.6 LMC RESET Initialization
|
||
*
|
||
* The purpose of this step is to assert/deassert the RESET# pin at the
|
||
* DDR3/DDR4 parts.
|
||
*
|
||
* This LMC RESET step is done for all enabled LMCs.
|
||
*
|
||
* It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
|
||
* are in self refresh and are currently preserving their
|
||
* contents. (Software can determine this via
|
||
* LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
|
||
* this section assumes that the DRAM contents need not be preserved.
|
||
*
|
||
* The remainder of this section assumes that the CN78XX DDRn_RESET_L
|
||
* pin is attached to the RESET# pin of the attached DDR3/DDR4 parts,
|
||
* as will be appropriate in many systems.
|
||
*
|
||
* (In other systems, such as ones that can preserve DDR3/DDR4 part
|
||
* contents while CN78XX is powered down, it will not be appropriate to
|
||
* directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the
|
||
* DDR3/DDR4 parts, and this section may not apply.)
|
||
*
|
||
* The remainder of this section describes the sequence for LMCn.
|
||
*
|
||
* Perform the following six substeps for LMC reset initialization:
|
||
*
|
||
* 1. If not done already, assert DDRn_RESET_L pin by writing
|
||
* LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other
|
||
* LMC(0..3)_RESET_CTL fields.
|
||
*/
|
||
|
||
if (!ddr_memory_preserved(priv)) {
|
||
/*
|
||
* 2. Read LMC(0..3)_RESET_CTL and wait for the result.
|
||
*/
|
||
|
||
lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
|
||
|
||
/*
|
||
* 3. Wait until RESET# assertion-time requirement from JEDEC
|
||
* DDR3/DDR4 specification is satisfied (200 us during a
|
||
* power-on ramp, 100ns when power is already stable).
|
||
*/
|
||
|
||
udelay(200);
|
||
|
||
/*
|
||
* 4. Deassert DDRn_RESET_L pin by writing
|
||
* LMC(0..3)_RESET_CTL[DDR3RST] = 1
|
||
* without modifying any other LMC(0..3)_RESET_CTL fields.
|
||
* 5. Read LMC(0..3)_RESET_CTL and wait for the result.
|
||
* 6. Wait a minimum of 500us. This guarantees the necessary
|
||
* T = 500us delay between DDRn_RESET_L deassertion and
|
||
* DDRn_DIMM*_CKE* assertion.
|
||
*/
|
||
cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
|
||
|
||
/* Toggle Reset Again */
|
||
/* That is, assert, then de-assert, one more time */
|
||
cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT);
|
||
cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
|
||
}
|
||
}
|
||
|
||
void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num,
|
||
int sequence)
|
||
{
|
||
/*
|
||
* 3. Without changing any other fields in LMC(0)_CONFIG, write
|
||
* LMC(0)_CONFIG[RANKMASK] then write both
|
||
* LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
|
||
* operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
|
||
* the ranks that will participate in the sequence.
|
||
*
|
||
* The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
|
||
* selfrefresh exit, depending on whether the DRAM parts are in
|
||
* self-refresh and whether their contents should be preserved. While
|
||
* LMC performs these sequences, it will not perform any other DDR3
|
||
* transactions. When the sequence is complete, hardware sets the
|
||
* LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
|
||
* initialized.
|
||
*
|
||
* If power-up/init is selected immediately following a DRESET
|
||
* assertion, LMC executes the sequence described in the "Reset and
|
||
* Initialization Procedure" section of the JEDEC DDR3
|
||
* specification. This includes activating CKE, writing all four DDR3
|
||
* mode registers on all selected ranks, and issuing the required
|
||
* ZQCL
|
||
* command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
|
||
* with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
|
||
* LMC writes the JEDEC standard SSTE32882 control words selected by
|
||
* LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
|
||
* the first DDR3 mode register write operation.
|
||
* LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
|
||
* corresponding DIMM is not present.
|
||
*
|
||
* If self-refresh exit is selected, LMC executes the required SRX
|
||
* command followed by a refresh and ZQ calibration. Section 4.5
|
||
* describes behavior of a REF + ZQCS. LMC does not write the DDR3
|
||
* mode registers as part of this sequence, and the mode register
|
||
* parameters must match at self-refresh entry and exit times.
|
||
*
|
||
* 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE]
|
||
* to be set.
|
||
*
|
||
* 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
|
||
* been initialized.
|
||
*/
|
||
|
||
union cvmx_lmcx_seq_ctl seq_ctl;
|
||
union cvmx_lmcx_config lmc_config;
|
||
int timeout;
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
lmc_config.s.rankmask = rank_mask;
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
|
||
|
||
seq_ctl.u64 = 0;
|
||
|
||
seq_ctl.s.init_start = 1;
|
||
seq_ctl.s.seq_sel = sequence;
|
||
|
||
ddr_seq_print
|
||
("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n",
|
||
rank_mask, sequence, sequence_str[sequence]);
|
||
|
||
if (seq_ctl.s.seq_sel == 3)
|
||
debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num,
|
||
rank_mask);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64);
|
||
lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
|
||
|
||
timeout = 100;
|
||
do {
|
||
udelay(100); /* Wait a while */
|
||
seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
|
||
if (--timeout == 0) {
|
||
printf("Sequence %d timed out\n", sequence);
|
||
break;
|
||
}
|
||
} while (seq_ctl.s.seq_complete != 1);
|
||
|
||
ddr_seq_print(" LMC sequence=%x: Completed.\n", sequence);
|
||
}
|
||
|
||
#define bdk_numa_get_address(n, p) ((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT)
|
||
#define AREA_BASE_OFFSET BIT_ULL(26)
|
||
|
||
/*
 * Write/modify/verify test over two mirrored DRAM areas behind one LMC.
 *
 * @priv:     driver private data, handed to the L2C register accessors
 * @lmc:      LMC interface number; (lmc << 7) is OR-ed into the address
 * @p:        base address; AREA_BASE_OFFSET is added, then the LMC and
 *            node bits and bit 63 are OR-ed in
 * @bitmask:  data bits to check; bits that are clear are ignored when
 *            comparing
 * @xor_data: optional two-entry array; entry 0 receives the union of
 *            all mismatching bits, entry 1 is always written as zero
 *
 * Three passes are made over the same address set, in two areas that
 * are 2^26 bytes apart:
 *   INIT   - write pattern1 * address to both areas
 *   MODIFY - XOR both areas in place with a second random value
 *   TEST   - read both areas back and compare against the predicted
 *            value
 * L2C_CTL[DISSBLKDTY] is forced to 1 for the duration of the test and
 * restored to its previous value on every exit path.
 *
 * Return: bitmask of failing byte lanes (bit n set = error in byte
 * lane n), 0 if no mismatch was seen.
 */
static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p,
			    u64 bitmask, u64 *xor_data)
{
	u64 p1, p2, d1, d2;
	u64 v, v1;
	u64 p2offset = (1ULL << 26); // offset to area 2
	u64 datamask;
	u64 xor;
	u64 i, j, k;
	u64 ii;
	int errors = 0;
	u64 pattern1 = cvmx_rng_get_random64();
	u64 pattern2 = 0;
	u64 bad_bits[2] = { 0, 0 };
	int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
	union cvmx_l2c_ctl l2c_ctl;
	int burst;
	int saved_dissblkdty;
	int node = 0;

	// Force full cacheline write-backs to boost traffic
	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
	saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty;
	l2c_ctl.cn78xx.dissblkdty = 1;
	l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);

	if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
		kbitno = 18;

	// Byte lanes may be clear in the mask to indicate no testing on that
	// lane.
	datamask = bitmask;

	/*
	 * Add offset to both test regions to not clobber boot stuff
	 * when running from L2 for NAND boot.
	 */
	p += AREA_BASE_OFFSET;	// make sure base is out of the way of boot

	// final address must include LMC and node
	p |= (lmc << 7);	/* Map address into proper interface */
	p = bdk_numa_get_address(node, p);	/* Map to node */
	p |= 1ull << 63;

	/*
	 * Loop bounds and strides. K_MAX depends on the local kbitno.
	 * These stay defined after this function ends.
	 */
#define II_INC BIT_ULL(22)
#define II_MAX BIT_ULL(22)
#define K_INC  BIT_ULL(14)
#define K_MAX  BIT_ULL(kbitno)
#define J_INC  BIT_ULL(9)
#define J_MAX  BIT_ULL(12)
#define I_INC  BIT_ULL(3)
#define I_MAX  BIT_ULL(7)

	debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n",
	      node, lmc, __func__, p, p + p2offset, 1ULL << kbitno);

	// loops are ordered so that only a single 64-bit slot is written to
	// each cacheline at one time, then the cachelines are forced out;
	// this should maximize read/write traffic

	// FIXME? extend the range of memory tested!!
	for (ii = 0; ii < II_MAX; ii += II_INC) {
		for (i = 0; i < I_MAX; i += I_INC) {
			for (k = 0; k < K_MAX; k += K_INC) {
				for (j = 0; j < J_MAX; j += J_INC) {
					p1 = p + ii + k + j;
					p2 = p1 + p2offset;

					v = pattern1 * (p1 + i);
					// write the same thing to both areas
					v1 = v;

					cvmx_write64_uint64(p1 + i, v);
					cvmx_write64_uint64(p2 + i, v1);

					CVMX_CACHE_WBIL2(p1, 0);
					CVMX_CACHE_WBIL2(p2, 0);
				}
			}
		}
	}

	CVMX_DCACHE_INVALIDATE;

	debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc);

	/* Make a series of passes over the memory areas. */

	for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) {
		u64 this_pattern = cvmx_rng_get_random64();

		/* Running XOR of every modifier applied so far */
		pattern2 ^= this_pattern;

		/*
		 * XOR the data with a random value, applying the change to both
		 * memory areas.
		 */

		// FIXME? extend the range of memory tested!!
		for (ii = 0; ii < II_MAX; ii += II_INC) {
			// FIXME: rearranged, did not make much difference?
			for (i = 0; i < I_MAX; i += I_INC) {
				for (k = 0; k < K_MAX; k += K_INC) {
					for (j = 0; j < J_MAX; j += J_INC) {
						p1 = p + ii + k + j;
						p2 = p1 + p2offset;

						v = cvmx_read64_uint64(p1 + i) ^
						    this_pattern;
						v1 = cvmx_read64_uint64(p2 + i) ^
						     this_pattern;

						cvmx_write64_uint64(p1 + i, v);
						cvmx_write64_uint64(p2 + i, v1);

						CVMX_CACHE_WBIL2(p1, 0);
						CVMX_CACHE_WBIL2(p2, 0);
					}
				}
			}
		}

		CVMX_DCACHE_INVALIDATE;

		debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n",
		      node, lmc);

		/*
		 * Look for differences in the areas. If there is a mismatch,
		 * reset both memory locations with the same pattern. Failing
		 * to do so means that on all subsequent passes the pair of
		 * locations remain out of sync giving spurious errors.
		 */

		// FIXME: Change the loop order so that an entire cache line
		//        is compared at one time. This is so that a read
		//        error that occurs *anywhere* on the cacheline will
		//        be caught, rather than comparing only 1 cacheline
		//        slot at a time, where an error on a different
		//        slot will be missed that time around
		// Does the above make sense?

		// FIXME? extend the range of memory tested!!
		for (ii = 0; ii < II_MAX; ii += II_INC) {
			for (k = 0; k < K_MAX; k += K_INC) {
				for (j = 0; j < J_MAX; j += J_INC) {
					p1 = p + ii + k + j;
					p2 = p1 + p2offset;

					// process entire cachelines in the
					// innermost loop
					for (i = 0; i < I_MAX; i += I_INC) {
						int bybit = 1;
						// start in byte lane 0
						u64 bymsk = 0xffULL;

						// FIXME: this should predict
						// what we find...???
						v = ((p1 + i) * pattern1) ^
						    pattern2;
						d1 = cvmx_read64_uint64(p1 + i);
						d2 = cvmx_read64_uint64(p2 + i);

						// union of error bits only in
						// active byte lanes
						xor = ((d1 ^ v) | (d2 ^ v)) &
						      datamask;

						if (!xor)
							continue;

						// accumulate bad bits
						bad_bits[0] |= xor;

						// report the mismatching word
						// once, not once per byte lane
						debug("ERROR(%03d): [0x%016llX] [0x%016llX] expected 0x%016llX d1 %016llX d2 %016llX\n",
						      burst, p1, p2, v,
						      d1, d2);

						while (xor != 0) {
							// error(s) in this lane
							if (xor & bymsk) {
								// set the byte
								// error bit
								errors |= bybit;
								// clear byte
								// lane in
								// error bits
								xor &= ~bymsk;
								// clear the
								// byte lane in
								// the mask
								datamask &= ~bymsk;
#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
								// nothing left
								// to do; leave
								// via the common
								// exit so L2C_CTL
								// is restored and
								// xor_data filled
								if (datamask == 0)
									goto done;
#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
							}
							// move mask into
							// next byte lane
							bymsk <<= 8;
							// move bit into next
							// byte position
							bybit <<= 1;
						}
					}
					CVMX_CACHE_WBIL2(p1, 0);
					CVMX_CACHE_WBIL2(p2, 0);
				}
			}
		}

		debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
		      node, lmc);
	}

#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
done:
#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
	if (xor_data) {		// send the bad bits back...
		xor_data[0] = bad_bits[0];
		xor_data[1] = bad_bits[1];	// let it be zeroed
	}

	// Restore original setting that could enable partial cacheline writes
	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
	l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty;
	l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);

	return errors;
}
|
||
|
||
static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank,
|
||
int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
|
||
{
|
||
union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
|
||
|
||
lmc_mr_mpr_ctl.u64 = 0;
|
||
lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
|
||
lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel;
|
||
lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank;
|
||
lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value =
|
||
(mr_wr_addr == -1) ? 1 : 0;
|
||
lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1;
|
||
lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
|
||
|
||
/* Mode Register Write */
|
||
oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
|
||
}
|
||
|
||
#define INV_A0_17(x) ((x) ^ 0x22bf8)
|
||
|
||
/*
 * Issue an MR3 write carrying the given MPR setting to every rank that
 * is selected in rank_mask, for the A side (bg1 == 0) or the B side.
 *
 * @priv:       driver private data
 * @rank_mask:  bit n set = rank n takes part
 * @if_num:     LMC interface number
 * @dimm_count: number of DIMMs; ranks 0 .. dimm_count * 4 - 1 are scanned
 * @mpr:        MPR value, placed at bit 2 of the mode register address
 * @bg1:        0 for the A side; anything else selects the B side and is
 *              passed on as MR_WR_BG1
 *
 * For the B side the address is run through INV_A0_17() and the mode
 * register select is passed as ~3 instead of 3.
 */
static void set_mpr_mode(struct ddr_priv *priv, int rank_mask,
			 int if_num, int dimm_count, int mpr, int bg1)
{
	const int b_side = (bg1 != 0);
	/* MR3 address/select, A-side plain, B-side inverted */
	const int mr_addr = b_side ? INV_A0_17(mpr << 2) : (mpr << 2);
	const int mr_sel = b_side ? ~3 : 3;
	const int rank_limit = dimm_count * 4;
	int rank;

	debug("All Ranks: Set mpr mode = %x %c-side\n",
	      mpr, b_side ? 'B' : 'A');

	for (rank = 0; rank < rank_limit; rank++) {
		if (rank_mask & (1 << rank))
			ddr4_mrw(priv, if_num, rank, mr_addr, mr_sel, bg1);
	}
}
|
||
|
||
static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num,
|
||
int rank, int page, int location)
|
||
{
|
||
union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
|
||
|
||
lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num));
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0;
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
|
||
lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
|
||
lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0; /* Read=0, Write=1 */
|
||
lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
|
||
|
||
/* MPR register access sequence */
|
||
oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
|
||
|
||
debug("LMC_MR_MPR_CTL : 0x%016llx\n",
|
||
lmc_mr_mpr_ctl.u64);
|
||
debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
|
||
debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
|
||
debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n",
|
||
lmc_mr_mpr_ctl.cn70xx.mpr_loc);
|
||
debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n",
|
||
lmc_mr_mpr_ctl.cn70xx.mpr_wr);
|
||
}
|
||
|
||
static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable)
|
||
{
|
||
union cvmx_lmcx_control lmc_control;
|
||
int save_rdimm_mode;
|
||
|
||
lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
save_rdimm_mode = lmc_control.s.rdimm_ena;
|
||
lmc_control.s.rdimm_ena = enable;
|
||
debug("Setting RDIMM_ENA = %x\n", enable);
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64);
|
||
|
||
return save_rdimm_mode;
|
||
}
|
||
|
||
/*
 * Perform an MPR read on one rank and fetch the result.
 *
 * @priv:     driver private data
 * @if_num:   LMC interface number
 * @rank:     rank index
 * @page:     MPR page
 * @location: MPR location
 * @mpr_data: output; only element 0 is written, with the contents of
 *            LMC(0)_MPR_DATA0
 */
static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank,
			  int page, int location, u64 *mpr_data)
{
	/* Run the access sequence first, then pick up the data register */
	do_ddr4_mpr_read(priv, if_num, rank, page, location);

	*mpr_data = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
}
|
||
|
||
/* Display MPR values for Page */
|
||
static void display_mpr_page(struct ddr_priv *priv, int rank_mask,
|
||
int if_num, int page)
|
||
{
|
||
int rankx, location;
|
||
u64 mpr_data[3];
|
||
|
||
for (rankx = 0; rankx < 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ",
|
||
if_num, rankx, page);
|
||
for (location = 0; location < 4; location++) {
|
||
ddr4_mpr_read(priv, if_num, rankx, page, location,
|
||
mpr_data);
|
||
debug("0x%02llx ", mpr_data[0] & 0xFF);
|
||
}
|
||
debug("\n");
|
||
|
||
} /* for (rankx = 0; rankx < 4; rankx++) */
|
||
}
|
||
|
||
/*
 * Write one byte to an MPR location of one rank: program
 * LMC(0)_MR_MPR_CTL from scratch and run LMC sequence 0x9 (MPR register
 * access).
 *
 * @priv:     driver private data
 * @if_num:   LMC interface number
 * @rank:     rank index; also used to build the rank mask (1 << rank)
 * @page:     MPR page, written to MR_WR_SEL
 * @location: MPR location, written to MPR_LOC
 * @mpr_data: byte to write, carried in MR_WR_ADDR
 */
static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank,
			   int page, int location, u8 mpr_data)
{
	union cvmx_lmcx_mr_mpr_ctl mpr_ctl;

	/* Start from zero: fields not assigned below are written as 0 */
	mpr_ctl.u64 = 0;
	mpr_ctl.cn70xx.mpr_wr = 1;	/* Read=0, Write=1 */
	mpr_ctl.cn70xx.mpr_loc = location;
	mpr_ctl.cn70xx.mr_wr_rank = rank;
	mpr_ctl.cn70xx.mr_wr_sel = page;	/* Page */
	mpr_ctl.cn70xx.mr_wr_addr = mpr_data;
	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), mpr_ctl.u64);

	/* MPR register access sequence */
	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);

	/* Dump what was programmed */
	debug("LMC_MR_MPR_CTL : 0x%016llx\n", mpr_ctl.u64);
	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
	      mpr_ctl.cn70xx.mr_wr_addr);
	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
	      mpr_ctl.cn70xx.mr_wr_sel);
	debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n",
	      mpr_ctl.cn70xx.mpr_loc);
	debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n",
	      mpr_ctl.cn70xx.mpr_wr);
}
|
||
|
||
static void set_vref(struct ddr_priv *priv, int if_num, int rank,
|
||
int range, int value)
|
||
{
|
||
union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
|
||
union cvmx_lmcx_modereg_params3 lmc_modereg_params3;
|
||
int mr_wr_addr = 0;
|
||
|
||
lmc_mr_mpr_ctl.u64 = 0;
|
||
lmc_modereg_params3.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_MODEREG_PARAMS3(if_num));
|
||
|
||
/* A12:A10 tCCD_L */
|
||
mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10;
|
||
mr_wr_addr |= 1 << 7; /* A7 1 = Enable(Training Mode) */
|
||
mr_wr_addr |= range << 6; /* A6 vrefDQ Training Range */
|
||
mr_wr_addr |= value << 0; /* A5:A0 vrefDQ Training Value */
|
||
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6; /* Write MR6 */
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
|
||
lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
|
||
|
||
/* 0x8 = Mode Register Write */
|
||
oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
|
||
|
||
/*
|
||
* It is vendor specific whether vref_value is captured with A7=1.
|
||
* A subsequent MRS might be necessary.
|
||
*/
|
||
oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
|
||
|
||
mr_wr_addr &= ~(1 << 7); /* A7 0 = Disable(Training Mode) */
|
||
lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
|
||
lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
|
||
}
|
||
|
||
static void set_dram_output_inversion(struct ddr_priv *priv, int if_num,
|
||
int dimm_count, int rank_mask,
|
||
int inversion)
|
||
{
|
||
union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl;
|
||
union cvmx_lmcx_dimmx_params lmc_dimmx_params;
|
||
union cvmx_lmcx_dimm_ctl lmc_dimm_ctl;
|
||
int dimm_no;
|
||
|
||
/* Don't touch extenced register control words */
|
||
lmc_ddr4_dimm_ctl.u64 = 0;
|
||
lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64);
|
||
|
||
debug("All DIMMs: Register Control Word RC0 : %x\n",
|
||
(inversion & 1));
|
||
|
||
for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
|
||
lmc_dimmx_params.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num));
|
||
lmc_dimmx_params.s.rc0 =
|
||
(lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
|
||
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num),
|
||
lmc_dimmx_params.u64);
|
||
}
|
||
|
||
/* LMC0_DIMM_CTL */
|
||
lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
|
||
lmc_dimm_ctl.s.dimm0_wmask = 0x1;
|
||
lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;
|
||
|
||
debug("LMC DIMM_CTL : 0x%016llx\n",
|
||
lmc_dimm_ctl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64);
|
||
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); /* Init RCW */
|
||
}
|
||
|
||
/*
 * Write a pattern byte into selected MPR page 0 locations of every
 * rank selected in rank_mask.
 *
 * @priv:          driver private data
 * @rank_mask:     bit n set = rank n is written
 * @if_num:        LMC interface number
 * @dimm_count:    number of DIMMs; ranks 0 .. dimm_count * 4 - 1 are
 *                 scanned
 * @pattern:       value handed to ddr4_mpr_write() (which takes a u8)
 * @location_mask: bit n set = MPR location n (0-3) is written
 */
static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask,
				    int if_num, int dimm_count, int pattern,
				    int location_mask)
{
	const int rank_limit = dimm_count * 4;
	int rank, loc;

	for (rank = 0; rank < rank_limit; rank++) {
		if ((rank_mask & (1 << rank)) == 0)
			continue;

		for (loc = 0; loc < 4; ++loc) {
			if (location_mask & (1 << loc))
				ddr4_mpr_write(priv, if_num, rank,
					       /* page */ 0,
					       /* location */ loc,
					       pattern);
		}
	}
}
|
||
|
||
static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask,
|
||
int if_num, int dimm_count)
|
||
{
|
||
int save_ref_zqcs_int;
|
||
union cvmx_lmcx_config lmc_config;
|
||
|
||
/*
|
||
* Okay, here is the latest sequence. This should work for all
|
||
* chips and passes (78,88,73,etc). This sequence should be run
|
||
* immediately after DRAM INIT. The basic idea is to write the
|
||
* same pattern into each of the 4 MPR locations in the DRAM, so
|
||
* that the same value is returned when doing MPR reads regardless
|
||
* of the inversion state. My advice is to put this into a
|
||
* function, change_rdimm_mpr_pattern or something like that, so
|
||
* that it can be called multiple times, as I think David wants a
|
||
* clock-like pattern for OFFSET training, but does not want a
|
||
* clock pattern for Bit-Deskew. You should then be able to call
|
||
* this at any point in the init sequence (after DRAM init) to
|
||
* change the pattern to a new value.
|
||
* Mike
|
||
*
|
||
* A correction: PHY doesn't need any pattern during offset
|
||
* training, but needs clock like pattern for internal vref and
|
||
* bit-dskew training. So for that reason, these steps below have
|
||
* to be conducted before those trainings to pre-condition
|
||
* the pattern. David
|
||
*
|
||
* Note: Step 3, 4, 8 and 9 have to be done through RDIMM
|
||
* sequence. If you issue MRW sequence to do RCW write (in o78 pass
|
||
* 1 at least), LMC will still do two commands because
|
||
* CONTROL[RDIMM_ENA] is still set high. We don't want it to have
|
||
* any unintentional mode register write so it's best to do what
|
||
* Mike is doing here.
|
||
* Andrew
|
||
*/
|
||
|
||
/* 1) Disable refresh (REF_ZQCS_INT = 0) */
|
||
|
||
debug("1) Disable refresh (REF_ZQCS_INT = 0)\n");
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
|
||
lmc_config.cn78xx.ref_zqcs_int = 0;
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
|
||
|
||
/*
|
||
* 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
|
||
* with MODEREG_PARAMS0[MPRLOC]=0,
|
||
* MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
|
||
* MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
|
||
*/
|
||
|
||
debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
|
||
|
||
/* A-side */
|
||
set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0);
|
||
/* B-side */
|
||
set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1);
|
||
|
||
/*
|
||
* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
|
||
* the value you would like directly into
|
||
* MR_MPR_CTL[MR_WR_ADDR]
|
||
*/
|
||
|
||
/*
|
||
* 3) Disable RCD Parity (if previously enabled) - parity does not
|
||
* work if inversion disabled
|
||
*/
|
||
|
||
debug("3) Disable RCD Parity\n");
|
||
|
||
/*
|
||
* 4) Disable Inversion in the RCD.
|
||
* a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
|
||
* may be easier to use the MRW sequence (seq_sel=8). Just set
|
||
* MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
|
||
* MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg
|
||
*/
|
||
|
||
debug("4) Disable Inversion in the RCD.\n");
|
||
|
||
set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1);
|
||
|
||
/*
|
||
* 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
|
||
* non-inverted.
|
||
*/
|
||
|
||
debug("5) Disable CONTROL[RDIMM_ENA]\n");
|
||
|
||
set_rdimm_mode(priv, if_num, 0);
|
||
|
||
/*
|
||
* 6) Write all 4 MPR registers with the desired pattern (have to
|
||
* do this for all enabled ranks)
|
||
* a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
|
||
* MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern
|
||
*/
|
||
|
||
debug("6) Write all 4 MPR page 0 Training Patterns\n");
|
||
|
||
write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8);
|
||
|
||
/* 7) Re-enable RDIMM_ENA */
|
||
|
||
debug("7) Re-enable RDIMM_ENA\n");
|
||
|
||
set_rdimm_mode(priv, if_num, 1);
|
||
|
||
/* 8) Re-enable RDIMM inversion */
|
||
|
||
debug("8) Re-enable RDIMM inversion\n");
|
||
|
||
set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0);
|
||
|
||
/* 9) Re-enable RDIMM parity (if desired) */
|
||
|
||
debug("9) Re-enable RDIMM parity (if desired)\n");
|
||
|
||
/*
|
||
* 10)Take B-side devices out of MPR mode (Run MRW sequence
|
||
* (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
|
||
* MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
|
||
* MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
|
||
*/
|
||
|
||
debug("10)Take B-side devices out of MPR mode\n");
|
||
|
||
set_mpr_mode(priv, rank_mask, if_num, dimm_count,
|
||
/* mpr */ 0, /* bg1 */ 1);
|
||
|
||
/*
|
||
* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
|
||
* set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR]
|
||
*/
|
||
|
||
/* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */
|
||
|
||
debug("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
|
||
}
|
||
|
||
/*
 * Check every adjacent pair of entries along a byte-lane sequence.
 *
 * @wl:  per-byte values, each expected to be in the range 0-3
 * @seq: list of indices into wl[], terminated by a negative entry
 *
 * Each pair (current, next) is turned into the bit number
 * (current << 2) | next and looked up in the 16-bit mask 0xBDE7. The
 * four bits that are clear in that mask are 3, 4, 9 and 14, i.e. the
 * pairs (0,3), (1,0), (2,1) and (3,2): a pair is rejected when the next
 * value is exactly one less than the current one, modulo 4.
 *
 * Return: 1 as soon as a rejected pair is found, otherwise 0 (also for
 * a sequence holding a single index).
 */
static int validate_hwl_seq(int *wl, int *seq)
{
	const int valid_pairs = 0xBDE7;
	int pos;

	/* stop once the *next* sequence entry is the terminator */
	for (pos = 0; seq[pos + 1] >= 0; pos++) {
		int cur = wl[seq[pos]];
		int nxt = wl[seq[pos + 1]];
		int pair_bit = (cur << 2) | nxt;

		if (((1 << pair_bit) & valid_pairs) == 0)
			return 1;
	}

	return 0;
}
|
||
|
||
/*
 * Sanity-check the hardware write-leveling values of one rank.
 *
 * @if_num:          LMC interface number (not used by this function)
 * @lmc_wlevel_rank: write-leveling register contents to check
 * @is_rdimm:        non-zero to walk the bytes in RDIMM order, zero for
 *                   UDIMM order
 * @ecc_ena:         non-zero when byte 8 (ECC) takes part; added to 8 to
 *                   get the number of bytes examined
 *
 * Bits 2:1 of each byte's setting are extracted and adjacent bytes are
 * compared with validate_hwl_seq() along the physical byte ordering.
 *
 * Return: number of sequences that failed validation (0, 1 or 2).
 */
static int validate_hw_wl_settings(int if_num,
				   union cvmx_lmcx_wlevel_rankx
				   *lmc_wlevel_rank, int is_rdimm, int ecc_ena)
{
	/* UDIMM with ECC: bytes ascending, ECC (8) in the middle */
	int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 };
	/* UDIMM without ECC: bytes ascending */
	int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 };
	/* RDIMM with ECC, first half: ECC first, then bytes 3 down to 0 */
	int rseq1[] = { 8, 3, 2, 1, 0, -1 };
	/* RDIMM without ECC, first half: bytes 3 down to 0 */
	int rseq1no[] = { 3, 2, 1, 0, -1 };
	/* RDIMM, second half: bytes 4 up to 7 */
	int rseq2[] = { 4, 5, 6, 7, -1 };
	const int nbytes = 8 + ecc_ena;
	int wl[9];
	int lane;
	int failures;

	/* in the CSR, bytes 0-7 are always data, byte 8 is ECC */
	for (lane = 0; lane < nbytes; lane++)
		wl[lane] = (get_wl_rank(lmc_wlevel_rank, lane) >> 1) & 3;

	if (!is_rdimm) {
		/* UDIMM order: a single pass over all bytes */
		return validate_hwl_seq(wl, ecc_ena ? useq : useqno);
	}

	/* RDIMM order: two independent halves */
	failures = validate_hwl_seq(wl, ecc_ena ? rseq1 : rseq1no);
	failures += validate_hwl_seq(wl, rseq2);

	return failures;
}
|
||
|
||
/*
 * Extract the 3-bit field number x from a packed 64-bit value.
 *
 * @u: packed value
 * @x: field index
 *
 * The two low bits of the field live at bit (x * 12 + 5) of u and the
 * high bit at bit (51 + x).
 * NOTE(review): presumably x is limited to 0-3 so the two regions do
 * not overlap - confirm against the callers.
 *
 * Return: the field as a value 0-7.
 */
static unsigned int extr_wr(u64 u, int x)
{
	const u64 low_two = (u >> (x * 12 + 5)) & 0x3ULL;
	/* shifting by two less leaves the high bit at position 2 */
	const u64 high_one = (u >> (51 + x - 2)) & 0x4ULL;

	return (unsigned int)(low_two | high_one);
}
|
||
|
||
/*
 * Store a 3-bit value into field number x of a packed 64-bit value.
 *
 * @up: packed value, updated in place
 * @x:  field index
 * @v:  new field value; only bits 2:0 are used
 *
 * Uses the same layout as extr_wr(): bits 1:0 of v go to bit
 * (x * 12 + 5) and bit 2 goes to bit (51 + x). All other bits of *up
 * are left unchanged.
 */
static void insrt_wr(u64 *up, int x, int v)
{
	const int low_shift = x * 12 + 5;
	const int high_shift = 51 + x;
	const u64 field_mask = ((0x3ULL) << low_shift) |
			       ((0x1ULL) << high_shift);
	const u64 field_bits = ((v & 0x3ULL) << low_shift) |
			       ((v & 0x4ULL) << (high_shift - 2));

	*up = (*up & ~field_mask) | field_bits;
}
|
||
|
||
/* Read out Deskew Settings for DDR */
|
||
|
||
/* Eight 16-bit entries, bits[0..7], for a single byte lane */
struct deskew_bytes {
	u16 bits[8];
};
|
||
|
||
struct deskew_data {
|
||
struct deskew_bytes bytes[9];
|
||
};
|
||
|
||
/* One integer for each of nine byte lanes, bytes[0..8] */
struct dac_data {
	int bytes[9];
};
|
||
|
||
// T88 pass 1, skip 4=DAC
|
||
static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
|
||
// T88 Pass 2, skip 4=DAC and 5=DBI
|
||
static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 };
|
||
|
||
static void get_deskew_settings(struct ddr_priv *priv, int if_num,
|
||
struct deskew_data *dskdat)
|
||
{
|
||
union cvmx_lmcx_phy_ctl phy_ctl;
|
||
union cvmx_lmcx_config lmc_config;
|
||
int bit_index;
|
||
int byte_lane, byte_limit;
|
||
// NOTE: these are for pass 2.x
|
||
int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X);
|
||
const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1;
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
|
||
|
||
memset(dskdat, 0, sizeof(*dskdat));
|
||
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
phy_ctl.s.dsk_dbg_clk_scaler = 3;
|
||
|
||
for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
|
||
phy_ctl.s.dsk_dbg_byte_sel = byte_lane; // set byte lane
|
||
|
||
for (bit_index = 0; bit_index < 8; ++bit_index) {
|
||
// set bit number and start read sequence
|
||
phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index];
|
||
phy_ctl.s.dsk_dbg_rd_start = 1;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
// poll for read sequence to complete
|
||
do {
|
||
phy_ctl.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
} while (phy_ctl.s.dsk_dbg_rd_complete != 1);
|
||
|
||
// record the data
|
||
dskdat->bytes[byte_lane].bits[bit_index] =
|
||
phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
|
||
}
|
||
}
|
||
}
|
||
|
||
static void display_deskew_settings(struct ddr_priv *priv, int if_num,
|
||
struct deskew_data *dskdat,
|
||
int print_enable)
|
||
{
|
||
int byte_lane;
|
||
int bit_num;
|
||
u16 flags, deskew;
|
||
union cvmx_lmcx_config lmc_config;
|
||
int byte_limit;
|
||
const char *fc = " ?-=+*#&";
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
|
||
|
||
if (print_enable) {
|
||
debug("N0.LMC%d: Deskew Data: Bit => :",
|
||
if_num);
|
||
for (bit_num = 7; bit_num >= 0; --bit_num)
|
||
debug(" %3d ", bit_num);
|
||
debug("\n");
|
||
}
|
||
|
||
for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
|
||
if (print_enable)
|
||
debug("N0.LMC%d: Bit Deskew Byte %d %s :",
|
||
if_num, byte_lane,
|
||
(print_enable >= 3) ? "FINAL" : " ");
|
||
|
||
for (bit_num = 7; bit_num >= 0; --bit_num) {
|
||
flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
|
||
deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
|
||
|
||
if (print_enable)
|
||
debug(" %3d %c", deskew, fc[flags ^ 1]);
|
||
|
||
} /* for (bit_num = 7; bit_num >= 0; --bit_num) */
|
||
|
||
if (print_enable)
|
||
debug("\n");
|
||
}
|
||
}
|
||
|
||
static void override_deskew_settings(struct ddr_priv *priv, int if_num,
|
||
struct deskew_data *dskdat)
|
||
{
|
||
union cvmx_lmcx_phy_ctl phy_ctl;
|
||
union cvmx_lmcx_config lmc_config;
|
||
|
||
int bit, byte_lane, byte_limit;
|
||
u64 csr_data;
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
|
||
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
|
||
phy_ctl.s.phy_reset = 0;
|
||
phy_ctl.s.dsk_dbg_num_bits_sel = 1;
|
||
phy_ctl.s.dsk_dbg_offset = 0;
|
||
phy_ctl.s.dsk_dbg_clk_scaler = 3;
|
||
|
||
phy_ctl.s.dsk_dbg_wr_mode = 1;
|
||
phy_ctl.s.dsk_dbg_load_dis = 0;
|
||
phy_ctl.s.dsk_dbg_overwrt_ena = 0;
|
||
|
||
phy_ctl.s.phy_dsk_reset = 0;
|
||
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
|
||
for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
|
||
csr_data = 0;
|
||
// FIXME: can we ignore DBI?
|
||
for (bit = 0; bit < 8; ++bit) {
|
||
// fetch input and adjust
|
||
u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) &
|
||
0x7F;
|
||
|
||
/*
|
||
* lmc_general_purpose0.data[6:0] // DQ0
|
||
* lmc_general_purpose0.data[13:7] // DQ1
|
||
* lmc_general_purpose0.data[20:14] // DQ2
|
||
* lmc_general_purpose0.data[27:21] // DQ3
|
||
* lmc_general_purpose0.data[34:28] // DQ4
|
||
* lmc_general_purpose0.data[41:35] // DQ5
|
||
* lmc_general_purpose0.data[48:42] // DQ6
|
||
* lmc_general_purpose0.data[55:49] // DQ7
|
||
* lmc_general_purpose0.data[62:56] // DBI
|
||
*/
|
||
csr_data |= (bits << (7 * bit));
|
||
|
||
} /* for (bit = 0; bit < 8; ++bit) */
|
||
|
||
// update GP0 with the bit data for this byte lane
|
||
lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data);
|
||
lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num));
|
||
|
||
// start the deskew load sequence
|
||
phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
|
||
phy_ctl.s.dsk_dbg_rd_start = 1;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
// poll for read sequence to complete
|
||
do {
|
||
udelay(100);
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
} while (phy_ctl.s.dsk_dbg_rd_complete != 1);
|
||
}
|
||
|
||
// tell phy to use the new settings
|
||
phy_ctl.s.dsk_dbg_overwrt_ena = 1;
|
||
phy_ctl.s.dsk_dbg_rd_start = 0;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
phy_ctl.s.dsk_dbg_wr_mode = 0;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
}
|
||
|
||
static void process_by_rank_dac(struct ddr_priv *priv, int if_num,
|
||
int rank_mask, struct dac_data *dacdat)
|
||
{
|
||
union cvmx_lmcx_config lmc_config;
|
||
int rankx, byte_lane;
|
||
int byte_limit;
|
||
int rank_count;
|
||
struct dac_data dacsum;
|
||
int lane_probs;
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
|
||
|
||
memset((void *)&dacsum, 0, sizeof(dacsum));
|
||
rank_count = 0;
|
||
lane_probs = 0;
|
||
|
||
for (rankx = 0; rankx < 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
rank_count++;
|
||
|
||
display_dac_dbi_settings(if_num, /*dac */ 1,
|
||
lmc_config.s.ecc_ena,
|
||
&dacdat[rankx].bytes[0],
|
||
"By-Ranks VREF");
|
||
// sum
|
||
for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
|
||
if (rank_count == 2) {
|
||
int ranks_diff =
|
||
abs((dacsum.bytes[byte_lane] -
|
||
dacdat[rankx].bytes[byte_lane]));
|
||
|
||
// FIXME: is 19 a good number?
|
||
if (ranks_diff > 19)
|
||
lane_probs |= (1 << byte_lane);
|
||
}
|
||
dacsum.bytes[byte_lane] +=
|
||
dacdat[rankx].bytes[byte_lane];
|
||
}
|
||
}
|
||
|
||
// average
|
||
for (byte_lane = 0; byte_lane < byte_limit; byte_lane++)
|
||
dacsum.bytes[byte_lane] /= rank_count; // FIXME: nint?
|
||
|
||
display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena,
|
||
&dacsum.bytes[0], "All-Rank VREF");
|
||
|
||
if (lane_probs) {
|
||
debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n",
|
||
if_num, lane_probs);
|
||
}
|
||
|
||
// finally, write the averaged DAC values
|
||
for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
|
||
load_dac_override(priv, if_num, dacsum.bytes[byte_lane],
|
||
byte_lane);
|
||
}
|
||
}
|
||
|
||
static void process_by_rank_dsk(struct ddr_priv *priv, int if_num,
|
||
int rank_mask, struct deskew_data *dskdat)
|
||
{
|
||
union cvmx_lmcx_config lmc_config;
|
||
int rankx, lane, bit;
|
||
int byte_limit;
|
||
struct deskew_data dsksum, dskcnt;
|
||
u16 deskew;
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
|
||
|
||
memset((void *)&dsksum, 0, sizeof(dsksum));
|
||
memset((void *)&dskcnt, 0, sizeof(dskcnt));
|
||
|
||
for (rankx = 0; rankx < 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
// sum ranks
|
||
for (lane = 0; lane < byte_limit; lane++) {
|
||
for (bit = 0; bit < 8; ++bit) {
|
||
deskew = dskdat[rankx].bytes[lane].bits[bit];
|
||
// if flags indicate sat hi or lo, skip it
|
||
if (deskew & 6)
|
||
continue;
|
||
|
||
// clear flags
|
||
dsksum.bytes[lane].bits[bit] +=
|
||
deskew & ~7;
|
||
// count entries
|
||
dskcnt.bytes[lane].bits[bit] += 1;
|
||
}
|
||
}
|
||
}
|
||
|
||
// average ranks
|
||
for (lane = 0; lane < byte_limit; lane++) {
|
||
for (bit = 0; bit < 8; ++bit) {
|
||
int div = dskcnt.bytes[lane].bits[bit];
|
||
|
||
if (div > 0) {
|
||
dsksum.bytes[lane].bits[bit] /= div;
|
||
// clear flags
|
||
dsksum.bytes[lane].bits[bit] &= ~7;
|
||
// set LOCK
|
||
dsksum.bytes[lane].bits[bit] |= 1;
|
||
} else {
|
||
// FIXME? use reset value?
|
||
dsksum.bytes[lane].bits[bit] =
|
||
(64 << 3) | 1;
|
||
}
|
||
}
|
||
}
|
||
|
||
// TME for FINAL version
|
||
display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3);
|
||
|
||
// finally, write the averaged DESKEW values
|
||
override_deskew_settings(priv, if_num, &dsksum);
|
||
}
|
||
|
||
/*
 * Result of one deskew validation pass.
 * saturated and unlocked are counts over all bits; the three *_errs members
 * are bitmasks with one bit per byte lane.
 */
struct deskew_counts {
	int saturated;		// number of saturated bits
	int unlocked;		// number of unlocked bits
	int nibrng_errs;	// lanes with a nibble range error
	int nibunl_errs;	// lanes with a fully unlocked nibble
	int bitval_errs;	// lanes with a bit value out of bounds
};

/* Deskew values below MIN_BITVAL or above MAX_BITVAL count as errors. */
#define MIN_BITVAL 17
#define MAX_BITVAL 110
|
||
|
||
static void validate_deskew_training(struct ddr_priv *priv, int rank_mask,
|
||
int if_num, struct deskew_counts *counts,
|
||
int print_flags)
|
||
{
|
||
int byte_lane, bit_index, nib_num;
|
||
int nibrng_errs, nibunl_errs, bitval_errs;
|
||
union cvmx_lmcx_config lmc_config;
|
||
s16 nib_min[2], nib_max[2], nib_unl[2];
|
||
int byte_limit;
|
||
int print_enable = print_flags & 1;
|
||
struct deskew_data dskdat;
|
||
s16 flags, deskew;
|
||
const char *fc = " ?-=+*#&";
|
||
int bit_last;
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
|
||
|
||
memset(counts, 0, sizeof(struct deskew_counts));
|
||
|
||
get_deskew_settings(priv, if_num, &dskdat);
|
||
|
||
if (print_enable) {
|
||
debug("N0.LMC%d: Deskew Settings: Bit => :",
|
||
if_num);
|
||
for (bit_index = 7; bit_index >= 0; --bit_index)
|
||
debug(" %3d ", bit_index);
|
||
debug("\n");
|
||
}
|
||
|
||
for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
|
||
if (print_enable)
|
||
debug("N0.LMC%d: Bit Deskew Byte %d %s :",
|
||
if_num, byte_lane,
|
||
(print_flags & 2) ? "FINAL" : " ");
|
||
|
||
nib_min[0] = 127;
|
||
nib_min[1] = 127;
|
||
nib_max[0] = 0;
|
||
nib_max[1] = 0;
|
||
nib_unl[0] = 0;
|
||
nib_unl[1] = 0;
|
||
|
||
if (lmc_config.s.mode32b == 1 && byte_lane == 4) {
|
||
bit_last = 3;
|
||
if (print_enable)
|
||
debug(" ");
|
||
} else {
|
||
bit_last = 7;
|
||
}
|
||
|
||
for (bit_index = bit_last; bit_index >= 0; --bit_index) {
|
||
nib_num = (bit_index > 3) ? 1 : 0;
|
||
|
||
flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
|
||
deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
|
||
|
||
counts->saturated += !!(flags & 6);
|
||
|
||
// Do range calc even when locked; it could happen
|
||
// that a bit is still unlocked after final retry,
|
||
// and we want to have an external retry if a RANGE
|
||
// error is present at exit...
|
||
nib_min[nib_num] = min(nib_min[nib_num], deskew);
|
||
nib_max[nib_num] = max(nib_max[nib_num], deskew);
|
||
|
||
if (!(flags & 1)) { // only when not locked
|
||
counts->unlocked += 1;
|
||
nib_unl[nib_num] += 1;
|
||
}
|
||
|
||
if (print_enable)
|
||
debug(" %3d %c", deskew, fc[flags ^ 1]);
|
||
}
|
||
|
||
/*
|
||
* Now look for nibble errors
|
||
*
|
||
* For bit 55, it looks like a bit deskew problem. When the
|
||
* upper nibble of byte 6 needs to go to saturation, bit 7
|
||
* of byte 6 locks prematurely at 64. For DIMMs with raw
|
||
* card A and B, can we reset the deskew training when we
|
||
* encounter this case? The reset criteria should be looking
|
||
* at one nibble at a time for raw card A and B; if the
|
||
* bit-deskew setting within a nibble is different by > 33,
|
||
* we'll issue a reset to the bit deskew training.
|
||
*
|
||
* LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
|
||
*/
|
||
// upper nibble range, then lower nibble range
|
||
nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
|
||
nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;
|
||
|
||
// check for nibble all unlocked
|
||
nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;
|
||
|
||
// check for bit value errors, ie < 17 or > 110
|
||
// FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
|
||
bitval_errs = ((nib_max[1] > MAX_BITVAL) ||
|
||
(nib_max[0] > MAX_BITVAL)) ? 1 : 0;
|
||
bitval_errs |= ((nib_min[1] < MIN_BITVAL) ||
|
||
(nib_min[0] < MIN_BITVAL)) ? 1 : 0;
|
||
|
||
if ((nibrng_errs != 0 || nibunl_errs != 0 ||
|
||
bitval_errs != 0) && print_enable) {
|
||
debug(" %c%c%c",
|
||
(nibrng_errs) ? 'R' : ' ',
|
||
(nibunl_errs) ? 'U' : ' ',
|
||
(bitval_errs) ? 'V' : ' ');
|
||
}
|
||
|
||
if (print_enable)
|
||
debug("\n");
|
||
|
||
counts->nibrng_errs |= (nibrng_errs << byte_lane);
|
||
counts->nibunl_errs |= (nibunl_errs << byte_lane);
|
||
counts->bitval_errs |= (bitval_errs << byte_lane);
|
||
}
|
||
}
|
||
|
||
static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
|
||
int dac_value, int byte)
|
||
{
|
||
union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
|
||
// single bytelanes incr by 1; A is for ALL
|
||
int bytex = (byte == 0x0A) ? byte : byte + 1;
|
||
|
||
ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
|
||
|
||
SET_DDR_DLL_CTL3(byte_sel, bytex);
|
||
SET_DDR_DLL_CTL3(offset, dac_value >> 1);
|
||
|
||
ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */
|
||
lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
|
||
|
||
ddr_dll_ctl3.cn73xx.bit_select = 0xC; /* vref bypass setting load */
|
||
lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
|
||
|
||
ddr_dll_ctl3.cn73xx.bit_select = 0xD; /* vref bypass on. */
|
||
lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
|
||
|
||
ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */
|
||
lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
|
||
|
||
lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); // flush writes
|
||
|
||
return (unsigned short)GET_DDR_DLL_CTL3(offset);
|
||
}
|
||
|
||
// arg dac_or_dbi is 1 for DAC, 0 for DBI
|
||
// returns 9 entries (bytelanes 0 through 8) in settings[]
|
||
// returns 0 if OK, -1 if a problem
|
||
static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num,
|
||
int dac_or_dbi, int *settings)
|
||
{
|
||
union cvmx_lmcx_phy_ctl phy_ctl;
|
||
int byte_lane, bit_num;
|
||
int deskew;
|
||
int dac_value;
|
||
int new_deskew_layout = 0;
|
||
|
||
new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) ||
|
||
octeon_is_cpuid(OCTEON_CNF75XX);
|
||
new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) &&
|
||
!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X));
|
||
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
phy_ctl.s.dsk_dbg_clk_scaler = 3;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
bit_num = (dac_or_dbi) ? 4 : 5;
|
||
// DBI not available
|
||
if (bit_num == 5 && !new_deskew_layout)
|
||
return -1;
|
||
|
||
// FIXME: always assume ECC is available
|
||
for (byte_lane = 8; byte_lane >= 0; --byte_lane) {
|
||
//set byte lane and bit to read
|
||
phy_ctl.s.dsk_dbg_bit_sel = bit_num;
|
||
phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
//start read sequence
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
phy_ctl.s.dsk_dbg_rd_start = 1;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
//poll for read sequence to complete
|
||
do {
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
} while (phy_ctl.s.dsk_dbg_rd_complete != 1);
|
||
|
||
// keep the flag bits where they are for DBI
|
||
deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */
|
||
dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;
|
||
|
||
settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
/*
 * Print one row of DAC or DBI settings through debug().
 *
 * dac_or_dbi: 1 for DAC, 0 for DBI
 * ecc_ena:    lanes (7 + ecc_ena) down to 0 are printed
 * settings:   per-lane values as returned by read_dac_dbi_settings()
 * title:      text placed in front of the row
 *
 * DAC entries are printed as their low 8 bits with a blank flag character;
 * DBI entries are split into flags (low 3 bits) and a 7-bit value above.
 */
static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
				     int ecc_ena, int *settings, char *title)
{
	// flag-to-character map, indexed by (flags ^ 1)
	const char *fc = " ?-=+*#&";
	const int top_lane = 7 + ecc_ena;
	int lane;

	debug("N0.LMC%d: %s %s Settings %d:0 :",
	      lmc, title, (dac_or_dbi) ? "DAC" : "DBI", top_lane);

	// FIXME: what about 32-bit mode?
	for (lane = top_lane; lane >= 0; lane--) {
		int raw = settings[lane];
		int flags, value;

		if (!dac_or_dbi) {
			// DBI: flags in the low bits, value above them
			flags = raw & 7;
			value = (raw >> 3) & 0x7f;
		} else {
			// DAC: pretend it is locked so a blank is printed
			flags = 1;
			value = raw & 0xff;
		}

		debug(" %3d %c", value, fc[flags ^ 1]);
	}

	debug("\n");
}
|
||
|
||
// Find a HWL majority
|
||
static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc,
|
||
int *xc, int *cc)
|
||
{
|
||
int ix, ic;
|
||
|
||
*mx = -1;
|
||
*mc = 0;
|
||
*xc = 0;
|
||
*cc = 0;
|
||
|
||
for (ix = 0; ix < 4; ix++) {
|
||
ic = bc->bitcnt[ix];
|
||
|
||
// make a bitmask of the ones with a count
|
||
if (ic > 0) {
|
||
*mc |= (1 << ix);
|
||
*cc += 1; // count how many had non-zero counts
|
||
}
|
||
|
||
// find the majority
|
||
if (ic > *xc) { // new max?
|
||
*xc = ic; // yes
|
||
*mx = ix; // set its index
|
||
}
|
||
}
|
||
|
||
return (*mx << 1);
|
||
}
|
||
|
||
/*
 * Evaluate the DAC settings array.
 *
 * if_64b:   non-zero for a 64-bit interface (data lanes 0..7), zero for a
 *           32-bit one (data lanes 0..3)
 * ecc_ena:  added to the index of the last lane, so expected to be 0 or 1
 * settings: per-lane DAC values; only the low 8 bits of each are used
 *
 * Returns 1 when any two of the examined lanes differ by more than 25,
 * 0 otherwise.
 */
static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings)
{
	int top_lane = ((if_64b) ? 7 : 3) + ecc_ena;
	int lowest = 0xff, highest = 0;
	int lane, dac;

	// FIXME: change the check...???
	// this looks only for sets of DAC values whose max/min differ by a lot
	// let any EVEN go so long as it is within range...
	//
	// Some pair differs by more than 25 exactly when max - min does, so
	// one pass tracking both extremes replaces the all-pairs comparison.
	for (lane = top_lane; lane >= 0; --lane) {
		dac = settings[lane] & 0xff;

		if (dac < lowest)
			lowest = dac;
		if (dac > highest)
			highest = dac;
	}

	return (highest - lowest) > 25;
}
|
||
|
||
static void perform_offset_training(struct ddr_priv *priv, int rank_mask,
|
||
int if_num)
|
||
{
|
||
union cvmx_lmcx_phy_ctl lmc_phy_ctl;
|
||
u64 orig_phy_ctl;
|
||
const char *s;
|
||
|
||
/*
|
||
* 4.8.6 LMC Offset Training
|
||
*
|
||
* LMC requires input-receiver offset training.
|
||
*
|
||
* 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
|
||
*/
|
||
lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
orig_phy_ctl = lmc_phy_ctl.u64;
|
||
lmc_phy_ctl.s.dac_on = 1;
|
||
|
||
// allow full CSR override
|
||
s = lookup_env_ull(priv, "ddr_phy_ctl");
|
||
if (s)
|
||
lmc_phy_ctl.u64 = strtoull(s, NULL, 0);
|
||
|
||
// do not print or write if CSR does not change...
|
||
if (lmc_phy_ctl.u64 != orig_phy_ctl) {
|
||
debug("PHY_CTL : 0x%016llx\n",
|
||
lmc_phy_ctl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64);
|
||
}
|
||
|
||
/*
|
||
* 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
|
||
* LMC(0)_SEQ_CTL[INIT_START] = 1.
|
||
*
|
||
* 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
|
||
*/
|
||
/* Start Offset training sequence */
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B);
|
||
}
|
||
|
||
static void perform_internal_vref_training(struct ddr_priv *priv,
|
||
int rank_mask, int if_num)
|
||
{
|
||
union cvmx_lmcx_ext_config ext_config;
|
||
union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
|
||
|
||
// First, make sure all byte-lanes are out of VREF bypass mode
|
||
ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
|
||
|
||
ddr_dll_ctl3.cn78xx.byte_sel = 0x0A; /* all byte-lanes */
|
||
ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */
|
||
lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
|
||
|
||
ddr_dll_ctl3.cn78xx.bit_select = 0x0E; /* vref bypass off. */
|
||
lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
|
||
|
||
ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */
|
||
lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
|
||
|
||
/*
|
||
* 4.8.7 LMC Internal vref Training
|
||
*
|
||
* LMC requires input-reference-voltage training.
|
||
*
|
||
* 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
|
||
*/
|
||
ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
|
||
ext_config.s.vrefint_seq_deskew = 0;
|
||
|
||
ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n",
|
||
ext_config.s.vrefint_seq_deskew);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
|
||
|
||
/*
|
||
* 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
|
||
* LMC(0)_SEQ_CTL[INIT_START] = 1.
|
||
*
|
||
* 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
|
||
*/
|
||
/* Start LMC Internal vref Training */
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
|
||
}
|
||
|
||
/*
 * Trace output for the sample averaging code; compiled out.
 * To enable it, replace the expansion with debug(format, ##__VA_ARGS__).
 */
#define dbg_avg(format, ...) do { } while (0)
|
||
|
||
/*
 * Reduce a set of samples to a single odd value.
 *
 * One instance of the smallest and one of the largest sample are dropped
 * and the remaining (num_samples - 2) samples are averaged twice: once
 * truncated and once rounded to nearest.  The truncated average is returned
 * if it is odd, otherwise the rounded one if that is odd, otherwise the
 * truncated average plus one.
 *
 * bytes:       the samples
 * num_samples: number of entries in bytes; must be at least 3 because the
 *              averages divide by (num_samples - 2)
 * lmc/lane_no: used for trace output only
 */
static int process_samples_average(s16 *bytes, int num_samples,
				   int lmc, int lane_no)
{
	int i, sadj, sum = 0, ret, asum, trunc;
	s16 smin = 32767, smax = -32768;
	int nmin, nmax;

	dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);

	for (i = 0; i < num_samples; i++) {
		sum += bytes[i];
		if (bytes[i] < smin)
			smin = bytes[i];
		if (bytes[i] > smax)
			smax = bytes[i];
		dbg_avg(" %3d", bytes[i]);
	}

	// how often the extremes occur (trace output only)
	nmin = 0;
	nmax = 0;
	for (i = 0; i < num_samples; i++) {
		if (bytes[i] == smin)
			nmin += 1;
		if (bytes[i] == smax)
			nmax += 1;
	}
	// the range is computed in place so this still builds when the
	// trace macro is enabled
	dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)",
		smin, nmin, smax, nmax, smax - smin, num_samples);

	// drop one minimum and one maximum
	asum = sum - smin - smax;

	// average in tenths, rounded to nearest
	sadj = divide_nint(asum * 10, (num_samples - 2));

	// truncated average
	trunc = asum / (num_samples - 2);

	dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc);

	// rounded average in whole units
	sadj = divide_nint(sadj, 10);

	// the result is always odd
	if (trunc & 1)
		ret = trunc;
	else if (sadj & 1)
		ret = sadj;
	else
		ret = trunc + 1;

	dbg_avg(" -> %3d\n", ret);

	return ret;
}
|
||
|
||
#define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries
|
||
|
||
#define default_lock_retry_limit 20 // 20 retries
|
||
#define deskew_validation_delay 10000 // 10 millisecs
|
||
|
||
static int perform_deskew_training(struct ddr_priv *priv, int rank_mask,
|
||
int if_num, int spd_rawcard_aorb)
|
||
{
|
||
int unsaturated, locked;
|
||
int sat_retries, sat_retries_limit;
|
||
int lock_retries, lock_retries_total, lock_retries_limit;
|
||
int print_first;
|
||
int print_them_all;
|
||
struct deskew_counts dsk_counts;
|
||
union cvmx_lmcx_phy_ctl phy_ctl;
|
||
char *s;
|
||
int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
|
||
octeon_is_cpuid(OCTEON_CNF75XX);
|
||
int disable_bitval_retries = 1; // default to disabled
|
||
|
||
debug("N0.LMC%d: Performing Deskew Training.\n", if_num);
|
||
|
||
sat_retries = 0;
|
||
sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT;
|
||
|
||
lock_retries_total = 0;
|
||
unsaturated = 0;
|
||
print_first = 1; // print the first one
|
||
// set to true for printing all normal deskew attempts
|
||
print_them_all = 0;
|
||
|
||
// provide override for bitval_errs causing internal VREF retries
|
||
s = env_get("ddr_disable_bitval_retries");
|
||
if (s)
|
||
disable_bitval_retries = !!simple_strtoul(s, NULL, 0);
|
||
|
||
lock_retries_limit = default_lock_retry_limit;
|
||
if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) ||
|
||
(octeon_is_cpuid(OCTEON_CN73XX)) ||
|
||
(octeon_is_cpuid(OCTEON_CNF75XX)))
|
||
lock_retries_limit *= 2; // give new chips twice as many
|
||
|
||
do { /* while (sat_retries < sat_retry_limit) */
|
||
/*
|
||
* 4.8.8 LMC Deskew Training
|
||
*
|
||
* LMC requires input-read-data deskew training.
|
||
*
|
||
* 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
|
||
*/
|
||
|
||
union cvmx_lmcx_ext_config ext_config;
|
||
|
||
ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
|
||
ext_config.s.vrefint_seq_deskew = 1;
|
||
|
||
ddr_seq_print
|
||
("Performing LMC sequence: vrefint_seq_deskew = %d\n",
|
||
ext_config.s.vrefint_seq_deskew);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
|
||
|
||
/*
|
||
* 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
|
||
* LMC(0)_SEQ_CTL[INIT_START] = 1.
|
||
*
|
||
* 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
|
||
*/
|
||
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
phy_ctl.s.phy_dsk_reset = 1; /* RESET Deskew sequence */
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
/* LMC Deskew Training */
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
|
||
|
||
lock_retries = 0;
|
||
|
||
perform_deskew_training:
|
||
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
phy_ctl.s.phy_dsk_reset = 0; /* Normal Deskew sequence */
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
/* LMC Deskew Training */
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
|
||
|
||
// Moved this from validate_deskew_training
|
||
/* Allow deskew results to stabilize before evaluating them. */
|
||
udelay(deskew_validation_delay);
|
||
|
||
// Now go look at lock and saturation status...
|
||
validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
|
||
print_first);
|
||
// after printing the first and not doing them all, no more
|
||
if (print_first && !print_them_all)
|
||
print_first = 0;
|
||
|
||
unsaturated = (dsk_counts.saturated == 0);
|
||
locked = (dsk_counts.unlocked == 0);
|
||
|
||
// only do locking retries if unsaturated or rawcard A or B,
|
||
// otherwise full SAT retry
|
||
if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) {
|
||
if (!locked) { // and not locked
|
||
lock_retries++;
|
||
lock_retries_total++;
|
||
if (lock_retries <= lock_retries_limit) {
|
||
goto perform_deskew_training;
|
||
} else {
|
||
debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n",
|
||
if_num, lock_retries_limit);
|
||
}
|
||
} else {
|
||
// only print if we did try
|
||
if (lock_retries_total > 0)
|
||
debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n",
|
||
if_num, lock_retries);
|
||
}
|
||
} /* if (unsaturated || spd_rawcard_aorb) */
|
||
|
||
++sat_retries;
|
||
|
||
/*
|
||
* At this point, check for a DDR4 RDIMM that will not
|
||
* benefit from SAT retries; if so, exit
|
||
*/
|
||
if (spd_rawcard_aorb && !has_no_sat) {
|
||
debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
|
||
if_num);
|
||
break; // no sat or lock retries
|
||
}
|
||
|
||
} while (!unsaturated && (sat_retries < sat_retries_limit));
|
||
|
||
debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n",
|
||
if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ?
|
||
"Timed Out" : "Completed", sat_retries - 1, lock_retries_total);
|
||
|
||
// FIXME? add saturation to reasons for fault return - give it a
|
||
// chance via Internal VREF
|
||
// FIXME? add OPTIONAL bit value to reasons for fault return -
|
||
// give it a chance via Internal VREF
|
||
if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 ||
|
||
(dsk_counts.bitval_errs != 0 && !disable_bitval_retries) ||
|
||
!unsaturated) {
|
||
debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n",
|
||
if_num);
|
||
// FIXME: do we want this output always for errors?
|
||
validate_deskew_training(priv, rank_mask, if_num,
|
||
&dsk_counts, 1);
|
||
return -1; // we did retry locally, they did not help
|
||
}
|
||
|
||
// NOTE: we (currently) always print one last training validation
|
||
// before starting Read Leveling...
|
||
|
||
return 0;
|
||
}
|
||
|
||
#define SCALING_FACTOR (1000)
|
||
|
||
// NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config
|
||
static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl,
|
||
int rank_count, int dram_connection)
|
||
{
|
||
u64 reff_s;
|
||
u64 rser_s = (dram_connection) ? 0 : 15;
|
||
u64 vdd = 1200;
|
||
u64 vref;
|
||
// 99 == HiZ
|
||
u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
|
||
1 * 1024 * 1024 : rtt_wr);
|
||
u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) &&
|
||
(rtt_wr != 0))) ?
|
||
1 * 1024 * 1024 : rtt_park);
|
||
u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
|
||
int vref_value;
|
||
u64 rangepc = 6000; // range1 base
|
||
u64 vrefpc;
|
||
int vref_range = 0;
|
||
|
||
reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s));
|
||
|
||
vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) /
|
||
(rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR;
|
||
|
||
vref = (vref * vdd) / 2 / SCALING_FACTOR;
|
||
|
||
vrefpc = (vref * 100 * 100) / vdd;
|
||
|
||
if (vrefpc < rangepc) { // < range1 base, use range2
|
||
vref_range = 1 << 6; // set bit A6 for range2
|
||
rangepc = 4500; // range2 base is 45%
|
||
}
|
||
|
||
vref_value = divide_nint(vrefpc - rangepc, 65);
|
||
if (vref_value < 0)
|
||
vref_value = vref_range; // set to base of range
|
||
else
|
||
vref_value |= vref_range;
|
||
|
||
debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n",
|
||
rtt_wr, rtt_park, dqx_ctl, rank_count);
|
||
debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n",
|
||
rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range,
|
||
vref_range ? 2 : 1);
|
||
|
||
return vref_value;
|
||
}
|
||
|
||
// NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs
|
||
static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00,
|
||
int rtt_park_01,
|
||
int dqx_ctl, int rtt_nom,
|
||
int dram_connection)
|
||
{
|
||
u64 rser = (dram_connection) ? 0 : 15;
|
||
u64 vdd = 1200;
|
||
u64 vl, vlp, vcm;
|
||
u64 rd0, rd1, rpullup;
|
||
// 99 == HiZ
|
||
u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
|
||
1 * 1024 * 1024 : rtt_wr);
|
||
u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00);
|
||
u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01);
|
||
u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
|
||
u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom);
|
||
int vref_value;
|
||
u64 rangepc = 6000; // range1 base
|
||
u64 vrefpc;
|
||
int vref_range = 0;
|
||
|
||
// rd0 = (RTT_NOM (parallel) RTT_WR) + =
|
||
// ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER
|
||
rd0 = divide_nint((rtt_nom_s * rtt_wr_s),
|
||
(rtt_nom_s + rtt_wr_s)) + rser;
|
||
|
||
// rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER =
|
||
// ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER
|
||
rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s),
|
||
(rtt_park_00_s + rtt_park_01_s)) + rser;
|
||
|
||
// rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1)
|
||
rpullup = divide_nint((rd0 * rd1), (rd0 + rd1));
|
||
|
||
// vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2
|
||
vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup));
|
||
|
||
// vlp = ((RSER / rd0) * (1.2 - vl)) + vl
|
||
vlp = divide_nint((rser * (vdd - vl)), rd0) + vl;
|
||
|
||
// vcm = (vlp + 1.2) / 2
|
||
vcm = divide_nint((vlp + vdd), 2);
|
||
|
||
// vrefpc = (vcm / 1.2) * 100
|
||
vrefpc = divide_nint((vcm * 100 * 100), vdd);
|
||
|
||
if (vrefpc < rangepc) { // < range1 base, use range2
|
||
vref_range = 1 << 6; // set bit A6 for range2
|
||
rangepc = 4500; // range2 base is 45%
|
||
}
|
||
|
||
vref_value = divide_nint(vrefpc - rangepc, 65);
|
||
if (vref_value < 0)
|
||
vref_value = vref_range; // set to base of range
|
||
else
|
||
vref_value |= vref_range;
|
||
|
||
debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n",
|
||
rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value,
|
||
vref_value);
|
||
|
||
return vref_value;
|
||
}
|
||
|
||
// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
|
||
static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx,
|
||
int dimm_count, int rank_count,
|
||
struct impedence_values *imp_values,
|
||
int is_stacked_die, int dram_connection)
|
||
{
|
||
int computed_final_vref_value = 0;
|
||
int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT;
|
||
const char *s;
|
||
int rtt_wr, dqx_ctl, rtt_nom, index;
|
||
union cvmx_lmcx_modereg_params1 lmc_modereg_params1;
|
||
union cvmx_lmcx_modereg_params2 lmc_modereg_params2;
|
||
union cvmx_lmcx_comp_ctl2 comp_ctl2;
|
||
int rtt_park;
|
||
int rtt_park_00;
|
||
int rtt_park_01;
|
||
|
||
debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n",
|
||
if_num, rankx, __func__, dram_connection);
|
||
|
||
// allow some overrides...
|
||
s = env_get("ddr_adjust_computed_vref");
|
||
if (s) {
|
||
enable_adjust = !!simple_strtoul(s, NULL, 0);
|
||
if (!enable_adjust) {
|
||
debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n",
|
||
if_num, rankx);
|
||
}
|
||
}
|
||
|
||
s = env_get("ddr_set_computed_vref");
|
||
if (s) {
|
||
int new_vref = simple_strtoul(s, NULL, 0);
|
||
|
||
debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n",
|
||
if_num, rankx, new_vref, new_vref);
|
||
return new_vref;
|
||
}
|
||
|
||
/*
|
||
* Calculate an alternative to the measured vref value
|
||
* but only for configurations we know how to...
|
||
*/
|
||
// We have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
|
||
// and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot
|
||
// configs, and can use the 2-rank 2-slot code for 1-rank DIMMs
|
||
// in 2-slot configs.
|
||
|
||
lmc_modereg_params1.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
|
||
lmc_modereg_params2.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num));
|
||
comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
|
||
|
||
// WR always comes from the current rank
|
||
index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03;
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
|
||
index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04;
|
||
rtt_wr = imp_values->rtt_wr_ohms[index];
|
||
|
||
// separate calculations for 1 vs 2 DIMMs per LMC
|
||
if (dimm_count == 1) {
|
||
// PARK comes from this rank if 1-rank, otherwise other rank
|
||
index =
|
||
(lmc_modereg_params2.u64 >>
|
||
((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
|
||
rtt_park = imp_values->rtt_nom_ohms[index];
|
||
computed_final_vref_value =
|
||
compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl,
|
||
rank_count, dram_connection);
|
||
} else {
|
||
// get both PARK values from the other DIMM
|
||
index =
|
||
(lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) &
|
||
0x07;
|
||
rtt_park_00 = imp_values->rtt_nom_ohms[index];
|
||
index =
|
||
(lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) &
|
||
0x07;
|
||
rtt_park_01 = imp_values->rtt_nom_ohms[index];
|
||
// NOM comes from this rank if 1-rank, otherwise other rank
|
||
index =
|
||
(lmc_modereg_params1.u64 >>
|
||
((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
|
||
rtt_nom = imp_values->rtt_nom_ohms[index];
|
||
computed_final_vref_value =
|
||
compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01,
|
||
dqx_ctl, rtt_nom, dram_connection);
|
||
}
|
||
|
||
if (enable_adjust) {
|
||
union cvmx_lmcx_config lmc_config;
|
||
union cvmx_lmcx_control lmc_control;
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
|
||
/*
|
||
* New computed vref = existing computed vref – X
|
||
*
|
||
* The value of X is depending on different conditions.
|
||
* Both #122 and #139 are 2Rx4 RDIMM, while #124 is stacked
|
||
* die 2Rx4, so I conclude the results into two conditions:
|
||
*
|
||
* 1. Stacked Die: 2Rx4
|
||
* 1-slot: offset = 7. i, e New computed vref = existing
|
||
* computed vref – 7
|
||
* 2-slot: offset = 6
|
||
*
|
||
* 2. Regular: 2Rx4
|
||
* 1-slot: offset = 3
|
||
* 2-slot: offset = 2
|
||
*/
|
||
// we know we never get called unless DDR4, so test just
|
||
// the other conditions
|
||
if (lmc_control.s.rdimm_ena == 1 &&
|
||
rank_count == 2 && lmc_config.s.mode_x4dev) {
|
||
// it must first be RDIMM and 2-rank and x4
|
||
int adj;
|
||
|
||
// now do according to stacked die or not...
|
||
if (is_stacked_die)
|
||
adj = (dimm_count == 1) ? -7 : -6;
|
||
else
|
||
adj = (dimm_count == 1) ? -3 : -2;
|
||
|
||
// we must have adjusted it, so print it out if
|
||
// verbosity is right
|
||
debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
|
||
if_num, rankx, computed_final_vref_value,
|
||
computed_final_vref_value,
|
||
computed_final_vref_value + adj,
|
||
computed_final_vref_value + adj);
|
||
computed_final_vref_value += adj;
|
||
}
|
||
}
|
||
|
||
return computed_final_vref_value;
|
||
}
|
||
|
||
static void unpack_rlevel_settings(int if_bytemask, int ecc_ena,
|
||
struct rlevel_byte_data *rlevel_byte,
|
||
union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank)
|
||
{
|
||
if ((if_bytemask & 0xff) == 0xff) {
|
||
if (ecc_ena) {
|
||
rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7;
|
||
rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6;
|
||
rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5;
|
||
rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4;
|
||
/* ECC */
|
||
rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8;
|
||
} else {
|
||
rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7;
|
||
rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6;
|
||
rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5;
|
||
rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4;
|
||
}
|
||
} else {
|
||
rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8; /* unused */
|
||
rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; /* unused */
|
||
rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; /* unused */
|
||
rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; /* unused */
|
||
rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; /* ECC */
|
||
}
|
||
|
||
rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3;
|
||
rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2;
|
||
rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1;
|
||
rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0;
|
||
}
|
||
|
||
static void pack_rlevel_settings(int if_bytemask, int ecc_ena,
|
||
struct rlevel_byte_data *rlevel_byte,
|
||
union cvmx_lmcx_rlevel_rankx
|
||
*final_rlevel_rank)
|
||
{
|
||
union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank;
|
||
|
||
if ((if_bytemask & 0xff) == 0xff) {
|
||
if (ecc_ena) {
|
||
lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay;
|
||
lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay;
|
||
lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay;
|
||
lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay;
|
||
/* ECC */
|
||
lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay;
|
||
} else {
|
||
lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
|
||
lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
|
||
lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
|
||
lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
|
||
}
|
||
} else {
|
||
lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay;
|
||
lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
|
||
lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
|
||
lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
|
||
lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
|
||
}
|
||
|
||
lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay;
|
||
lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay;
|
||
lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay;
|
||
lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay;
|
||
|
||
*final_rlevel_rank = lmc_rlevel_rank;
|
||
}
|
||
|
||
/////////////////// These are the RLEVEL settings display routines
|
||
|
||
// flags
|
||
#define WITH_NOTHING 0
|
||
#define WITH_SCORE 1
|
||
#define WITH_AVERAGE 2
|
||
#define WITH_FINAL 4
|
||
#define WITH_COMPUTE 8
|
||
|
||
static void do_display_rl(int if_num,
|
||
union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
|
||
int rank, int flags, int score)
|
||
{
|
||
char score_buf[16];
|
||
char *msg_buf;
|
||
char hex_buf[20];
|
||
|
||
if (flags & WITH_SCORE) {
|
||
snprintf(score_buf, sizeof(score_buf), "(%d)", score);
|
||
} else {
|
||
score_buf[0] = ' ';
|
||
score_buf[1] = 0;
|
||
}
|
||
|
||
if (flags & WITH_AVERAGE) {
|
||
msg_buf = " DELAY AVERAGES ";
|
||
} else if (flags & WITH_FINAL) {
|
||
msg_buf = " FINAL SETTINGS ";
|
||
} else if (flags & WITH_COMPUTE) {
|
||
msg_buf = " COMPUTED DELAYS ";
|
||
} else {
|
||
snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
|
||
(unsigned long long)lmc_rlevel_rank.u64);
|
||
msg_buf = hex_buf;
|
||
}
|
||
|
||
debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
|
||
if_num, rank, lmc_rlevel_rank.s.status, msg_buf,
|
||
lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7,
|
||
lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5,
|
||
lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3,
|
||
lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1,
|
||
lmc_rlevel_rank.s.byte0, score_buf);
|
||
}
|
||
|
||
static void display_rl(int if_num,
|
||
union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank)
|
||
{
|
||
do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0);
|
||
}
|
||
|
||
static void display_rl_with_score(int if_num,
|
||
union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
|
||
int rank, int score)
|
||
{
|
||
do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score);
|
||
}
|
||
|
||
static void display_rl_with_final(int if_num,
|
||
union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
|
||
int rank)
|
||
{
|
||
do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0);
|
||
}
|
||
|
||
static void display_rl_with_computed(int if_num,
|
||
union cvmx_lmcx_rlevel_rankx
|
||
lmc_rlevel_rank, int rank, int score)
|
||
{
|
||
do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score);
|
||
}
|
||
|
||
// flag values
|
||
#define WITH_RODT_BLANK 0
|
||
#define WITH_RODT_SKIPPING 1
|
||
#define WITH_RODT_BESTROW 2
|
||
#define WITH_RODT_BESTSCORE 3
|
||
// control
|
||
#define SKIP_SKIPPING 1
|
||
|
||
static const char *with_rodt_canned_msgs[4] = {
|
||
" ", "SKIPPING ", "BEST ROW ", "BEST SCORE"
|
||
};
|
||
|
||
static void display_rl_with_rodt(int if_num,
|
||
union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
|
||
int rank, int score,
|
||
int nom_ohms, int rodt_ohms, int flag)
|
||
{
|
||
const char *msg_buf;
|
||
char set_buf[20];
|
||
|
||
#if SKIP_SKIPPING
|
||
if (flag == WITH_RODT_SKIPPING)
|
||
return;
|
||
#endif
|
||
|
||
msg_buf = with_rodt_canned_msgs[flag];
|
||
if (nom_ohms < 0) {
|
||
snprintf(set_buf, sizeof(set_buf), " RODT %3d ",
|
||
rodt_ohms);
|
||
} else {
|
||
snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms,
|
||
rodt_ohms);
|
||
}
|
||
|
||
debug("N0.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
|
||
if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8,
|
||
lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6,
|
||
lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4,
|
||
lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2,
|
||
lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score);
|
||
}
|
||
|
||
static void do_display_wl(int if_num,
|
||
union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
|
||
int rank, int flags)
|
||
{
|
||
char *msg_buf;
|
||
char hex_buf[20];
|
||
|
||
if (flags & WITH_FINAL) {
|
||
msg_buf = " FINAL SETTINGS ";
|
||
} else {
|
||
snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
|
||
(unsigned long long)lmc_wlevel_rank.u64);
|
||
msg_buf = hex_buf;
|
||
}
|
||
|
||
debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
|
||
if_num, rank, lmc_wlevel_rank.s.status, msg_buf,
|
||
lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7,
|
||
lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5,
|
||
lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3,
|
||
lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1,
|
||
lmc_wlevel_rank.s.byte0);
|
||
}
|
||
|
||
static void display_wl(int if_num,
|
||
union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank)
|
||
{
|
||
do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING);
|
||
}
|
||
|
||
static void display_wl_with_final(int if_num,
|
||
union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
|
||
int rank)
|
||
{
|
||
do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL);
|
||
}
|
||
|
||
// pretty-print bitmask adjuster
|
||
/*
 * Shift a non-zero bitmask right one nibble at a time until its low byte
 * is non-zero; zero is returned unchanged.
 */
static u64 ppbm(u64 bm)
{
	/*
	 * bm cannot become zero inside the loop: while the low byte is
	 * clear, every set bit is at position 8 or above and survives a
	 * 4-bit shift.
	 */
	while (bm != 0ul && (bm & 0x0fful) == 0ul)
		bm >>= 4;

	return bm;
}
|
||
|
||
// xlate PACKED index to UNPACKED index to use with rlevel_byte
|
||
#define XPU(i, e) (((i) >= 8) ? 4 : (((i) >= 4) ? (i) + (e) : (i)))
|
||
// xlate UNPACKED index to PACKED index to use with rlevel_bitmask
|
||
#define XUP(i, e) ((((i) < 4) || !(e)) ? (i) : (((i) == 4) ? 8 : (i) - 1))
|
||
|
||
// flag values
|
||
#define WITH_WL_BITMASKS 0
|
||
#define WITH_RL_BITMASKS 1
|
||
#define WITH_RL_MASK_SCORES 2
|
||
#define WITH_RL_SEQ_SCORES 3
|
||
|
||
static void do_display_bm(int if_num, int rank, void *bm,
|
||
int flags, int ecc)
|
||
{
|
||
if (flags == WITH_WL_BITMASKS) {
|
||
// wlevel_bitmask array in PACKED index order, so just
|
||
// print them
|
||
int *bitmasks = (int *)bm;
|
||
|
||
debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
|
||
if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6],
|
||
bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2],
|
||
bitmasks[1], bitmasks[0]
|
||
);
|
||
} else if (flags == WITH_RL_BITMASKS) {
|
||
// rlevel_bitmask array in PACKED index order, so just
|
||
// print them
|
||
struct rlevel_bitmask *rlevel_bitmask =
|
||
(struct rlevel_bitmask *)bm;
|
||
|
||
debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n",
|
||
if_num, rank, ppbm(rlevel_bitmask[8].bm),
|
||
ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm),
|
||
ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm),
|
||
ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm),
|
||
ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm)
|
||
);
|
||
} else if (flags == WITH_RL_MASK_SCORES) {
|
||
// rlevel_bitmask array in PACKED index order, so just
|
||
// print them
|
||
struct rlevel_bitmask *rlevel_bitmask =
|
||
(struct rlevel_bitmask *)bm;
|
||
|
||
debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
|
||
if_num, rank, rlevel_bitmask[8].errs,
|
||
rlevel_bitmask[7].errs, rlevel_bitmask[6].errs,
|
||
rlevel_bitmask[5].errs, rlevel_bitmask[4].errs,
|
||
rlevel_bitmask[3].errs, rlevel_bitmask[2].errs,
|
||
rlevel_bitmask[1].errs, rlevel_bitmask[0].errs);
|
||
} else if (flags == WITH_RL_SEQ_SCORES) {
|
||
// rlevel_byte array in UNPACKED index order, so xlate
|
||
// and print them
|
||
struct rlevel_byte_data *rlevel_byte =
|
||
(struct rlevel_byte_data *)bm;
|
||
|
||
debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
|
||
if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs,
|
||
rlevel_byte[XPU(7, ecc)].sqerrs,
|
||
rlevel_byte[XPU(6, ecc)].sqerrs,
|
||
rlevel_byte[XPU(5, ecc)].sqerrs,
|
||
rlevel_byte[XPU(4, ecc)].sqerrs,
|
||
rlevel_byte[XPU(3, ecc)].sqerrs,
|
||
rlevel_byte[XPU(2, ecc)].sqerrs,
|
||
rlevel_byte[XPU(1, ecc)].sqerrs,
|
||
rlevel_byte[XPU(0, ecc)].sqerrs);
|
||
}
|
||
}
|
||
|
||
static void display_wl_bm(int if_num, int rank, int *bitmasks)
|
||
{
|
||
do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
|
||
}
|
||
|
||
static void display_rl_bm(int if_num, int rank,
|
||
struct rlevel_bitmask *bitmasks, int ecc_ena)
|
||
{
|
||
do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_BITMASKS,
|
||
ecc_ena);
|
||
}
|
||
|
||
static void display_rl_bm_scores(int if_num, int rank,
|
||
struct rlevel_bitmask *bitmasks, int ecc_ena)
|
||
{
|
||
do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES,
|
||
ecc_ena);
|
||
}
|
||
|
||
static void display_rl_seq_scores(int if_num, int rank,
|
||
struct rlevel_byte_data *bytes, int ecc_ena)
|
||
{
|
||
do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
|
||
}
|
||
|
||
#define RODT_OHMS_COUNT 8
|
||
#define RTT_NOM_OHMS_COUNT 8
|
||
#define RTT_NOM_TABLE_COUNT 8
|
||
#define RTT_WR_OHMS_COUNT 8
|
||
#define DIC_OHMS_COUNT 3
|
||
#define DRIVE_STRENGTH_COUNT 15
|
||
|
||
static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = {
|
||
0, 40, 60, 80, 120, 240, 34, 48 };
|
||
static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
|
||
0, 60, 120, 40, 240, 48, 80, 34 };
|
||
static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
|
||
0, 4, 2, 6, 1, 5, 3, 7 };
|
||
// setting HiZ ohms to 99 for computed vref
|
||
static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = {
|
||
0, 120, 240, 99, 80 };
|
||
static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 };
|
||
static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = {
|
||
0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 };
|
||
static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = {
|
||
0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
|
||
struct impedence_values ddr4_impedence_val = {
|
||
.rodt_ohms = ddr4_rodt_ohms,
|
||
.rtt_nom_ohms = ddr4_rtt_nom_ohms,
|
||
.rtt_nom_table = ddr4_rtt_nom_table,
|
||
.rtt_wr_ohms = ddr4_rtt_wr_ohms,
|
||
.dic_ohms = ddr4_dic_ohms,
|
||
.drive_strength = ddr4_drive_strength,
|
||
.dqx_strength = ddr4_dqx_strength,
|
||
};
|
||
|
||
static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = {
|
||
0, 20, 30, 40, 60, 120, 0, 0 };
|
||
static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
|
||
0, 60, 120, 40, 20, 30, 0, 0 };
|
||
static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
|
||
0, 2, 1, 3, 5, 4, 0, 0 };
|
||
static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 };
|
||
static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 };
|
||
static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = {
|
||
0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
|
||
static struct impedence_values ddr3_impedence_val = {
|
||
.rodt_ohms = ddr3_rodt_ohms,
|
||
.rtt_nom_ohms = ddr3_rtt_nom_ohms,
|
||
.rtt_nom_table = ddr3_rtt_nom_table,
|
||
.rtt_wr_ohms = ddr3_rtt_wr_ohms,
|
||
.dic_ohms = ddr3_dic_ohms,
|
||
.drive_strength = ddr3_drive_strength,
|
||
.dqx_strength = ddr3_drive_strength,
|
||
};
|
||
|
||
/* Convert a clock frequency in Hz to its period in picoseconds. */
static u64 hertz_to_psecs(u64 hertz)
{
	const u64 psecs_per_sec = (u64)1000 * 1000 * 1000 * 1000;

	return divide_nint(psecs_per_sec, hertz);
}
|
||
|
||
#define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */
|
||
|
||
/*
 * Convert a clock period in picoseconds to a data rate in MT/s (two
 * transfers per clock). The dividend is scaled up by DIVIDEND_SCALE
 * before the first division and scaled back down afterwards.
 */
static u64 psecs_to_mts(u64 psecs)
{
	/* widen first so the product is computed in 64 bits */
	u64 scaled_dividend = (u64)2 * 1000000 * DIVIDEND_SCALE;
	u64 scaled_mts = divide_nint(scaled_dividend, psecs);

	return divide_nint(scaled_mts, DIVIDEND_SCALE);
}
|
||
|
||
#define WITHIN(v, b, m) (!(((v) < ((b) - (m))) || ((v) > ((b) + (m)))))
|
||
|
||
/*
 * Map a clock period in picoseconds to the nominal data rate in MT/s.
 * A period matches a table entry when it is within +/-1 ps of it.
 *
 * Return: the nominal MT/s value, or 0 if the period matches no entry
 */
static unsigned long pretty_psecs_to_mts(u64 psecs)
{
	static const struct {
		int psecs;
		int mts;
	} nominal[] = {
		{ 2500,  800 },
		{ 1875, 1066 },
		{ 1500, 1333 },
		{ 1250, 1600 },
		{ 1071, 1866 },
		{  937, 2133 },
		{  833, 2400 },
		{  750, 2666 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(nominal) / sizeof(nominal[0]); i++) {
		if (WITHIN(psecs, nominal[i].psecs, 1))
			return nominal[i].mts;
	}

	return 0; /* default to error */
}
|
||
|
||
/* Convert a data rate in MT/s to a clock frequency in Hz (MT/s * 1e6 / 2). */
static u64 mts_to_hertz(u64 mts)
{
	u64 transfers_per_sec = mts * 1000 * 1000;

	return transfers_per_sec / 2;
}
|
||
|
||
/*
 * Compute the RC3x encoding for a given clock period.
 *
 * The data rate is bucketed in 20 MT/s steps starting at ENCODING_BASE
 * (1240 MT/s); the encoding is the index of the first bucket whose upper
 * bound (speed + 20) is not below the data rate.
 *
 * The earlier version also recomputed min/max periods and data rates for
 * every bucket and folded them into an "error" counter that was never
 * read. That dead bookkeeping is removed; the returned value is unchanged.
 *
 * @tclk_psecs: clock period in picoseconds
 *
 * Return: bucket index; 97 (the last bucket) if the data rate exceeds
 *         every bucket's upper bound
 */
static int compute_rc3x(int64_t tclk_psecs)
{
	long speed;
	long data_rate_mhz;
	int rc3x = 0;

#define ENCODING_BASE 1240

	data_rate_mhz = psecs_to_mts(tclk_psecs);

	/*
	 * 2400 MT/s is a special case. Using integer arithmetic it rounds
	 * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
	 * proper setting from the table.
	 */
	if (tclk_psecs == 833)
		data_rate_mhz = 2400;

	for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
		rc3x = (speed - ENCODING_BASE) / 20;

		if (data_rate_mhz <= (speed + 20))
			break;
	}

	return rc3x;
}
|
||
|
||
/*
 * Static global variables are needed so that functions (loops) can be
 * split out of the main huge function. It's not elegant, but it is the
 * only way to break original functions like init_octeon3_ddr3_interface()
 * into separate, logically smaller functions with fewer indentation levels.
 */
|
||
static int if_num __section(".data");
|
||
static u32 if_mask __section(".data");
|
||
static int ddr_hertz __section(".data");
|
||
|
||
static struct ddr_conf *ddr_conf __section(".data");
|
||
static const struct dimm_odt_config *odt_1rank_config __section(".data");
|
||
static const struct dimm_odt_config *odt_2rank_config __section(".data");
|
||
static const struct dimm_odt_config *odt_4rank_config __section(".data");
|
||
static struct dimm_config *dimm_config_table __section(".data");
|
||
static const struct dimm_odt_config *odt_config __section(".data");
|
||
static const struct ddr3_custom_config *c_cfg __section(".data");
|
||
|
||
static int odt_idx __section(".data");
|
||
|
||
static ulong tclk_psecs __section(".data");
|
||
static ulong eclk_psecs __section(".data");
|
||
|
||
static int row_bits __section(".data");
|
||
static int col_bits __section(".data");
|
||
static int num_banks __section(".data");
|
||
static int num_ranks __section(".data");
|
||
static int dram_width __section(".data");
|
||
static int dimm_count __section(".data");
|
||
/* Accumulate and report all the errors before giving up */
|
||
static int fatal_error __section(".data");
|
||
/* Flag that indicates safe DDR settings should be used */
|
||
static int safe_ddr_flag __section(".data");
|
||
/* Octeon II Default: 64bit interface width */
|
||
static int if_64b __section(".data");
|
||
static int if_bytemask __section(".data");
|
||
static u32 mem_size_mbytes __section(".data");
|
||
static unsigned int didx __section(".data");
|
||
static int bank_bits __section(".data");
|
||
static int bunk_enable __section(".data");
|
||
static int rank_mask __section(".data");
|
||
static int column_bits_start __section(".data");
|
||
static int row_lsb __section(".data");
|
||
static int pbank_lsb __section(".data");
|
||
static int use_ecc __section(".data");
|
||
static int mtb_psec __section(".data");
|
||
static short ftb_dividend __section(".data");
|
||
static short ftb_divisor __section(".data");
|
||
static int taamin __section(".data");
|
||
static int tckmin __section(".data");
|
||
static int cl __section(".data");
|
||
static int min_cas_latency __section(".data");
|
||
static int max_cas_latency __section(".data");
|
||
static int override_cas_latency __section(".data");
|
||
static int ddr_rtt_nom_auto __section(".data");
|
||
static int ddr_rodt_ctl_auto __section(".data");
|
||
|
||
static int spd_addr __section(".data");
|
||
static int spd_org __section(".data");
|
||
static int spd_banks __section(".data");
|
||
static int spd_rdimm __section(".data");
|
||
static int spd_dimm_type __section(".data");
|
||
static int spd_ecc __section(".data");
|
||
static u32 spd_cas_latency __section(".data");
|
||
static int spd_mtb_dividend __section(".data");
|
||
static int spd_mtb_divisor __section(".data");
|
||
static int spd_tck_min __section(".data");
|
||
static int spd_taa_min __section(".data");
|
||
static int spd_twr __section(".data");
|
||
static int spd_trcd __section(".data");
|
||
static int spd_trrd __section(".data");
|
||
static int spd_trp __section(".data");
|
||
static int spd_tras __section(".data");
|
||
static int spd_trc __section(".data");
|
||
static int spd_trfc __section(".data");
|
||
static int spd_twtr __section(".data");
|
||
static int spd_trtp __section(".data");
|
||
static int spd_tfaw __section(".data");
|
||
static int spd_addr_mirror __section(".data");
|
||
static int spd_package __section(".data");
|
||
static int spd_rawcard __section(".data");
|
||
static int spd_rawcard_aorb __section(".data");
|
||
static int spd_rdimm_registers __section(".data");
|
||
static int spd_thermal_sensor __section(".data");
|
||
|
||
static int is_stacked_die __section(".data");
|
||
static int is_3ds_dimm __section(".data");
|
||
// 3DS: logical ranks per package rank
|
||
static int lranks_per_prank __section(".data");
|
||
// 3DS: logical ranks bits
|
||
static int lranks_bits __section(".data");
|
||
// in Mbits; only used for 3DS
|
||
static int die_capacity __section(".data");
|
||
|
||
static enum ddr_type ddr_type __section(".data");
|
||
|
||
static int twr __section(".data");
|
||
static int trcd __section(".data");
|
||
static int trrd __section(".data");
|
||
static int trp __section(".data");
|
||
static int tras __section(".data");
|
||
static int trc __section(".data");
|
||
static int trfc __section(".data");
|
||
static int twtr __section(".data");
|
||
static int trtp __section(".data");
|
||
static int tfaw __section(".data");
|
||
|
||
static int ddr4_tckavgmin __section(".data");
|
||
static int ddr4_tckavgmax __section(".data");
|
||
static int ddr4_trdcmin __section(".data");
|
||
static int ddr4_trpmin __section(".data");
|
||
static int ddr4_trasmin __section(".data");
|
||
static int ddr4_trcmin __section(".data");
|
||
static int ddr4_trfc1min __section(".data");
|
||
static int ddr4_trfc2min __section(".data");
|
||
static int ddr4_trfc4min __section(".data");
|
||
static int ddr4_tfawmin __section(".data");
|
||
static int ddr4_trrd_smin __section(".data");
|
||
static int ddr4_trrd_lmin __section(".data");
|
||
static int ddr4_tccd_lmin __section(".data");
|
||
|
||
static int wl_mask_err __section(".data");
|
||
static int wl_loops __section(".data");
|
||
static int default_rtt_nom[4] __section(".data");
|
||
static int dyn_rtt_nom_mask __section(".data");
|
||
static struct impedence_values *imp_val __section(".data");
|
||
static char default_rodt_ctl __section(".data");
|
||
// default to disabled (ie, try LMC restart, not chip reset)
|
||
static int ddr_disable_chip_reset __section(".data");
|
||
static const char *dimm_type_name __section(".data");
|
||
static int match_wl_rtt_nom __section(".data");
|
||
|
||
struct hwl_alt_by_rank {
|
||
u16 hwl_alt_mask; // mask of bytelanes with alternate
|
||
u16 hwl_alt_delay[9]; // bytelane alternate avail if mask=1
|
||
};
|
||
|
||
static struct hwl_alt_by_rank hwl_alts[4] __section(".data");
|
||
|
||
#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3 // was: 5
|
||
static int internal_retries __section(".data");
|
||
|
||
static int deskew_training_errors __section(".data");
|
||
static struct deskew_counts deskew_training_results __section(".data");
|
||
static int disable_deskew_training __section(".data");
|
||
static int restart_if_dsk_incomplete __section(".data");
|
||
static int dac_eval_retries __section(".data");
|
||
static int dac_settings[9] __section(".data");
|
||
static int num_samples __section(".data");
|
||
static int sample __section(".data");
|
||
static int lane __section(".data");
|
||
static int last_lane __section(".data");
|
||
static int total_dac_eval_retries __section(".data");
|
||
static int dac_eval_exhausted __section(".data");
|
||
|
||
#define DEFAULT_DAC_SAMPLES 7 // originally was 5
|
||
#define DAC_RETRIES_LIMIT 2
|
||
|
||
struct bytelane_sample {
|
||
s16 bytes[DEFAULT_DAC_SAMPLES];
|
||
};
|
||
|
||
static struct bytelane_sample lanes[9] __section(".data");
|
||
|
||
static char disable_sequential_delay_check __section(".data");
|
||
static int wl_print __section(".data");
|
||
|
||
static int enable_by_rank_init __section(".data");
|
||
static int saved_rank_mask __section(".data");
|
||
static int by_rank __section(".data");
|
||
static struct deskew_data rank_dsk[4] __section(".data");
|
||
static struct dac_data rank_dac[4] __section(".data");
|
||
|
||
// todo: perhaps remove node at some time completely?
|
||
static int node __section(".data");
|
||
static int base_cl __section(".data");
|
||
|
||
/* Parameters from DDR3 Specifications */
|
||
#define DDR3_TREFI 7800000 /* 7.8 us */
|
||
#define DDR3_ZQCS 80000ull /* 80 ns */
|
||
#define DDR3_ZQCS_INTERNAL 1280000000ull /* 128ms/100 */
|
||
#define DDR3_TCKE 5000 /* 5 ns */
|
||
#define DDR3_TMRD 4 /* 4 nCK */
|
||
#define DDR3_TDLLK 512 /* 512 nCK */
|
||
#define DDR3_TMPRR 1 /* 1 nCK */
|
||
#define DDR3_TWLMRD 40 /* 40 nCK */
|
||
#define DDR3_TWLDQSEN 25 /* 25 nCK */
|
||
|
||
/* Parameters from DDR4 Specifications */
|
||
#define DDR4_TMRD 8 /* 8 nCK */
|
||
#define DDR4_TDLLK 768 /* 768 nCK */
|
||
|
||
static void lmc_config(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_config cfg;
|
||
char *s;
|
||
|
||
cfg.u64 = 0;
|
||
|
||
cfg.cn78xx.ecc_ena = use_ecc;
|
||
cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb);
|
||
cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb);
|
||
|
||
cfg.cn78xx.idlepower = 0; /* Disabled */
|
||
|
||
s = lookup_env(priv, "ddr_idlepower");
|
||
if (s)
|
||
cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0);
|
||
|
||
cfg.cn78xx.forcewrite = 0; /* Disabled */
|
||
/* Include memory reference address in the ECC */
|
||
cfg.cn78xx.ecc_adr = 1;
|
||
|
||
s = lookup_env(priv, "ddr_ecc_adr");
|
||
if (s)
|
||
cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0);
|
||
|
||
cfg.cn78xx.reset = 0;
|
||
|
||
/*
|
||
* Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
|
||
* RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25],
|
||
* ref_zqcs_int(18:7) to
|
||
* RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
|
||
* value should always be greater than 32, to account for
|
||
* resistor calibration delays.
|
||
*/
|
||
|
||
cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f);
|
||
cfg.cn78xx.ref_zqcs_int |=
|
||
((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) /
|
||
(512 * 128))) & 0xfff) << 7);
|
||
|
||
cfg.cn78xx.early_dqx = 1; /* Default to enabled */
|
||
|
||
s = lookup_env(priv, "ddr_early_dqx");
|
||
if (!s)
|
||
s = lookup_env(priv, "ddr%d_early_dqx", if_num);
|
||
|
||
if (s)
|
||
cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0);
|
||
|
||
cfg.cn78xx.sref_with_dll = 0;
|
||
|
||
cfg.cn78xx.rank_ena = bunk_enable;
|
||
cfg.cn78xx.rankmask = rank_mask; /* Set later */
|
||
cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) &
|
||
rank_mask;
|
||
/* Set once and don't change it. */
|
||
cfg.cn78xx.init_status = rank_mask;
|
||
cfg.cn78xx.early_unload_d0_r0 = 0;
|
||
cfg.cn78xx.early_unload_d0_r1 = 0;
|
||
cfg.cn78xx.early_unload_d1_r0 = 0;
|
||
cfg.cn78xx.early_unload_d1_r1 = 0;
|
||
cfg.cn78xx.scrz = 0;
|
||
if (octeon_is_cpuid(OCTEON_CN70XX))
|
||
cfg.cn78xx.mode32b = 1; /* Read-only. Always 1. */
|
||
cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0;
|
||
cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) &&
|
||
(dram_width == 16)) ? 0 : 1;
|
||
|
||
s = lookup_env_ull(priv, "ddr_config");
|
||
if (s)
|
||
cfg.u64 = simple_strtoull(s, NULL, 0);
|
||
debug("LMC_CONFIG : 0x%016llx\n",
|
||
cfg.u64);
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
|
||
}
|
||
|
||
static void lmc_control(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_control ctrl;
|
||
char *s;
|
||
|
||
ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
ctrl.s.rdimm_ena = spd_rdimm;
|
||
ctrl.s.bwcnt = 0; /* Clear counter later */
|
||
if (spd_rdimm)
|
||
ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm);
|
||
else
|
||
ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm);
|
||
ctrl.s.pocas = 0;
|
||
ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2);
|
||
ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0;
|
||
ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0;
|
||
ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0;
|
||
ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0;
|
||
ctrl.s.elev_prio_dis = safe_ddr_flag ? 1 : 0;
|
||
/* discards writes to addresses that don't exist in the DRAM */
|
||
ctrl.s.nxm_write_en = 0;
|
||
ctrl.s.max_write_batch = 8;
|
||
ctrl.s.xor_bank = 1;
|
||
ctrl.s.auto_dclkdis = 1;
|
||
ctrl.s.int_zqcs_dis = 0;
|
||
ctrl.s.ext_zqcs_dis = 0;
|
||
ctrl.s.bprch = 1;
|
||
ctrl.s.wodt_bprch = 1;
|
||
ctrl.s.rodt_bprch = 1;
|
||
|
||
s = lookup_env(priv, "ddr_xor_bank");
|
||
if (s)
|
||
ctrl.s.xor_bank = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_2t");
|
||
if (s)
|
||
ctrl.s.ddr2t = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_fprch2");
|
||
if (s)
|
||
ctrl.s.fprch2 = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_bprch");
|
||
if (s)
|
||
ctrl.s.bprch = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_wodt_bprch");
|
||
if (s)
|
||
ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rodt_bprch");
|
||
if (s)
|
||
ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_int_zqcs_dis");
|
||
if (s)
|
||
ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_ext_zqcs_dis");
|
||
if (s)
|
||
ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env_ull(priv, "ddr_control");
|
||
if (s)
|
||
ctrl.u64 = simple_strtoull(s, NULL, 0);
|
||
|
||
debug("LMC_CONTROL : 0x%016llx\n",
|
||
ctrl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
|
||
}
|
||
|
||
static void lmc_timing_params0(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_timing_params0 tp0;
|
||
unsigned int trp_value;
|
||
char *s;
|
||
|
||
tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num));
|
||
|
||
trp_value = divide_roundup(trp, tclk_psecs) - 1;
|
||
debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
|
||
trp_value +
|
||
(unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull),
|
||
tclk_psecs)) - 4);
|
||
s = lookup_env_ull(priv, "ddr_use_old_trp");
|
||
if (s) {
|
||
if (!!simple_strtoull(s, NULL, 0)) {
|
||
trp_value +=
|
||
divide_roundup(max(4ull * tclk_psecs, 7500ull),
|
||
tclk_psecs) - 4;
|
||
debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n",
|
||
trp_value);
|
||
}
|
||
}
|
||
|
||
tp0.cn78xx.txpr =
|
||
divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull),
|
||
16 * tclk_psecs);
|
||
tp0.cn78xx.trp = trp_value & 0x1f;
|
||
tp0.cn78xx.tcksre =
|
||
divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1;
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
int tzqinit = 4; // Default to 4, for all DDR4 speed bins
|
||
|
||
s = lookup_env(priv, "ddr_tzqinit");
|
||
if (s)
|
||
tzqinit = simple_strtoul(s, NULL, 0);
|
||
|
||
tp0.cn78xx.tzqinit = tzqinit;
|
||
/* Always 8. */
|
||
tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs,
|
||
(16 * tclk_psecs));
|
||
tp0.cn78xx.tcke =
|
||
divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE),
|
||
tclk_psecs) - 1;
|
||
tp0.cn78xx.tmrd =
|
||
divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1;
|
||
tp0.cn78xx.tmod = 25; /* 25 is the max allowed */
|
||
tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256);
|
||
} else {
|
||
tp0.cn78xx.tzqinit =
|
||
divide_roundup(max(512ull * tclk_psecs, 640000ull),
|
||
(256 * tclk_psecs));
|
||
tp0.cn78xx.tzqcs =
|
||
divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS),
|
||
(16 * tclk_psecs));
|
||
tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1;
|
||
tp0.cn78xx.tmrd =
|
||
divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1;
|
||
tp0.cn78xx.tmod =
|
||
divide_roundup(max(12ull * tclk_psecs, 15000ull),
|
||
tclk_psecs) - 1;
|
||
tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256);
|
||
}
|
||
|
||
s = lookup_env_ull(priv, "ddr_timing_params0");
|
||
if (s)
|
||
tp0.u64 = simple_strtoull(s, NULL, 0);
|
||
debug("TIMING_PARAMS0 : 0x%016llx\n",
|
||
tp0.u64);
|
||
lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64);
|
||
}
|
||
|
||
static void lmc_timing_params1(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_timing_params1 tp1;
|
||
unsigned int txp, temp_trcd, trfc_dlr;
|
||
char *s;
|
||
|
||
tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
|
||
|
||
/* .cn70xx. */
|
||
tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1;
|
||
|
||
tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1;
|
||
|
||
temp_trcd = divide_roundup(trcd, tclk_psecs);
|
||
if (temp_trcd > 15) {
|
||
debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n",
|
||
temp_trcd);
|
||
}
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) {
|
||
/*
|
||
* Let .trcd=0 serve as a flag that the field has
|
||
* overflowed. Must use Additive Latency mode as a
|
||
* workaround.
|
||
*/
|
||
temp_trcd = 0;
|
||
}
|
||
tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf;
|
||
tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1;
|
||
|
||
tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1;
|
||
tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs);
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
/* Workaround bug 24006. Use Trrd_l. */
|
||
tp1.cn78xx.trrd =
|
||
divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
|
||
} else {
|
||
tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2;
|
||
}
|
||
|
||
/*
|
||
* tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec
|
||
* tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec
|
||
* tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec
|
||
* tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec
|
||
* tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec
|
||
* tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec
|
||
*/
|
||
txp = (tclk_psecs < 1875) ? 6000 : 7500;
|
||
txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp),
|
||
tclk_psecs) - 1;
|
||
if (txp > 7) {
|
||
debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n",
|
||
txp);
|
||
}
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7)
|
||
txp = 7; // max it out
|
||
tp1.cn78xx.txp = (txp >> 0) & 7;
|
||
tp1.cn78xx.txp_ext = (txp >> 3) & 1;
|
||
|
||
tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs,
|
||
4 * tclk_psecs);
|
||
tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs,
|
||
4 * tclk_psecs);
|
||
tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs);
|
||
tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull),
|
||
tclk_psecs) - 1;
|
||
|
||
if (ddr_type == DDR4_DRAM && is_3ds_dimm) {
|
||
/*
|
||
* 4 Gb: tRFC_DLR = 90 ns
|
||
* 8 Gb: tRFC_DLR = 120 ns
|
||
* 16 Gb: tRFC_DLR = 190 ns FIXME?
|
||
*/
|
||
if (die_capacity == 0x1000) // 4 Gbit
|
||
trfc_dlr = 90;
|
||
else if (die_capacity == 0x2000) // 8 Gbit
|
||
trfc_dlr = 120;
|
||
else if (die_capacity == 0x4000) // 16 Gbit
|
||
trfc_dlr = 190;
|
||
else
|
||
trfc_dlr = 0;
|
||
|
||
if (trfc_dlr == 0) {
|
||
debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
|
||
node, if_num, die_capacity);
|
||
} else {
|
||
tp1.cn78xx.trfc_dlr =
|
||
divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs);
|
||
debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
|
||
node, if_num, tp1.cn78xx.trfc_dlr);
|
||
}
|
||
}
|
||
|
||
s = lookup_env_ull(priv, "ddr_timing_params1");
|
||
if (s)
|
||
tp1.u64 = simple_strtoull(s, NULL, 0);
|
||
|
||
debug("TIMING_PARAMS1 : 0x%016llx\n",
|
||
tp1.u64);
|
||
lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
|
||
}
|
||
|
||
static void lmc_timing_params2(struct ddr_priv *priv)
|
||
{
|
||
if (ddr_type == DDR4_DRAM) {
|
||
union cvmx_lmcx_timing_params1 tp1;
|
||
union cvmx_lmcx_timing_params2 tp2;
|
||
int temp_trrd_l;
|
||
|
||
tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
|
||
tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num));
|
||
debug("TIMING_PARAMS2 : 0x%016llx\n",
|
||
tp2.u64);
|
||
|
||
temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
|
||
if (temp_trrd_l > 7)
|
||
debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n",
|
||
temp_trrd_l);
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7)
|
||
temp_trrd_l = 7; // max it out
|
||
tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7;
|
||
tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1;
|
||
|
||
// correct for 1600-2400
|
||
tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull),
|
||
tclk_psecs) - 1;
|
||
tp2.s.t_rw_op_max = 7;
|
||
tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull),
|
||
tclk_psecs) - 1;
|
||
|
||
debug("TIMING_PARAMS2 : 0x%016llx\n",
|
||
tp2.u64);
|
||
lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64);
|
||
|
||
/*
|
||
* Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
|
||
* for Write-to-Read operations to the same Bank Group
|
||
*/
|
||
if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) {
|
||
tp1.cn78xx.twtr = tp2.s.twtr_l - 4;
|
||
debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n",
|
||
tp1.cn78xx.twtr, tp2.s.twtr_l);
|
||
debug("TIMING_PARAMS1 : 0x%016llx\n",
|
||
tp1.u64);
|
||
lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
|
||
}
|
||
}
|
||
}
|
||
|
||
static void lmc_modereg_params0(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_modereg_params0 mp0;
|
||
int param;
|
||
char *s;
|
||
|
||
mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
mp0.s.cwl = 0; /* 1600 (1250ps) */
|
||
if (tclk_psecs < 1250)
|
||
mp0.s.cwl = 1; /* 1866 (1072ps) */
|
||
if (tclk_psecs < 1072)
|
||
mp0.s.cwl = 2; /* 2133 (938ps) */
|
||
if (tclk_psecs < 938)
|
||
mp0.s.cwl = 3; /* 2400 (833ps) */
|
||
if (tclk_psecs < 833)
|
||
mp0.s.cwl = 4; /* 2666 (750ps) */
|
||
if (tclk_psecs < 750)
|
||
mp0.s.cwl = 5; /* 3200 (625ps) */
|
||
} else {
|
||
/*
|
||
** CSR CWL CAS write Latency
|
||
** === === =================================
|
||
** 0 5 ( tCK(avg) >= 2.5 ns)
|
||
** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns)
|
||
** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns)
|
||
** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns)
|
||
** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns)
|
||
** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns)
|
||
** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns)
|
||
** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns)
|
||
*/
|
||
|
||
mp0.s.cwl = 0;
|
||
if (tclk_psecs < 2500)
|
||
mp0.s.cwl = 1;
|
||
if (tclk_psecs < 1875)
|
||
mp0.s.cwl = 2;
|
||
if (tclk_psecs < 1500)
|
||
mp0.s.cwl = 3;
|
||
if (tclk_psecs < 1250)
|
||
mp0.s.cwl = 4;
|
||
if (tclk_psecs < 1070)
|
||
mp0.s.cwl = 5;
|
||
if (tclk_psecs < 935)
|
||
mp0.s.cwl = 6;
|
||
if (tclk_psecs < 833)
|
||
mp0.s.cwl = 7;
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_cwl");
|
||
if (s)
|
||
mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5;
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
|
||
mp0.s.cwl + 9
|
||
+ ((mp0.s.cwl > 2) ? (mp0.s.cwl - 3) * 2 : 0), mp0.s.cwl);
|
||
} else {
|
||
debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
|
||
mp0.s.cwl + 5, mp0.s.cwl);
|
||
}
|
||
|
||
mp0.s.mprloc = 0;
|
||
mp0.s.mpr = 0;
|
||
mp0.s.dll = (ddr_type == DDR4_DRAM); /* 0 for DDR3 and 1 for DDR4 */
|
||
mp0.s.al = 0;
|
||
mp0.s.wlev = 0; /* Read Only */
|
||
if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM)
|
||
mp0.s.tdqs = 0;
|
||
else
|
||
mp0.s.tdqs = 1;
|
||
mp0.s.qoff = 0;
|
||
|
||
s = lookup_env(priv, "ddr_cl");
|
||
if (s) {
|
||
cl = simple_strtoul(s, NULL, 0);
|
||
debug("CAS Latency : %6d\n",
|
||
cl);
|
||
}
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
mp0.s.cl = 0x0;
|
||
if (cl > 9)
|
||
mp0.s.cl = 0x1;
|
||
if (cl > 10)
|
||
mp0.s.cl = 0x2;
|
||
if (cl > 11)
|
||
mp0.s.cl = 0x3;
|
||
if (cl > 12)
|
||
mp0.s.cl = 0x4;
|
||
if (cl > 13)
|
||
mp0.s.cl = 0x5;
|
||
if (cl > 14)
|
||
mp0.s.cl = 0x6;
|
||
if (cl > 15)
|
||
mp0.s.cl = 0x7;
|
||
if (cl > 16)
|
||
mp0.s.cl = 0x8;
|
||
if (cl > 18)
|
||
mp0.s.cl = 0x9;
|
||
if (cl > 20)
|
||
mp0.s.cl = 0xA;
|
||
if (cl > 24)
|
||
mp0.s.cl = 0xB;
|
||
} else {
|
||
mp0.s.cl = 0x2;
|
||
if (cl > 5)
|
||
mp0.s.cl = 0x4;
|
||
if (cl > 6)
|
||
mp0.s.cl = 0x6;
|
||
if (cl > 7)
|
||
mp0.s.cl = 0x8;
|
||
if (cl > 8)
|
||
mp0.s.cl = 0xA;
|
||
if (cl > 9)
|
||
mp0.s.cl = 0xC;
|
||
if (cl > 10)
|
||
mp0.s.cl = 0xE;
|
||
if (cl > 11)
|
||
mp0.s.cl = 0x1;
|
||
if (cl > 12)
|
||
mp0.s.cl = 0x3;
|
||
if (cl > 13)
|
||
mp0.s.cl = 0x5;
|
||
if (cl > 14)
|
||
mp0.s.cl = 0x7;
|
||
if (cl > 15)
|
||
mp0.s.cl = 0x9;
|
||
}
|
||
|
||
mp0.s.rbt = 0; /* Read Only. */
|
||
mp0.s.tm = 0;
|
||
mp0.s.dllr = 0;
|
||
|
||
param = divide_roundup(twr, tclk_psecs);
|
||
|
||
if (ddr_type == DDR4_DRAM) { /* DDR4 */
|
||
mp0.s.wrp = 1;
|
||
if (param > 12)
|
||
mp0.s.wrp = 2;
|
||
if (param > 14)
|
||
mp0.s.wrp = 3;
|
||
if (param > 16)
|
||
mp0.s.wrp = 4;
|
||
if (param > 18)
|
||
mp0.s.wrp = 5;
|
||
if (param > 20)
|
||
mp0.s.wrp = 6;
|
||
if (param > 24) /* RESERVED in DDR4 spec */
|
||
mp0.s.wrp = 7;
|
||
} else { /* DDR3 */
|
||
mp0.s.wrp = 1;
|
||
if (param > 5)
|
||
mp0.s.wrp = 2;
|
||
if (param > 6)
|
||
mp0.s.wrp = 3;
|
||
if (param > 7)
|
||
mp0.s.wrp = 4;
|
||
if (param > 8)
|
||
mp0.s.wrp = 5;
|
||
if (param > 10)
|
||
mp0.s.wrp = 6;
|
||
if (param > 12)
|
||
mp0.s.wrp = 7;
|
||
}
|
||
|
||
mp0.s.ppd = 0;
|
||
|
||
s = lookup_env(priv, "ddr_wrp");
|
||
if (s)
|
||
mp0.s.wrp = simple_strtoul(s, NULL, 0);
|
||
|
||
debug("%-45s : %d, [0x%x]\n",
|
||
"Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp);
|
||
|
||
s = lookup_env_ull(priv, "ddr_modereg_params0");
|
||
if (s)
|
||
mp0.u64 = simple_strtoull(s, NULL, 0);
|
||
|
||
debug("MODEREG_PARAMS0 : 0x%016llx\n",
|
||
mp0.u64);
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
|
||
}
|
||
|
||
static void lmc_modereg_params1(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_modereg_params1 mp1;
|
||
char *s;
|
||
int i;
|
||
|
||
mp1.u64 = odt_config[odt_idx].modereg_params1.u64;
|
||
|
||
/*
|
||
* Special request: mismatched DIMM support. Slot 0: 2-Rank,
|
||
* Slot 1: 1-Rank
|
||
*/
|
||
if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
|
||
mp1.s.rtt_nom_00 = 0;
|
||
mp1.s.rtt_nom_01 = 3; /* rttnom_40ohm */
|
||
mp1.s.rtt_nom_10 = 3; /* rttnom_40ohm */
|
||
mp1.s.rtt_nom_11 = 0;
|
||
dyn_rtt_nom_mask = 0x6;
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_rtt_nom_mask");
|
||
if (s)
|
||
dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0);
|
||
|
||
/*
|
||
* Save the original rtt_nom settings before sweeping through
|
||
* settings.
|
||
*/
|
||
default_rtt_nom[0] = mp1.s.rtt_nom_00;
|
||
default_rtt_nom[1] = mp1.s.rtt_nom_01;
|
||
default_rtt_nom[2] = mp1.s.rtt_nom_10;
|
||
default_rtt_nom[3] = mp1.s.rtt_nom_11;
|
||
|
||
ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto;
|
||
|
||
for (i = 0; i < 4; ++i) {
|
||
u64 value;
|
||
|
||
s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2),
|
||
!!(i & 1));
|
||
if (!s)
|
||
s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num,
|
||
!!(i & 2), !!(i & 1));
|
||
if (s) {
|
||
value = simple_strtoul(s, NULL, 0);
|
||
mp1.u64 &= ~((u64)0x7 << (i * 12 + 9));
|
||
mp1.u64 |= ((value & 0x7) << (i * 12 + 9));
|
||
default_rtt_nom[i] = value;
|
||
ddr_rtt_nom_auto = 0;
|
||
}
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_rtt_nom");
|
||
if (!s)
|
||
s = lookup_env(priv, "ddr%d_rtt_nom", if_num);
|
||
if (s) {
|
||
u64 value;
|
||
|
||
value = simple_strtoul(s, NULL, 0);
|
||
|
||
if (dyn_rtt_nom_mask & 1) {
|
||
default_rtt_nom[0] = value;
|
||
mp1.s.rtt_nom_00 = value;
|
||
}
|
||
if (dyn_rtt_nom_mask & 2) {
|
||
default_rtt_nom[1] = value;
|
||
mp1.s.rtt_nom_01 = value;
|
||
}
|
||
if (dyn_rtt_nom_mask & 4) {
|
||
default_rtt_nom[2] = value;
|
||
mp1.s.rtt_nom_10 = value;
|
||
}
|
||
if (dyn_rtt_nom_mask & 8) {
|
||
default_rtt_nom[3] = value;
|
||
mp1.s.rtt_nom_11 = value;
|
||
}
|
||
|
||
ddr_rtt_nom_auto = 0;
|
||
}
|
||
|
||
for (i = 0; i < 4; ++i) {
|
||
u64 value;
|
||
|
||
s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1));
|
||
if (!s)
|
||
s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num,
|
||
!!(i & 2), !!(i & 1));
|
||
if (s) {
|
||
value = simple_strtoul(s, NULL, 0);
|
||
insrt_wr(&mp1.u64, i, value);
|
||
}
|
||
}
|
||
|
||
// Make sure 78XX pass 1 has valid RTT_WR settings, because
|
||
// configuration files may be set-up for later chips, and
|
||
// 78XX pass 1 supports no RTT_WR extension bits
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
|
||
for (i = 0; i < 4; ++i) {
|
||
// if 80 or undefined
|
||
if (extr_wr(mp1.u64, i) > 3) {
|
||
// FIXME? always insert 120
|
||
insrt_wr(&mp1.u64, i, 1);
|
||
debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n",
|
||
!!(i & 2), i & 1);
|
||
}
|
||
}
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_dic");
|
||
if (s) {
|
||
u64 value = simple_strtoul(s, NULL, 0);
|
||
|
||
for (i = 0; i < 4; ++i) {
|
||
mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
|
||
mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
|
||
}
|
||
}
|
||
|
||
for (i = 0; i < 4; ++i) {
|
||
u64 value;
|
||
|
||
s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1));
|
||
if (s) {
|
||
value = simple_strtoul(s, NULL, 0);
|
||
mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
|
||
mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
|
||
}
|
||
}
|
||
|
||
s = lookup_env_ull(priv, "ddr_modereg_params1");
|
||
if (s)
|
||
mp1.u64 = simple_strtoull(s, NULL, 0);
|
||
|
||
debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
|
||
mp1.s.rtt_nom_11,
|
||
mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00);
|
||
|
||
debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
|
||
imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
|
||
imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
|
||
imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
|
||
extr_wr(mp1.u64, 3),
|
||
extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0));
|
||
|
||
debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->dic_ohms[mp1.s.dic_11],
|
||
imp_val->dic_ohms[mp1.s.dic_10],
|
||
imp_val->dic_ohms[mp1.s.dic_01],
|
||
imp_val->dic_ohms[mp1.s.dic_00],
|
||
mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00);
|
||
|
||
debug("MODEREG_PARAMS1 : 0x%016llx\n",
|
||
mp1.u64);
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64);
|
||
}
|
||
|
||
static void lmc_modereg_params2(struct ddr_priv *priv)
|
||
{
|
||
char *s;
|
||
int i;
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
union cvmx_lmcx_modereg_params2 mp2;
|
||
|
||
mp2.u64 = odt_config[odt_idx].modereg_params2.u64;
|
||
|
||
s = lookup_env(priv, "ddr_rtt_park");
|
||
if (s) {
|
||
u64 value = simple_strtoul(s, NULL, 0);
|
||
|
||
for (i = 0; i < 4; ++i) {
|
||
mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
|
||
mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
|
||
}
|
||
}
|
||
|
||
for (i = 0; i < 4; ++i) {
|
||
u64 value;
|
||
|
||
s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2),
|
||
!!(i & 1));
|
||
if (s) {
|
||
value = simple_strtoul(s, NULL, 0);
|
||
mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
|
||
mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
|
||
}
|
||
}
|
||
|
||
s = lookup_env_ull(priv, "ddr_modereg_params2");
|
||
if (s)
|
||
mp2.u64 = simple_strtoull(s, NULL, 0);
|
||
|
||
debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
|
||
imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
|
||
imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
|
||
imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
|
||
mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01,
|
||
mp2.s.rtt_park_00);
|
||
|
||
debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
|
||
mp2.s.vref_range_11,
|
||
mp2.s.vref_range_10,
|
||
mp2.s.vref_range_01, mp2.s.vref_range_00);
|
||
|
||
debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
|
||
mp2.s.vref_value_11,
|
||
mp2.s.vref_value_10,
|
||
mp2.s.vref_value_01, mp2.s.vref_value_00);
|
||
|
||
debug("MODEREG_PARAMS2 : 0x%016llx\n",
|
||
mp2.u64);
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64);
|
||
}
|
||
}
|
||
|
||
static void lmc_modereg_params3(struct ddr_priv *priv)
|
||
{
|
||
char *s;
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
union cvmx_lmcx_modereg_params3 mp3;
|
||
|
||
mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num));
|
||
/* Disable as workaround to Errata 20547 */
|
||
mp3.s.rd_dbi = 0;
|
||
mp3.s.tccd_l = max(divide_roundup(ddr4_tccd_lmin, tclk_psecs),
|
||
5ull) - 4;
|
||
|
||
s = lookup_env(priv, "ddr_rd_preamble");
|
||
if (s)
|
||
mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0);
|
||
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
|
||
int delay = 0;
|
||
|
||
if (lranks_per_prank == 4 && ddr_hertz >= 1000000000)
|
||
delay = 1;
|
||
|
||
mp3.s.xrank_add_tccd_l = delay;
|
||
mp3.s.xrank_add_tccd_s = delay;
|
||
}
|
||
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64);
|
||
debug("MODEREG_PARAMS3 : 0x%016llx\n",
|
||
mp3.u64);
|
||
}
|
||
}
|
||
|
||
static void lmc_nxm(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_nxm lmc_nxm;
|
||
int num_bits = row_lsb + row_bits + lranks_bits - 26;
|
||
char *s;
|
||
|
||
lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num));
|
||
|
||
/* .cn78xx. */
|
||
if (rank_mask & 0x1)
|
||
lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits;
|
||
if (rank_mask & 0x2)
|
||
lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits;
|
||
if (rank_mask & 0x4)
|
||
lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits;
|
||
if (rank_mask & 0x8)
|
||
lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits;
|
||
|
||
/* Set the mask for non-existent ranks. */
|
||
lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff;
|
||
|
||
s = lookup_env_ull(priv, "ddr_nxm");
|
||
if (s)
|
||
lmc_nxm.u64 = simple_strtoull(s, NULL, 0);
|
||
|
||
debug("LMC_NXM : 0x%016llx\n",
|
||
lmc_nxm.u64);
|
||
lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64);
|
||
}
|
||
|
||
static void lmc_wodt_mask(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_wodt_mask wodt_mask;
|
||
char *s;
|
||
|
||
wodt_mask.u64 = odt_config[odt_idx].odt_mask;
|
||
|
||
s = lookup_env_ull(priv, "ddr_wodt_mask");
|
||
if (s)
|
||
wodt_mask.u64 = simple_strtoull(s, NULL, 0);
|
||
|
||
debug("WODT_MASK : 0x%016llx\n",
|
||
wodt_mask.u64);
|
||
lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64);
|
||
}
|
||
|
||
static void lmc_rodt_mask(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_rodt_mask rodt_mask;
|
||
int rankx;
|
||
char *s;
|
||
|
||
rodt_mask.u64 = odt_config[odt_idx].rodt_ctl;
|
||
|
||
s = lookup_env_ull(priv, "ddr_rodt_mask");
|
||
if (s)
|
||
rodt_mask.u64 = simple_strtoull(s, NULL, 0);
|
||
|
||
debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64);
|
||
lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64);
|
||
|
||
dyn_rtt_nom_mask = 0;
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff);
|
||
}
|
||
if (num_ranks == 4) {
|
||
/*
|
||
* Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
|
||
* ODT1 is wired to the third rank (rank 2). The mask,
|
||
* dyn_rtt_nom_mask, is used to indicate for which ranks
|
||
* to sweep RTT_NOM during read-leveling. Shift the bit
|
||
* from the ODT1 position over to the "ODT2" position so
|
||
* that the read-leveling analysis comes out right.
|
||
*/
|
||
int odt1_bit = dyn_rtt_nom_mask & 2;
|
||
|
||
dyn_rtt_nom_mask &= ~2;
|
||
dyn_rtt_nom_mask |= odt1_bit << 1;
|
||
}
|
||
debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
|
||
}
|
||
|
||
static void lmc_comp_ctl2(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_comp_ctl2 cc2;
|
||
char *s;
|
||
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
|
||
cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena;
|
||
/* Default 4=34.3 ohm */
|
||
cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl;
|
||
/* Default 4=34.3 ohm */
|
||
cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl;
|
||
/* Default 4=34.3 ohm */
|
||
cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 4 : c_cfg->ctl_ctl;
|
||
|
||
ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto;
|
||
s = lookup_env(priv, "ddr_rodt_ctl_auto");
|
||
if (s)
|
||
ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0);
|
||
|
||
default_rodt_ctl = odt_config[odt_idx].qs_dic;
|
||
s = lookup_env(priv, "ddr_rodt_ctl");
|
||
if (!s)
|
||
s = lookup_env(priv, "ddr%d_rodt_ctl", if_num);
|
||
if (s) {
|
||
default_rodt_ctl = simple_strtoul(s, NULL, 0);
|
||
ddr_rodt_ctl_auto = 0;
|
||
}
|
||
|
||
cc2.cn70xx.rodt_ctl = default_rodt_ctl;
|
||
|
||
// if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms,
|
||
// and DCLK speed is 1 GHz or more...
|
||
if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm &&
|
||
ddr_hertz >= 1000000000) {
|
||
// lowest for DDR4 is 26 ohms
|
||
cc2.s.ck_ctl = ddr4_driver_26_ohm;
|
||
debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n",
|
||
node, if_num, cc2.s.ck_ctl,
|
||
imp_val->drive_strength[cc2.s.ck_ctl]);
|
||
}
|
||
|
||
// if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms,
|
||
// if DCLK speed is 1 GHz or more...
|
||
if (ddr_type == DDR4_DRAM && dimm_count == 2 &&
|
||
(spd_dimm_type == 2 || spd_dimm_type == 6) &&
|
||
ddr_hertz >= 1000000000) {
|
||
// lowest for DDR4 is 26 ohms
|
||
cc2.cn78xx.control_ctl = ddr4_driver_26_ohm;
|
||
// lowest for DDR4 is 26 ohms
|
||
cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm;
|
||
debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n",
|
||
node, if_num, ddr4_driver_26_ohm,
|
||
imp_val->drive_strength[ddr4_driver_26_ohm]);
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_ck_ctl");
|
||
if (s)
|
||
cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_cmd_ctl");
|
||
if (s)
|
||
cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_control_ctl");
|
||
if (s)
|
||
cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_dqx_ctl");
|
||
if (s)
|
||
cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0);
|
||
|
||
debug("%-45s : %d, %d ohms\n", "DQX_CTL ", cc2.cn78xx.dqx_ctl,
|
||
imp_val->drive_strength[cc2.cn78xx.dqx_ctl]);
|
||
debug("%-45s : %d, %d ohms\n", "CK_CTL ", cc2.cn78xx.ck_ctl,
|
||
imp_val->drive_strength[cc2.cn78xx.ck_ctl]);
|
||
debug("%-45s : %d, %d ohms\n", "CMD_CTL ", cc2.cn78xx.cmd_ctl,
|
||
imp_val->drive_strength[cc2.cn78xx.cmd_ctl]);
|
||
debug("%-45s : %d, %d ohms\n", "CONTROL_CTL ",
|
||
cc2.cn78xx.control_ctl,
|
||
imp_val->drive_strength[cc2.cn78xx.control_ctl]);
|
||
debug("Read ODT_CTL : 0x%x (%d ohms)\n",
|
||
cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
|
||
|
||
debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64);
|
||
lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
|
||
}
|
||
|
||
static void lmc_phy_ctl(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_phy_ctl phy_ctl;
|
||
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
phy_ctl.s.ts_stagger = 0;
|
||
// FIXME: are there others TBD?
|
||
phy_ctl.s.dsk_dbg_overwrt_ena = 0;
|
||
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
|
||
// C0 is TEN, C1 is A17
|
||
phy_ctl.s.c0_sel = 2;
|
||
phy_ctl.s.c1_sel = 2;
|
||
debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
|
||
node, if_num, phy_ctl.s.c1_sel);
|
||
}
|
||
|
||
debug("PHY_CTL : 0x%016llx\n",
|
||
phy_ctl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
}
|
||
|
||
static void lmc_ext_config(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_ext_config ext_cfg;
|
||
char *s;
|
||
|
||
ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
|
||
ext_cfg.s.vrefint_seq_deskew = 0;
|
||
ext_cfg.s.read_ena_bprch = 1;
|
||
ext_cfg.s.read_ena_fprch = 1;
|
||
ext_cfg.s.drive_ena_fprch = 1;
|
||
ext_cfg.s.drive_ena_bprch = 1;
|
||
// make sure this is OFF for all current chips
|
||
ext_cfg.s.invert_data = 0;
|
||
|
||
s = lookup_env(priv, "ddr_read_fprch");
|
||
if (s)
|
||
ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_read_bprch");
|
||
if (s)
|
||
ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_drive_fprch");
|
||
if (s)
|
||
ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_drive_bprch");
|
||
if (s)
|
||
ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0);
|
||
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
|
||
ext_cfg.s.dimm0_cid = lranks_bits;
|
||
ext_cfg.s.dimm1_cid = lranks_bits;
|
||
debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
|
||
node, if_num, ext_cfg.s.dimm0_cid);
|
||
}
|
||
|
||
lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
|
||
debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64);
|
||
}
|
||
|
||
static void lmc_ext_config2(struct ddr_priv *priv)
|
||
{
|
||
char *s;
|
||
|
||
// NOTE: all chips have this register, but not necessarily the
|
||
// fields we modify...
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
|
||
!octeon_is_cpuid(OCTEON_CN73XX)) {
|
||
union cvmx_lmcx_ext_config2 ext_cfg2;
|
||
int value = 1; // default to 1
|
||
|
||
ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num));
|
||
|
||
s = lookup_env(priv, "ddr_ext2_delay_unload");
|
||
if (s)
|
||
value = !!simple_strtoul(s, NULL, 0);
|
||
|
||
ext_cfg2.s.delay_unload_r0 = value;
|
||
ext_cfg2.s.delay_unload_r1 = value;
|
||
ext_cfg2.s.delay_unload_r2 = value;
|
||
ext_cfg2.s.delay_unload_r3 = value;
|
||
|
||
lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64);
|
||
debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64);
|
||
}
|
||
}
|
||
|
||
static void lmc_dimm01_params_loop(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_dimmx_params dimm_p;
|
||
int dimmx = didx;
|
||
char *s;
|
||
int rc;
|
||
int i;
|
||
|
||
dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num));
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0;
|
||
union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1;
|
||
union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl;
|
||
|
||
dimm_p.s.rc0 = 0;
|
||
dimm_p.s.rc1 = 0;
|
||
dimm_p.s.rc2 = 0;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0,
|
||
DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
|
||
dimm_p.s.rc3 = (rc >> 4) & 0xf;
|
||
dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2;
|
||
dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0,
|
||
DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
|
||
dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2;
|
||
dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0;
|
||
|
||
dimm_p.s.rc6 = 0;
|
||
dimm_p.s.rc7 = 0;
|
||
dimm_p.s.rc8 = 0;
|
||
dimm_p.s.rc9 = 0;
|
||
|
||
/*
|
||
* rc10 DDR4 RDIMM Operating Speed
|
||
* === ===================================================
|
||
* 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
|
||
* 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
|
||
* 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps)
|
||
* 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps)
|
||
* 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps)
|
||
* 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps)
|
||
*/
|
||
dimm_p.s.rc10 = 0;
|
||
if (tclk_psecs < 1250)
|
||
dimm_p.s.rc10 = 1;
|
||
if (tclk_psecs < 1071)
|
||
dimm_p.s.rc10 = 2;
|
||
if (tclk_psecs < 938)
|
||
dimm_p.s.rc10 = 3;
|
||
if (tclk_psecs < 833)
|
||
dimm_p.s.rc10 = 4;
|
||
if (tclk_psecs < 750)
|
||
dimm_p.s.rc10 = 5;
|
||
|
||
dimm_p.s.rc11 = 0;
|
||
dimm_p.s.rc12 = 0;
|
||
/* 0=LRDIMM, 1=RDIMM */
|
||
dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4;
|
||
dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ?
|
||
(spd_addr_mirror << 3) : 0;
|
||
dimm_p.s.rc14 = 0;
|
||
dimm_p.s.rc15 = 0; /* 1 nCK latency adder */
|
||
|
||
ddr4_p0.u64 = 0;
|
||
|
||
ddr4_p0.s.rc8x = 0;
|
||
ddr4_p0.s.rc7x = 0;
|
||
ddr4_p0.s.rc6x = 0;
|
||
ddr4_p0.s.rc5x = 0;
|
||
ddr4_p0.s.rc4x = 0;
|
||
|
||
ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs);
|
||
|
||
ddr4_p0.s.rc2x = 0;
|
||
ddr4_p0.s.rc1x = 0;
|
||
|
||
ddr4_p1.u64 = 0;
|
||
|
||
ddr4_p1.s.rcbx = 0;
|
||
ddr4_p1.s.rcax = 0;
|
||
ddr4_p1.s.rc9x = 0;
|
||
|
||
ddr4_ctl.u64 = 0;
|
||
ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004;
|
||
ddr4_ctl.cn70xx.ddr4_dimm1_wmask =
|
||
(dimm_count > 1) ? 0x004 : 0x0000;
|
||
|
||
/*
|
||
* Handle any overrides from envvars here...
|
||
*/
|
||
s = lookup_env(priv, "ddr_ddr4_params0");
|
||
if (s)
|
||
ddr4_p0.u64 = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_ddr4_params1");
|
||
if (s)
|
||
ddr4_p1.u64 = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_ddr4_dimm_ctl");
|
||
if (s)
|
||
ddr4_ctl.u64 = simple_strtoul(s, NULL, 0);
|
||
|
||
for (i = 0; i < 11; ++i) {
|
||
u64 value;
|
||
|
||
s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1);
|
||
if (s) {
|
||
value = simple_strtoul(s, NULL, 0);
|
||
if (i < 8) {
|
||
ddr4_p0.u64 &= ~((u64)0xff << (i * 8));
|
||
ddr4_p0.u64 |= (value << (i * 8));
|
||
} else {
|
||
ddr4_p1.u64 &=
|
||
~((u64)0xff << ((i - 8) * 8));
|
||
ddr4_p1.u64 |= (value << ((i - 8) * 8));
|
||
}
|
||
}
|
||
}
|
||
|
||
/*
|
||
* write the final CSR values
|
||
*/
|
||
lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num),
|
||
ddr4_p0.u64);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num),
|
||
ddr4_p1.u64);
|
||
|
||
debug("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
|
||
dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax,
|
||
ddr4_p1.s.rc9x, ddr4_p0.s.rc8x,
|
||
ddr4_p0.s.rc7x, ddr4_p0.s.rc6x,
|
||
ddr4_p0.s.rc5x, ddr4_p0.s.rc4x,
|
||
ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x);
|
||
|
||
} else {
|
||
rc = read_spd(&dimm_config_table[didx], 0, 69);
|
||
dimm_p.s.rc0 = (rc >> 0) & 0xf;
|
||
dimm_p.s.rc1 = (rc >> 4) & 0xf;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0, 70);
|
||
dimm_p.s.rc2 = (rc >> 0) & 0xf;
|
||
dimm_p.s.rc3 = (rc >> 4) & 0xf;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0, 71);
|
||
dimm_p.s.rc4 = (rc >> 0) & 0xf;
|
||
dimm_p.s.rc5 = (rc >> 4) & 0xf;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0, 72);
|
||
dimm_p.s.rc6 = (rc >> 0) & 0xf;
|
||
dimm_p.s.rc7 = (rc >> 4) & 0xf;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0, 73);
|
||
dimm_p.s.rc8 = (rc >> 0) & 0xf;
|
||
dimm_p.s.rc9 = (rc >> 4) & 0xf;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0, 74);
|
||
dimm_p.s.rc10 = (rc >> 0) & 0xf;
|
||
dimm_p.s.rc11 = (rc >> 4) & 0xf;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0, 75);
|
||
dimm_p.s.rc12 = (rc >> 0) & 0xf;
|
||
dimm_p.s.rc13 = (rc >> 4) & 0xf;
|
||
|
||
rc = read_spd(&dimm_config_table[didx], 0, 76);
|
||
dimm_p.s.rc14 = (rc >> 0) & 0xf;
|
||
dimm_p.s.rc15 = (rc >> 4) & 0xf;
|
||
|
||
s = ddr_getenv_debug(priv, "ddr_clk_drive");
|
||
if (s) {
|
||
if (strcmp(s, "light") == 0)
|
||
dimm_p.s.rc5 = 0x0; /* Light Drive */
|
||
if (strcmp(s, "moderate") == 0)
|
||
dimm_p.s.rc5 = 0x5; /* Moderate Drive */
|
||
if (strcmp(s, "strong") == 0)
|
||
dimm_p.s.rc5 = 0xA; /* Strong Drive */
|
||
printf("Parameter found in environment. ddr_clk_drive = %s\n",
|
||
s);
|
||
}
|
||
|
||
s = ddr_getenv_debug(priv, "ddr_cmd_drive");
|
||
if (s) {
|
||
if (strcmp(s, "light") == 0)
|
||
dimm_p.s.rc3 = 0x0; /* Light Drive */
|
||
if (strcmp(s, "moderate") == 0)
|
||
dimm_p.s.rc3 = 0x5; /* Moderate Drive */
|
||
if (strcmp(s, "strong") == 0)
|
||
dimm_p.s.rc3 = 0xA; /* Strong Drive */
|
||
printf("Parameter found in environment. ddr_cmd_drive = %s\n",
|
||
s);
|
||
}
|
||
|
||
s = ddr_getenv_debug(priv, "ddr_ctl_drive");
|
||
if (s) {
|
||
if (strcmp(s, "light") == 0)
|
||
dimm_p.s.rc4 = 0x0; /* Light Drive */
|
||
if (strcmp(s, "moderate") == 0)
|
||
dimm_p.s.rc4 = 0x5; /* Moderate Drive */
|
||
printf("Parameter found in environment. ddr_ctl_drive = %s\n",
|
||
s);
|
||
}
|
||
|
||
/*
|
||
* rc10 DDR3 RDIMM Operating Speed
|
||
* == =====================================================
|
||
* 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 def
|
||
* 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
|
||
* 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
|
||
* 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
|
||
* 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866
|
||
*/
|
||
dimm_p.s.rc10 = 0;
|
||
if (tclk_psecs < 2500)
|
||
dimm_p.s.rc10 = 1;
|
||
if (tclk_psecs < 1875)
|
||
dimm_p.s.rc10 = 2;
|
||
if (tclk_psecs < 1500)
|
||
dimm_p.s.rc10 = 3;
|
||
if (tclk_psecs < 1250)
|
||
dimm_p.s.rc10 = 4;
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_dimmx_params", i);
|
||
if (s)
|
||
dimm_p.u64 = simple_strtoul(s, NULL, 0);
|
||
|
||
for (i = 0; i < 16; ++i) {
|
||
u64 value;
|
||
|
||
s = lookup_env(priv, "ddr_rc%d", i);
|
||
if (s) {
|
||
value = simple_strtoul(s, NULL, 0);
|
||
dimm_p.u64 &= ~((u64)0xf << (i * 4));
|
||
dimm_p.u64 |= (value << (i * 4));
|
||
}
|
||
}
|
||
|
||
lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64);
|
||
|
||
debug("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
|
||
dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13,
|
||
dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10,
|
||
dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7,
|
||
dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4,
|
||
dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0);
|
||
|
||
// FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers,
|
||
// and treat it specially
|
||
if (ddr_type == DDR3_DRAM && num_ranks == 4 &&
|
||
spd_rdimm_registers == 2 && dimmx == 0) {
|
||
debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n");
|
||
lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64);
|
||
}
|
||
}
|
||
|
||
static void lmc_dimm01_params(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_dimm_ctl dimm_ctl;
|
||
char *s;
|
||
|
||
if (spd_rdimm) {
|
||
for (didx = 0; didx < (unsigned int)dimm_count; ++didx)
|
||
lmc_dimm01_params_loop(priv);
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
/* LMC0_DIMM_CTL */
|
||
dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
|
||
dimm_ctl.s.dimm0_wmask = 0xdf3f;
|
||
dimm_ctl.s.dimm1_wmask =
|
||
(dimm_count > 1) ? 0xdf3f : 0x0000;
|
||
dimm_ctl.s.tcws = 0x4e0;
|
||
dimm_ctl.s.parity = c_cfg->parity;
|
||
|
||
s = lookup_env(priv, "ddr_dimm0_wmask");
|
||
if (s) {
|
||
dimm_ctl.s.dimm0_wmask =
|
||
simple_strtoul(s, NULL, 0);
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_dimm1_wmask");
|
||
if (s) {
|
||
dimm_ctl.s.dimm1_wmask =
|
||
simple_strtoul(s, NULL, 0);
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_dimm_ctl_parity");
|
||
if (s)
|
||
dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_dimm_ctl_tcws");
|
||
if (s)
|
||
dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
|
||
|
||
debug("LMC DIMM_CTL : 0x%016llx\n",
|
||
dimm_ctl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
|
||
|
||
/* Init RCW */
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
|
||
|
||
/* Write RC0D last */
|
||
dimm_ctl.s.dimm0_wmask = 0x2000;
|
||
dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ?
|
||
0x2000 : 0x0000;
|
||
debug("LMC DIMM_CTL : 0x%016llx\n",
|
||
dimm_ctl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
|
||
|
||
/*
|
||
* Don't write any extended registers the second time
|
||
*/
|
||
lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0);
|
||
|
||
/* Init RCW */
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
|
||
} else {
|
||
/* LMC0_DIMM_CTL */
|
||
dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
|
||
dimm_ctl.s.dimm0_wmask = 0xffff;
|
||
// FIXME: recognize a DDR3 RDIMM with 4 ranks and 2
|
||
// registers, and treat it specially
|
||
if (num_ranks == 4 && spd_rdimm_registers == 2) {
|
||
debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n");
|
||
dimm_ctl.s.dimm1_wmask = 0xffff;
|
||
} else {
|
||
dimm_ctl.s.dimm1_wmask =
|
||
(dimm_count > 1) ? 0xffff : 0x0000;
|
||
}
|
||
dimm_ctl.s.tcws = 0x4e0;
|
||
dimm_ctl.s.parity = c_cfg->parity;
|
||
|
||
s = lookup_env(priv, "ddr_dimm0_wmask");
|
||
if (s) {
|
||
dimm_ctl.s.dimm0_wmask =
|
||
simple_strtoul(s, NULL, 0);
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_dimm1_wmask");
|
||
if (s) {
|
||
dimm_ctl.s.dimm1_wmask =
|
||
simple_strtoul(s, NULL, 0);
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_dimm_ctl_parity");
|
||
if (s)
|
||
dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_dimm_ctl_tcws");
|
||
if (s)
|
||
dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
|
||
|
||
debug("LMC DIMM_CTL : 0x%016llx\n",
|
||
dimm_ctl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
|
||
|
||
/* Init RCW */
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
|
||
}
|
||
|
||
} else {
|
||
/* Disable register control writes for unbuffered */
|
||
union cvmx_lmcx_dimm_ctl dimm_ctl;
|
||
|
||
dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
|
||
dimm_ctl.s.dimm0_wmask = 0;
|
||
dimm_ctl.s.dimm1_wmask = 0;
|
||
lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
|
||
}
|
||
}
|
||
|
||
static int lmc_rank_init(struct ddr_priv *priv)
|
||
{
|
||
char *s;
|
||
|
||
if (enable_by_rank_init) {
|
||
by_rank = 3;
|
||
saved_rank_mask = rank_mask;
|
||
}
|
||
|
||
start_by_rank_init:
|
||
|
||
if (enable_by_rank_init) {
|
||
rank_mask = (1 << by_rank);
|
||
if (!(rank_mask & saved_rank_mask))
|
||
goto end_by_rank_init;
|
||
if (by_rank == 0)
|
||
rank_mask = saved_rank_mask;
|
||
|
||
debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n",
|
||
by_rank, rank_mask);
|
||
}
|
||
|
||
/*
|
||
* Comments (steps 3 through 5) continue in oct3_ddr3_seq()
|
||
*/
|
||
union cvmx_lmcx_modereg_params0 mp0;
|
||
|
||
if (ddr_memory_preserved(priv)) {
|
||
/*
|
||
* Contents are being preserved. Take DRAM out of self-refresh
|
||
* first. Then init steps can procede normally
|
||
*/
|
||
/* self-refresh exit */
|
||
oct3_ddr3_seq(priv, rank_mask, if_num, 3);
|
||
}
|
||
|
||
mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
|
||
mp0.s.dllr = 1; /* Set during first init sequence */
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
|
||
|
||
ddr_init_seq(priv, rank_mask, if_num);
|
||
|
||
mp0.s.dllr = 0; /* Clear for normal operation */
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
|
||
|
||
if (spd_rdimm && ddr_type == DDR4_DRAM &&
|
||
octeon_is_cpuid(OCTEON_CN7XXX)) {
|
||
debug("Running init sequence 1\n");
|
||
change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count);
|
||
}
|
||
|
||
memset(lanes, 0, sizeof(lanes));
|
||
for (lane = 0; lane < last_lane; lane++) {
|
||
// init all lanes to reset value
|
||
dac_settings[lane] = 127;
|
||
}
|
||
|
||
// FIXME: disable internal VREF if deskew is disabled?
|
||
if (disable_deskew_training) {
|
||
debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n",
|
||
node, if_num);
|
||
num_samples = 0;
|
||
} else if (ddr_type == DDR4_DRAM &&
|
||
!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
|
||
num_samples = DEFAULT_DAC_SAMPLES;
|
||
} else {
|
||
// if DDR3 or no ability to write DAC values
|
||
num_samples = 1;
|
||
}
|
||
|
||
perform_internal_vref_training:
|
||
|
||
total_dac_eval_retries = 0;
|
||
dac_eval_exhausted = 0;
|
||
|
||
for (sample = 0; sample < num_samples; sample++) {
|
||
dac_eval_retries = 0;
|
||
|
||
// make offset and internal vref training repeatable
|
||
do {
|
||
/*
|
||
* 6.9.8 LMC Offset Training
|
||
* LMC requires input-receiver offset training.
|
||
*/
|
||
perform_offset_training(priv, rank_mask, if_num);
|
||
|
||
/*
|
||
* 6.9.9 LMC Internal vref Training
|
||
* LMC requires input-reference-voltage training.
|
||
*/
|
||
perform_internal_vref_training(priv, rank_mask, if_num);
|
||
|
||
// read and maybe display the DAC values for a sample
|
||
read_dac_dbi_settings(priv, if_num, /*DAC*/ 1,
|
||
dac_settings);
|
||
if (num_samples == 1 || ddr_verbose(priv)) {
|
||
display_dac_dbi_settings(if_num, /*DAC*/ 1,
|
||
use_ecc, dac_settings,
|
||
"Internal VREF");
|
||
}
|
||
|
||
// for DDR4, evaluate the DAC settings and retry
|
||
// if any issues
|
||
if (ddr_type == DDR4_DRAM) {
|
||
if (evaluate_dac_settings
|
||
(if_64b, use_ecc, dac_settings)) {
|
||
dac_eval_retries += 1;
|
||
if (dac_eval_retries >
|
||
DAC_RETRIES_LIMIT) {
|
||
debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
|
||
node, if_num);
|
||
dac_eval_exhausted += 1;
|
||
} else {
|
||
debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
|
||
node, if_num);
|
||
total_dac_eval_retries += 1;
|
||
// try another sample
|
||
continue;
|
||
}
|
||
}
|
||
|
||
// taking multiple samples, otherwise do nothing
|
||
if (num_samples > 1) {
|
||
// good sample or exhausted retries,
|
||
// record it
|
||
for (lane = 0; lane < last_lane;
|
||
lane++) {
|
||
lanes[lane].bytes[sample] =
|
||
dac_settings[lane];
|
||
}
|
||
}
|
||
}
|
||
// done if DDR3, or good sample, or exhausted retries
|
||
break;
|
||
} while (1);
|
||
}
|
||
|
||
if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) {
|
||
debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n",
|
||
node, if_num, total_dac_eval_retries, dac_eval_exhausted);
|
||
}
|
||
|
||
if (num_samples > 1) {
|
||
debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
|
||
node, if_num);
|
||
|
||
for (lane = 0; lane < last_lane; lane++) {
|
||
dac_settings[lane] =
|
||
process_samples_average(&lanes[lane].bytes[0],
|
||
num_samples, if_num, lane);
|
||
}
|
||
display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
|
||
dac_settings, "Averaged VREF");
|
||
|
||
// finally, write the final DAC values
|
||
for (lane = 0; lane < last_lane; lane++) {
|
||
load_dac_override(priv, if_num, dac_settings[lane],
|
||
lane);
|
||
}
|
||
}
|
||
|
||
// allow override of any byte-lane internal VREF
|
||
int overrode_vref_dac = 0;
|
||
|
||
for (lane = 0; lane < last_lane; lane++) {
|
||
s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane);
|
||
if (s) {
|
||
dac_settings[lane] = simple_strtoul(s, NULL, 0);
|
||
overrode_vref_dac = 1;
|
||
// finally, write the new DAC value
|
||
load_dac_override(priv, if_num, dac_settings[lane],
|
||
lane);
|
||
}
|
||
}
|
||
if (overrode_vref_dac) {
|
||
display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
|
||
dac_settings, "Override VREF");
|
||
}
|
||
|
||
// as a second step, after internal VREF training, before starting
|
||
// deskew training:
|
||
// for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting
|
||
// to 127
|
||
if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
|
||
!disable_deskew_training) {
|
||
load_dac_override(priv, if_num, 127, /* all */ 0x0A);
|
||
debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n",
|
||
node, if_num);
|
||
}
|
||
|
||
/*
|
||
* 4.8.8 LMC Deskew Training
|
||
*
|
||
* LMC requires input-read-data deskew training.
|
||
*/
|
||
if (!disable_deskew_training) {
|
||
deskew_training_errors =
|
||
perform_deskew_training(priv, rank_mask, if_num,
|
||
spd_rawcard_aorb);
|
||
|
||
// All the Deskew lock and saturation retries (may) have
|
||
// been done, but we ended up with nibble errors; so,
|
||
// as a last ditch effort, try the Internal vref
|
||
// Training again...
|
||
if (deskew_training_errors) {
|
||
if (internal_retries <
|
||
DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
|
||
internal_retries++;
|
||
debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n",
|
||
node, if_num, internal_retries);
|
||
goto perform_internal_vref_training;
|
||
} else {
|
||
if (restart_if_dsk_incomplete) {
|
||
debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n",
|
||
node, if_num, internal_retries);
|
||
return -EAGAIN;
|
||
}
|
||
debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
|
||
node, if_num, internal_retries);
|
||
}
|
||
} /* if (deskew_training_errors) */
|
||
|
||
// FIXME: treat this as the final DSK print from now on,
|
||
// and print if VBL_NORM or above also, save the results
|
||
// of the original training in case we want them later
|
||
validate_deskew_training(priv, rank_mask, if_num,
|
||
&deskew_training_results, 1);
|
||
} else { /* if (! disable_deskew_training) */
|
||
debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n",
|
||
node, if_num);
|
||
validate_deskew_training(priv, rank_mask, if_num,
|
||
&deskew_training_results, 1);
|
||
} /* if (! disable_deskew_training) */
|
||
|
||
if (enable_by_rank_init) {
|
||
read_dac_dbi_settings(priv, if_num, /*dac */ 1,
|
||
&rank_dac[by_rank].bytes[0]);
|
||
get_deskew_settings(priv, if_num, &rank_dsk[by_rank]);
|
||
debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
|
||
}
|
||
|
||
end_by_rank_init:
|
||
|
||
if (enable_by_rank_init) {
|
||
//debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
|
||
|
||
by_rank--;
|
||
if (by_rank >= 0)
|
||
goto start_by_rank_init;
|
||
|
||
rank_mask = saved_rank_mask;
|
||
ddr_init_seq(priv, rank_mask, if_num);
|
||
|
||
process_by_rank_dac(priv, if_num, rank_mask, rank_dac);
|
||
process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk);
|
||
|
||
// FIXME: set this to prevent later checking!!!
|
||
disable_deskew_training = 1;
|
||
|
||
debug("\n>>>>> BY_RANK: FINISHED!!\n\n");
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
static void lmc_config_2(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_config lmc_config;
|
||
int save_ref_zqcs_int;
|
||
u64 temp_delay_usecs;
|
||
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
|
||
/*
|
||
* Temporarily select the minimum ZQCS interval and wait
|
||
* long enough for a few ZQCS calibrations to occur. This
|
||
* should ensure that the calibration circuitry is
|
||
* stabilized before read/write leveling occurs.
|
||
*/
|
||
if (octeon_is_cpuid(OCTEON_CN7XXX)) {
|
||
save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
|
||
/* set smallest interval */
|
||
lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7);
|
||
} else {
|
||
save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int;
|
||
/* set smallest interval */
|
||
lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7);
|
||
}
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
|
||
lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
|
||
/*
|
||
* Compute an appropriate delay based on the current ZQCS
|
||
* interval. The delay should be long enough for the
|
||
* current ZQCS delay counter to expire plus ten of the
|
||
* minimum intarvals to ensure that some calibrations
|
||
* occur.
|
||
*/
|
||
temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs *
|
||
100 * 512 * 128) / (10000 * 10000) + 10 *
|
||
((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000);
|
||
|
||
debug("Waiting %lld usecs for ZQCS calibrations to start\n",
|
||
temp_delay_usecs);
|
||
udelay(temp_delay_usecs);
|
||
|
||
if (octeon_is_cpuid(OCTEON_CN7XXX)) {
|
||
/* Restore computed interval */
|
||
lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
|
||
} else {
|
||
/* Restore computed interval */
|
||
lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int;
|
||
}
|
||
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
|
||
lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
}
|
||
|
||
/*
 * File-scope state shared by the write-leveling code below. Everything is
 * placed explicitly in .data (NOTE(review): presumably because .bss is not
 * usable when this code runs - confirm).
 */

/* scratch copies of LMC(if_num)_WLEVEL_CTL and LMC(if_num)_WLEVEL_RANKX */
static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data");
static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data");
/* source of the per-rank rtt_nom_XX values used for write-leveling */
static union cvmx_lmcx_modereg_params1 mp1 __section(".data");

/* write-leveling bitmask read back for each of the up to 9 byte lanes */
static int wl_mask[9] __section(".data");
/* byte-lane loop index */
static int byte_idx __section(".data");
/* added to 8 to get the number of byte lanes to process */
static int ecc_ena __section(".data");
/* non-zero: round up odd bitmask delays after each sample */
static int wl_roundup __section(".data");
/* not referenced in the code visible here - TODO document at its user */
static int save_mode32b __section(".data");
/* non-zero: skip validate_hw_wl_settings() on the sampled delays */
static int disable_hwl_validity __section(".data");
/* default WLEVEL_CTL[rtt_nom] selector; written as (value - 1), or 7 if <= 0 */
static int default_wl_rtt_nom __section(".data");
/* extra weight given to a sample whose lane bitmask has exactly 4 bits set */
static int wl_pbm_pump __section(".data");
|
||
|
||
static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx)
|
||
{
|
||
int wloop = 0;
|
||
// retries per sample for HW-related issues with bitmasks or values
|
||
int wloop_retries = 0;
|
||
int wloop_retries_total = 0;
|
||
int wloop_retries_exhausted = 0;
|
||
#define WLOOP_RETRIES_DEFAULT 5
|
||
int wl_val_err;
|
||
int wl_mask_err_rank = 0;
|
||
int wl_val_err_rank = 0;
|
||
// array to collect counts of byte-lane values
|
||
// assume low-order 3 bits and even, so really only 2-bit values
|
||
struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9];
|
||
int extra_bumps, extra_mask;
|
||
int rank_nom = 0;
|
||
|
||
if (!(rank_mask & (1 << rankx)))
|
||
return;
|
||
|
||
if (match_wl_rtt_nom) {
|
||
if (rankx == 0)
|
||
rank_nom = mp1.s.rtt_nom_00;
|
||
if (rankx == 1)
|
||
rank_nom = mp1.s.rtt_nom_01;
|
||
if (rankx == 2)
|
||
rank_nom = mp1.s.rtt_nom_10;
|
||
if (rankx == 3)
|
||
rank_nom = mp1.s.rtt_nom_11;
|
||
|
||
debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n",
|
||
node, if_num, rankx, rank_nom,
|
||
imp_val->rtt_nom_ohms[rank_nom]);
|
||
}
|
||
|
||
memset(wl_bytes, 0, sizeof(wl_bytes));
|
||
memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra));
|
||
|
||
// restructure the looping so we can keep trying until we get the
|
||
// samples we want
|
||
while (wloop < wl_loops) {
|
||
wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
|
||
|
||
wl_ctl.cn78xx.rtt_nom =
|
||
(default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7;
|
||
|
||
if (match_wl_rtt_nom) {
|
||
wl_ctl.cn78xx.rtt_nom =
|
||
(rank_nom > 0) ? (rank_nom - 1) : 7;
|
||
}
|
||
|
||
/* Clear write-level delays */
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0);
|
||
|
||
wl_mask_err = 0; /* Reset error counters */
|
||
wl_val_err = 0;
|
||
|
||
for (byte_idx = 0; byte_idx < 9; ++byte_idx)
|
||
wl_mask[byte_idx] = 0; /* Reset bitmasks */
|
||
|
||
// do all the byte-lanes at the same time
|
||
wl_ctl.cn78xx.lanemask = 0x1ff;
|
||
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
|
||
|
||
/*
|
||
* Read and write values back in order to update the
|
||
* status field. This insures that we read the updated
|
||
* values after write-leveling has completed.
|
||
*/
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
|
||
lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)));
|
||
|
||
/* write-leveling */
|
||
oct3_ddr3_seq(priv, 1 << rankx, if_num, 6);
|
||
|
||
do {
|
||
wl_rank.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_WLEVEL_RANKX(rankx,
|
||
if_num));
|
||
} while (wl_rank.cn78xx.status != 3);
|
||
|
||
wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
|
||
wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv,
|
||
if_num,
|
||
byte_idx);
|
||
if (wl_mask[byte_idx] == 0)
|
||
++wl_mask_err;
|
||
}
|
||
|
||
// check validity only if no bitmask errors
|
||
if (wl_mask_err == 0) {
|
||
if ((spd_dimm_type == 1 || spd_dimm_type == 2) &&
|
||
dram_width != 16 && if_64b &&
|
||
!disable_hwl_validity) {
|
||
// bypass if [mini|SO]-[RU]DIMM or x16 or
|
||
// 32-bit
|
||
wl_val_err =
|
||
validate_hw_wl_settings(if_num,
|
||
&wl_rank,
|
||
spd_rdimm, ecc_ena);
|
||
wl_val_err_rank += (wl_val_err != 0);
|
||
}
|
||
} else {
|
||
wl_mask_err_rank++;
|
||
}
|
||
|
||
// before we print, if we had bitmask or validity errors,
|
||
// do a retry...
|
||
if (wl_mask_err != 0 || wl_val_err != 0) {
|
||
if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
|
||
wloop_retries++;
|
||
wloop_retries_total++;
|
||
// this printout is per-retry: only when VBL
|
||
// is high enough (DEV?)
|
||
// FIXME: do we want to show the bad bitmaps
|
||
// or delays here also?
|
||
debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
|
||
node, if_num, rankx,
|
||
(wl_mask_err) ? "Bitmask" : "Validity");
|
||
// this takes us back to the top without
|
||
// counting a sample
|
||
return;
|
||
}
|
||
|
||
// retries exhausted, do not print at normal VBL
|
||
debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
|
||
node, if_num, rankx,
|
||
(wl_mask_err) ? "Bitmask" : "Validity");
|
||
wloop_retries_exhausted++;
|
||
}
|
||
// no errors or exhausted retries, use this sample
|
||
wloop_retries = 0; //reset for next sample
|
||
|
||
// when only 1 sample or forced, print the bitmasks then
|
||
// current HW WL
|
||
if (wl_loops == 1 || wl_print) {
|
||
if (wl_print > 1)
|
||
display_wl_bm(if_num, rankx, wl_mask);
|
||
display_wl(if_num, wl_rank, rankx);
|
||
}
|
||
|
||
if (wl_roundup) { /* Round up odd bitmask delays */
|
||
for (byte_idx = 0; byte_idx < (8 + ecc_ena);
|
||
++byte_idx) {
|
||
if (!(if_bytemask & (1 << byte_idx)))
|
||
return;
|
||
upd_wl_rank(&wl_rank, byte_idx,
|
||
roundup_ddr3_wlevel_bitmask
|
||
(wl_mask[byte_idx]));
|
||
}
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
|
||
wl_rank.u64);
|
||
display_wl(if_num, wl_rank, rankx);
|
||
}
|
||
|
||
// OK, we have a decent sample, no bitmask or validity errors
|
||
extra_bumps = 0;
|
||
extra_mask = 0;
|
||
for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
|
||
int ix;
|
||
|
||
if (!(if_bytemask & (1 << byte_idx)))
|
||
return;
|
||
|
||
// increment count of byte-lane value
|
||
// only 4 values
|
||
ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3;
|
||
wl_bytes[byte_idx].bitcnt[ix]++;
|
||
wl_bytes_extra[byte_idx].bitcnt[ix]++;
|
||
// if perfect...
|
||
if (__builtin_popcount(wl_mask[byte_idx]) == 4) {
|
||
wl_bytes_extra[byte_idx].bitcnt[ix] +=
|
||
wl_pbm_pump;
|
||
extra_bumps++;
|
||
extra_mask |= 1 << byte_idx;
|
||
}
|
||
}
|
||
|
||
if (extra_bumps) {
|
||
if (wl_print > 1) {
|
||
debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n",
|
||
node, if_num, rankx, extra_bumps,
|
||
extra_mask);
|
||
}
|
||
}
|
||
|
||
// if we get here, we have taken a decent sample
|
||
wloop++;
|
||
|
||
} /* while (wloop < wl_loops) */
|
||
|
||
// if we did sample more than once, try to pick a majority vote
|
||
if (wl_loops > 1) {
|
||
// look for the majority in each byte-lane
|
||
for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
|
||
int mx, mc, xc, cc;
|
||
int ix, alts;
|
||
int maj, xmaj, xmx, xmc, xxc, xcc;
|
||
|
||
if (!(if_bytemask & (1 << byte_idx)))
|
||
return;
|
||
maj = find_wl_majority(&wl_bytes[byte_idx], &mx,
|
||
&mc, &xc, &cc);
|
||
xmaj = find_wl_majority(&wl_bytes_extra[byte_idx],
|
||
&xmx, &xmc, &xxc, &xcc);
|
||
if (maj != xmaj) {
|
||
if (wl_print) {
|
||
debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n",
|
||
node, if_num, rankx,
|
||
byte_idx, maj, xc, xmaj, xxc);
|
||
}
|
||
mx = xmx;
|
||
mc = xmc;
|
||
xc = xxc;
|
||
cc = xcc;
|
||
}
|
||
|
||
// see if there was an alternate
|
||
// take out the majority choice
|
||
alts = (mc & ~(1 << mx));
|
||
if (alts != 0) {
|
||
for (ix = 0; ix < 4; ix++) {
|
||
// FIXME: could be done multiple times?
|
||
// bad if so
|
||
if (alts & (1 << ix)) {
|
||
// set the mask
|
||
hwl_alts[rankx].hwl_alt_mask |=
|
||
(1 << byte_idx);
|
||
// record the value
|
||
hwl_alts[rankx].hwl_alt_delay[byte_idx] =
|
||
ix << 1;
|
||
if (wl_print > 1) {
|
||
debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
|
||
node,
|
||
if_num,
|
||
rankx,
|
||
byte_idx,
|
||
mx << 1,
|
||
xc,
|
||
ix << 1,
|
||
wl_bytes
|
||
[byte_idx].bitcnt
|
||
[ix]);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if (cc > 2) { // unlikely, but...
|
||
// assume: counts for 3 indices are all 1
|
||
// possiblities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
|
||
// and the desired?: 2 , 4 , 6, 0
|
||
// we choose the middle, assuming one of the
|
||
// outliers is bad
|
||
// NOTE: this is an ugly hack at the moment;
|
||
// there must be a better way
|
||
switch (mc) {
|
||
case 0x7:
|
||
mx = 1;
|
||
break; // was 0/2/4, choose 2
|
||
case 0xb:
|
||
mx = 0;
|
||
break; // was 0/2/6, choose 0
|
||
case 0xd:
|
||
mx = 3;
|
||
break; // was 0/4/6, choose 6
|
||
case 0xe:
|
||
mx = 2;
|
||
break; // was 2/4/6, choose 4
|
||
default:
|
||
case 0xf:
|
||
mx = 1;
|
||
break; // was 0/2/4/6, choose 2?
|
||
}
|
||
printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
|
||
node, if_num, rankx, byte_idx, mc,
|
||
mx << 1);
|
||
}
|
||
upd_wl_rank(&wl_rank, byte_idx, mx << 1);
|
||
}
|
||
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
|
||
wl_rank.u64);
|
||
display_wl_with_final(if_num, wl_rank, rankx);
|
||
|
||
// FIXME: does this help make the output a little easier
|
||
// to focus?
|
||
if (wl_print > 0)
|
||
debug("-----------\n");
|
||
|
||
} /* if (wl_loops > 1) */
|
||
|
||
// maybe print an error summary for the rank
|
||
if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) {
|
||
debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
|
||
node, if_num, rankx, wl_mask_err_rank,
|
||
wl_val_err_rank, wloop_retries_total,
|
||
wloop_retries_exhausted);
|
||
}
|
||
}
|
||
|
||
static void lmc_write_leveling(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_config cfg;
|
||
int rankx;
|
||
char *s;
|
||
|
||
/*
|
||
* 4.8.9 LMC Write Leveling
|
||
*
|
||
* LMC supports an automatic write leveling like that described in the
|
||
* JEDEC DDR3 specifications separately per byte-lane.
|
||
*
|
||
* All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
|
||
* must be completed prior to starting this LMC write-leveling sequence.
|
||
*
|
||
* There are many possible procedures that will write-level all the
|
||
* attached DDR3 DRAM parts. One possibility is for software to simply
|
||
* write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
|
||
* describes one possible sequence that uses LMC's autowrite-leveling
|
||
* capabilities.
|
||
*
|
||
* 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
|
||
* delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
|
||
* point.
|
||
*
|
||
* Do the remaining steps 2-7 separately for each rank i with attached
|
||
* DRAM.
|
||
*
|
||
* 2. Write LMC(0)_WLEVEL_RANKi = 0.
|
||
*
|
||
* 3. For x8 parts:
|
||
*
|
||
* Without changing any other fields in LMC(0)_WLEVEL_CTL, write
|
||
* LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
|
||
* DRAM.
|
||
*
|
||
* For x16 parts:
|
||
*
|
||
* Without changing any other fields in LMC(0)_WLEVEL_CTL, write
|
||
* LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
|
||
* attached DRAM.
|
||
*
|
||
* 4. Without changing any other fields in LMC(0)_CONFIG,
|
||
*
|
||
* o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
|
||
*
|
||
* o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
|
||
*
|
||
* o write LMC(0)_SEQ_CTL[INIT_START] = 1
|
||
*
|
||
* LMC will initiate write-leveling at this point. Assuming
|
||
* LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
|
||
* the selected DRAM rank via a DDR3 MR1 write, then sequences
|
||
* through
|
||
* and accumulates write-leveling results for eight different delay
|
||
* settings twice, starting at a delay of zero in this case since
|
||
* LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
|
||
* setting, covering a total distance of one CK, then disables the
|
||
* write-leveling via another DDR3 MR1 write.
|
||
*
|
||
* After the sequence through 16 delay settings is complete:
|
||
*
|
||
* o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
|
||
*
|
||
* o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
|
||
* by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
|
||
* leveling result of 1 that followed result of 0 during the
|
||
* sequence, except that the LMC always writes
|
||
* LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
|
||
*
|
||
* o Software can read the eight write-leveling results from the
|
||
* first pass through the delay settings by reading
|
||
* LMC(0)_WLEVEL_DBG[BITMASK] (after writing
|
||
* LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the writeleveling
|
||
* results from the second pass through the eight delay
|
||
* settings. They should often be identical to the
|
||
* LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
|
||
*
|
||
* 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
|
||
*
|
||
* LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
|
||
* lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
|
||
* LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
|
||
* software wrote in substep 2 above, which is 0.
|
||
*
|
||
* 6. For x16 parts:
|
||
*
|
||
* Without changing any other fields in LMC(0)_WLEVEL_CTL, write
|
||
* LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
|
||
* attached DRAM.
|
||
*
|
||
* Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
|
||
* setting. Skip to substep 7 if this has already been done.
|
||
*
|
||
* For x8 parts:
|
||
*
|
||
* Skip this substep. Go to substep 7.
|
||
*
|
||
* 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
|
||
* lanes on all ranks with attached DRAM.
|
||
*
|
||
* At this point, all byte lanes on rank i with attached DRAM should
|
||
* have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
|
||
* the result for each byte lane.
|
||
*
|
||
* But note that the DDR3 write-leveling sequence will only determine
|
||
* the delay modulo the CK cycle time, and cannot determine how many
|
||
* additional CK cycles of delay are present. Software must calculate
|
||
* the number of CK cycles, or equivalently, the
|
||
* LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
|
||
*
|
||
* This BYTE*<4:3> calculation is system/board specific.
|
||
*
|
||
* Many techniques can be used to calculate write-leveling BYTE*<4:3>
|
||
* values, including:
|
||
*
|
||
* o Known values for some byte lanes.
|
||
*
|
||
* o Relative values for some byte lanes relative to others.
|
||
*
|
||
* For example, suppose lane X is likely to require a larger
|
||
* write-leveling delay than lane Y. A BYTEX<2:0> value that is much
|
||
* smaller than the BYTEY<2:0> value may then indicate that the
|
||
* required lane X delay wrapped into the next CK, so BYTEX<4:3>
|
||
* should be set to BYTEY<4:3>+1.
|
||
*
|
||
* When ECC DRAM is not present (i.e. when DRAM is not attached to
|
||
* the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the
|
||
* DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write
|
||
* LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0],
|
||
* using the final calculated BYTE0 value.
|
||
* Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
|
||
* using the final calculated BYTE0 value.
|
||
*
|
||
* 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
|
||
*
|
||
* Let rank i be a rank with attached DRAM.
|
||
*
|
||
* For all ranks j that do not have attached DRAM, set
|
||
* LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
|
||
*/
|
||
|
||
rankx = 0;
|
||
wl_roundup = 0;
|
||
disable_hwl_validity = 0;
|
||
|
||
// wl_pbm_pump: weight for write-leveling PBMs...
|
||
// 0 causes original behavior
|
||
// 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms
|
||
// 4 would allow a minority of 1 pbm to outscore a majority of 4
|
||
// non-pbms
|
||
wl_pbm_pump = 4; // FIXME: is 4 too much?
|
||
|
||
if (wl_loops) {
|
||
debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node,
|
||
if_num);
|
||
} else {
|
||
/* Force software write-leveling to run */
|
||
wl_mask_err = 1;
|
||
debug("N%d.LMC%d: Forcing software Write-Leveling\n", node,
|
||
if_num);
|
||
}
|
||
|
||
default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ?
|
||
rttnom_20ohm : ddr4_rttnom_40ohm;
|
||
|
||
cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
ecc_ena = cfg.s.ecc_ena;
|
||
save_mode32b = cfg.cn78xx.mode32b;
|
||
cfg.cn78xx.mode32b = (!if_64b);
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
|
||
debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
|
||
|
||
s = lookup_env(priv, "ddr_wlevel_roundup");
|
||
if (s)
|
||
wl_roundup = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_wlevel_printall");
|
||
if (s)
|
||
wl_print = strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_wlevel_pbm_bump");
|
||
if (s)
|
||
wl_pbm_pump = strtoul(s, NULL, 0);
|
||
|
||
// default to disable when RL sequential delay check is disabled
|
||
disable_hwl_validity = disable_sequential_delay_check;
|
||
s = lookup_env(priv, "ddr_disable_hwl_validity");
|
||
if (s)
|
||
disable_hwl_validity = !!strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_wl_rtt_nom");
|
||
if (s)
|
||
default_wl_rtt_nom = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_match_wl_rtt_nom");
|
||
if (s)
|
||
match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0);
|
||
|
||
if (match_wl_rtt_nom)
|
||
mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
|
||
|
||
// For DDR3, we do not touch WLEVEL_CTL fields OR_DIS or BITMASK
|
||
// For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
|
||
if (ddr_type == DDR4_DRAM) {
|
||
int default_or_dis = 1;
|
||
int default_bitmask = 0xff;
|
||
|
||
// when x4, use only the lower nibble
|
||
if (dram_width == 4) {
|
||
default_bitmask = 0x0f;
|
||
if (wl_print) {
|
||
debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n",
|
||
node, if_num, default_bitmask);
|
||
}
|
||
}
|
||
|
||
wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
|
||
wl_ctl.s.or_dis = default_or_dis;
|
||
wl_ctl.s.bitmask = default_bitmask;
|
||
|
||
// allow overrides
|
||
s = lookup_env(priv, "ddr_wlevel_ctl_or_dis");
|
||
if (s)
|
||
wl_ctl.s.or_dis = !!strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_wlevel_ctl_bitmask");
|
||
if (s)
|
||
wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0);
|
||
|
||
// print only if not defaults
|
||
if (wl_ctl.s.or_dis != default_or_dis ||
|
||
wl_ctl.s.bitmask != default_bitmask) {
|
||
debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
|
||
node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask);
|
||
}
|
||
|
||
// always write
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
|
||
}
|
||
|
||
// Start the hardware write-leveling loop per rank
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++)
|
||
lmc_write_leveling_loop(priv, rankx);
|
||
|
||
cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
cfg.cn78xx.mode32b = save_mode32b;
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
|
||
debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
|
||
|
||
// At the end of HW Write Leveling, check on some DESKEW things...
|
||
if (!disable_deskew_training) {
|
||
struct deskew_counts dsk_counts;
|
||
int retry_count = 0;
|
||
|
||
debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n",
|
||
node, if_num);
|
||
|
||
do {
|
||
validate_deskew_training(priv, rank_mask, if_num,
|
||
&dsk_counts, 1);
|
||
|
||
// only RAWCARD A or B will not benefit from
|
||
// retraining if there's only saturation
|
||
// or any rawcard if there is a nibble error
|
||
if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) ||
|
||
(dsk_counts.nibrng_errs != 0 ||
|
||
dsk_counts.nibunl_errs != 0)) {
|
||
retry_count++;
|
||
debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
|
||
node, if_num, retry_count);
|
||
perform_deskew_training(priv, rank_mask, if_num,
|
||
spd_rawcard_aorb);
|
||
} else {
|
||
break;
|
||
}
|
||
} while (retry_count < 5);
|
||
}
|
||
}
|
||
|
||
static void lmc_workaround(struct ddr_priv *priv)
|
||
{
|
||
/* Workaround Trcd overflow by using Additive latency. */
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
|
||
union cvmx_lmcx_modereg_params0 mp0;
|
||
union cvmx_lmcx_timing_params1 tp1;
|
||
union cvmx_lmcx_control ctrl;
|
||
int rankx;
|
||
|
||
tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
|
||
mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
|
||
ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
|
||
if (tp1.cn78xx.trcd == 0) {
|
||
debug("Workaround Trcd overflow by using Additive latency.\n");
|
||
/* Hard code this to 12 and enable additive latency */
|
||
tp1.cn78xx.trcd = 12;
|
||
mp0.s.al = 2; /* CL-2 */
|
||
ctrl.s.pocas = 1;
|
||
|
||
debug("MODEREG_PARAMS0 : 0x%016llx\n",
|
||
mp0.u64);
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
|
||
mp0.u64);
|
||
debug("TIMING_PARAMS1 : 0x%016llx\n",
|
||
tp1.u64);
|
||
lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
|
||
|
||
debug("LMC_CONTROL : 0x%016llx\n",
|
||
ctrl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
|
||
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
/* MR1 */
|
||
ddr4_mrw(priv, if_num, rankx, -1, 1, 0);
|
||
}
|
||
}
|
||
}
|
||
|
||
// this is here just for output, to allow check of the Deskew
|
||
// settings one last time...
|
||
if (!disable_deskew_training) {
|
||
struct deskew_counts dsk_counts;
|
||
|
||
debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
|
||
node, if_num);
|
||
validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
|
||
3);
|
||
}
|
||
|
||
/*
|
||
* Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x)
|
||
*
|
||
* When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
|
||
* LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
|
||
* LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
|
||
*/
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
|
||
octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) {
|
||
union cvmx_lmcx_dll_ctl3 dll_ctl3;
|
||
union cvmx_lmcx_phy_ctl2 phy_ctl2;
|
||
union cvmx_lmcx_ext_config ext_cfg;
|
||
int increased_dsk_adj = 0;
|
||
int byte;
|
||
|
||
phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num));
|
||
ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
|
||
dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
|
||
|
||
for (byte = 0; byte < 8; ++byte) {
|
||
if (!(if_bytemask & (1 << byte)))
|
||
continue;
|
||
increased_dsk_adj |=
|
||
(((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4);
|
||
}
|
||
|
||
if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) {
|
||
ext_cfg.s.drive_ena_bprch = 1;
|
||
lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
|
||
debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n",
|
||
if_num);
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Software Write-Leveling block */

/*
 * The Vref sweep uses one linear index covering both DDR4 Vref ranges:
 * indices 0x00 - 0x17 select range 2, indices from 0x18 upwards select
 * range 1 (after subtracting VREF_RANGE2_LIMIT).
 */
#define VREF_RANGE1_LIMIT	0x33	/* range1 is valid for 0x00 - 0x32 */
#define VREF_RANGE2_LIMIT	0x18	/* range2 is valid for 0x00 - 0x17 */
/* full window is valid for 0x00 to 0x4A */
#define VREF_LIMIT		(VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
/* last index of the window; ddr4_vref_loop() applies the final value there */
#define VREF_FINAL		(VREF_LIMIT - 1)
|
||
|
||
/* How the write-leveling delay recorded for a byte lane was obtained */
enum sw_wl_status {
	/* HW/SW write-leveling failed, the result is estimated */
	WL_ESTIMATED = 0,
	/* H/W write-leveling succeeded */
	WL_HARDWARE = 1,
	/* S/W write-leveling passed 2 contiguous settings */
	WL_SOFTWARE = 2,
	/* S/W write-leveling passed 1 marginal setting */
	WL_SOFTWARE1 = 3,
};
|
||
|
||
/*
 * State shared between lmc_sw_write_leveling() and its helpers.
 * Everything is placed explicitly in the .data section.
 */

/* DRAM address handed to the pattern tests for the rank being leveled */
static u64 rank_addr __section(".data");

/* Vref sweep: current linear index and the value/range finally applied */
static int vref_val __section(".data");
static int final_vref_val __section(".data");
static int final_vref_range __section(".data");
static int start_vref_val __section(".data");
/* result of compute_vref_val(); reset to -1 for each rank */
static int computed_final_vref_val __section(".data");

/* Vref window bookkeeping: start index and length of a run of values */
static char best_vref_val_count __section(".data");
static char vref_val_count __section(".data");
static char best_vref_val_start __section(".data");
static char vref_val_start __section(".data");

/* Per-rank and per-lane outcome of software write-leveling */
static int bytes_failed __section(".data");
static enum sw_wl_status byte_test_status[9] __section(".data");
static enum sw_wl_status sw_wl_rank_status __section(".data");
static int sw_wl_failed __section(".data");
/* non-zero: test with run_best_hw_patterns() instead of test_dram_byte64() */
static int sw_wl_hw __section(".data");
/* non-zero: use the measured Vref rather than the computed one */
static int measured_vref_flag __section(".data");
|
||
|
||
static void ddr4_vref_loop(struct ddr_priv *priv, int rankx)
|
||
{
|
||
char *s;
|
||
|
||
if (vref_val < VREF_FINAL) {
|
||
int vrange, vvalue;
|
||
|
||
if (vref_val < VREF_RANGE2_LIMIT) {
|
||
vrange = 1;
|
||
vvalue = vref_val;
|
||
} else {
|
||
vrange = 0;
|
||
vvalue = vref_val - VREF_RANGE2_LIMIT;
|
||
}
|
||
|
||
set_vref(priv, if_num, rankx, vrange, vvalue);
|
||
} else { /* if (vref_val < VREF_FINAL) */
|
||
/* Print the final vref value first. */
|
||
|
||
/* Always print the computed first if its valid */
|
||
if (computed_final_vref_val >= 0) {
|
||
debug("N%d.LMC%d.R%d: vref Computed Summary : %2d (0x%02x)\n",
|
||
node, if_num, rankx,
|
||
computed_final_vref_val, computed_final_vref_val);
|
||
}
|
||
|
||
if (!measured_vref_flag) { // setup to use the computed
|
||
best_vref_val_count = 1;
|
||
final_vref_val = computed_final_vref_val;
|
||
} else { // setup to use the measured
|
||
if (best_vref_val_count > 0) {
|
||
best_vref_val_count =
|
||
max(best_vref_val_count, (char)2);
|
||
final_vref_val = best_vref_val_start +
|
||
divide_nint(best_vref_val_count - 1, 2);
|
||
|
||
if (final_vref_val < VREF_RANGE2_LIMIT) {
|
||
final_vref_range = 1;
|
||
} else {
|
||
final_vref_range = 0;
|
||
final_vref_val -= VREF_RANGE2_LIMIT;
|
||
}
|
||
|
||
int vvlo = best_vref_val_start;
|
||
int vrlo;
|
||
int vvhi = best_vref_val_start +
|
||
best_vref_val_count - 1;
|
||
int vrhi;
|
||
|
||
if (vvlo < VREF_RANGE2_LIMIT) {
|
||
vrlo = 2;
|
||
} else {
|
||
vrlo = 1;
|
||
vvlo -= VREF_RANGE2_LIMIT;
|
||
}
|
||
|
||
if (vvhi < VREF_RANGE2_LIMIT) {
|
||
vrhi = 2;
|
||
} else {
|
||
vrhi = 1;
|
||
vvhi -= VREF_RANGE2_LIMIT;
|
||
}
|
||
debug("N%d.LMC%d.R%d: vref Training Summary : 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
|
||
node, if_num, rankx, vvlo, vrlo,
|
||
final_vref_val,
|
||
final_vref_range + 1, vvhi, vrhi,
|
||
best_vref_val_count - 1);
|
||
|
||
} else {
|
||
/*
|
||
* If nothing passed use the default vref
|
||
* value for this rank
|
||
*/
|
||
union cvmx_lmcx_modereg_params2 mp2;
|
||
|
||
mp2.u64 =
|
||
lmc_rd(priv,
|
||
CVMX_LMCX_MODEREG_PARAMS2(if_num));
|
||
final_vref_val = (mp2.u64 >>
|
||
(rankx * 10 + 3)) & 0x3f;
|
||
final_vref_range = (mp2.u64 >>
|
||
(rankx * 10 + 9)) & 0x01;
|
||
|
||
debug("N%d.LMC%d.R%d: vref Using Default : %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
|
||
node, if_num, rankx, final_vref_val,
|
||
final_vref_val, final_vref_val,
|
||
final_vref_val, final_vref_range + 1);
|
||
}
|
||
}
|
||
|
||
// allow override
|
||
s = lookup_env(priv, "ddr%d_vref_val_%1d%1d",
|
||
if_num, !!(rankx & 2), !!(rankx & 1));
|
||
if (s)
|
||
final_vref_val = strtoul(s, NULL, 0);
|
||
|
||
set_vref(priv, if_num, rankx, final_vref_range, final_vref_val);
|
||
}
|
||
}
|
||
|
||
#define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors
|
||
|
||
static int errors __section(".data");
|
||
static int byte_delay[9] __section(".data");
|
||
static u64 bytemask __section(".data");
|
||
static int bytes_todo __section(".data");
|
||
static int no_errors_count __section(".data");
|
||
static u64 bad_bits[2] __section(".data");
|
||
static u64 sum_dram_dclk __section(".data");
|
||
static u64 sum_dram_ops __section(".data");
|
||
static u64 start_dram_dclk __section(".data");
|
||
static u64 stop_dram_dclk __section(".data");
|
||
static u64 start_dram_ops __section(".data");
|
||
static u64 stop_dram_ops __section(".data");
|
||
|
||
static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx)
|
||
{
|
||
int delay;
|
||
int b;
|
||
|
||
// write the current set of WL delays
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64);
|
||
wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
|
||
|
||
// do the test
|
||
if (sw_wl_hw) {
|
||
errors = run_best_hw_patterns(priv, if_num, rank_addr,
|
||
DBTRAIN_TEST, bad_bits);
|
||
errors &= bytes_todo; // keep only the ones we are still doing
|
||
} else {
|
||
start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
|
||
start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
|
||
errors = test_dram_byte64(priv, if_num, rank_addr, bytemask,
|
||
bad_bits);
|
||
|
||
stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
|
||
stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
|
||
sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
|
||
sum_dram_ops += stop_dram_ops - start_dram_ops;
|
||
}
|
||
|
||
debug("WL pass1: test_dram_byte returned 0x%x\n", errors);
|
||
|
||
// remember, errors will not be returned for byte-lanes that have
|
||
// maxxed out...
|
||
if (errors == 0) {
|
||
no_errors_count++; // bump
|
||
// bypass check/update completely
|
||
if (no_errors_count > 1)
|
||
return; // to end of do-while
|
||
} else {
|
||
no_errors_count = 0; // reset
|
||
}
|
||
|
||
// check errors by byte
|
||
for (b = 0; b < 9; ++b) {
|
||
if (!(bytes_todo & (1 << b)))
|
||
continue;
|
||
|
||
delay = byte_delay[b];
|
||
// yes, an error in this byte lane
|
||
if (errors & (1 << b)) {
|
||
debug(" byte %d delay %2d Errors\n", b, delay);
|
||
// since this byte had an error, we move to the next
|
||
// delay value, unless done with it
|
||
delay += 8; // incr by 8 to do delay high-order bits
|
||
if (delay < 32) {
|
||
upd_wl_rank(&wl_rank, b, delay);
|
||
debug(" byte %d delay %2d New\n",
|
||
b, delay);
|
||
byte_delay[b] = delay;
|
||
} else {
|
||
// reached max delay, maybe really done with
|
||
// this byte
|
||
// consider an alt only for computed VREF and
|
||
if (!measured_vref_flag &&
|
||
(hwl_alts[rankx].hwl_alt_mask & (1 << b))) {
|
||
// if an alt exists...
|
||
// just orig low-3 bits
|
||
int bad_delay = delay & 0x6;
|
||
|
||
// yes, use it
|
||
delay = hwl_alts[rankx].hwl_alt_delay[b];
|
||
// clear that flag
|
||
hwl_alts[rankx].hwl_alt_mask &=
|
||
~(1 << b);
|
||
upd_wl_rank(&wl_rank, b, delay);
|
||
byte_delay[b] = delay;
|
||
debug(" byte %d delay %2d ALTERNATE\n",
|
||
b, delay);
|
||
debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
|
||
node, if_num,
|
||
rankx, b, bad_delay, delay);
|
||
|
||
} else {
|
||
unsigned int bits_bad;
|
||
|
||
if (b < 8) {
|
||
// test no longer, remove from
|
||
// byte mask
|
||
bytemask &=
|
||
~(0xffULL << (8 * b));
|
||
bits_bad = (unsigned int)
|
||
((bad_bits[0] >>
|
||
(8 * b)) & 0xffUL);
|
||
} else {
|
||
bits_bad = (unsigned int)
|
||
(bad_bits[1] & 0xffUL);
|
||
}
|
||
|
||
// remove from bytes to do
|
||
bytes_todo &= ~(1 << b);
|
||
// make sure this is set for this case
|
||
byte_test_status[b] = WL_ESTIMATED;
|
||
debug(" byte %d delay %2d Exhausted\n",
|
||
b, delay);
|
||
if (!measured_vref_flag) {
|
||
// this is too noisy when doing
|
||
// measured VREF
|
||
debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n",
|
||
node, if_num, rankx,
|
||
b, bits_bad, delay);
|
||
}
|
||
}
|
||
}
|
||
} else {
|
||
// no error, stay with current delay, but keep testing
|
||
// it...
|
||
debug(" byte %d delay %2d Passed\n", b, delay);
|
||
byte_test_status[b] = WL_HARDWARE; // change status
|
||
}
|
||
} /* for (b = 0; b < 9; ++b) */
|
||
}
|
||
|
||
static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx)
|
||
{
|
||
int save_byte8 = wl_rank.s.byte8;
|
||
|
||
byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
|
||
|
||
if (save_byte8 != wl_rank.s.byte3 &&
|
||
save_byte8 != wl_rank.s.byte4) {
|
||
int test_byte8 = save_byte8;
|
||
int test_byte8_error;
|
||
int byte8_error = 0x1f;
|
||
int adder;
|
||
int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4,
|
||
2);
|
||
|
||
for (adder = 0; adder <= 32; adder += 8) {
|
||
test_byte8_error = abs((adder + save_byte8) -
|
||
avg_bytes);
|
||
if (test_byte8_error < byte8_error) {
|
||
byte8_error = test_byte8_error;
|
||
test_byte8 = save_byte8 + adder;
|
||
}
|
||
}
|
||
|
||
// only do the check if we are not using measured VREF
|
||
if (!measured_vref_flag) {
|
||
/* Use only even settings, rounding down... */
|
||
test_byte8 &= ~1;
|
||
|
||
// do validity check on the calculated ECC delay value
|
||
// this depends on the DIMM type
|
||
if (spd_rdimm) { // RDIMM
|
||
// but not mini-RDIMM
|
||
if (spd_dimm_type != 5) {
|
||
// it can be > byte4, but should never
|
||
// be > byte3
|
||
if (test_byte8 > wl_rank.s.byte3) {
|
||
/* say it is still estimated */
|
||
byte_test_status[8] =
|
||
WL_ESTIMATED;
|
||
}
|
||
}
|
||
} else { // UDIMM
|
||
if (test_byte8 < wl_rank.s.byte3 ||
|
||
test_byte8 > wl_rank.s.byte4) {
|
||
// should never be outside the
|
||
// byte 3-4 range
|
||
/* say it is still estimated */
|
||
byte_test_status[8] = WL_ESTIMATED;
|
||
}
|
||
}
|
||
/*
|
||
* Report whenever the calculation appears bad.
|
||
* This happens if some of the original values were off,
|
||
* or unexpected geometry from DIMM type, or custom
|
||
* circuitry (NIC225E, I am looking at you!).
|
||
* We will trust the calculated value, and depend on
|
||
* later testing to catch any instances when that
|
||
* value is truly bad.
|
||
*/
|
||
// ESTIMATED means there may be an issue
|
||
if (byte_test_status[8] == WL_ESTIMATED) {
|
||
debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
|
||
node, if_num, rankx,
|
||
(spd_rdimm ? 'R' : 'U'), wl_rank.s.byte4,
|
||
test_byte8, wl_rank.s.byte3);
|
||
byte_test_status[8] = WL_HARDWARE;
|
||
}
|
||
}
|
||
/* Use only even settings */
|
||
wl_rank.s.byte8 = test_byte8 & ~1;
|
||
}
|
||
|
||
if (wl_rank.s.byte8 != save_byte8) {
|
||
/* Change the status if s/w adjusted the delay */
|
||
byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */
|
||
}
|
||
}
|
||
|
||
static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv,
|
||
int rankx)
|
||
{
|
||
int errors;
|
||
int byte_delay[8];
|
||
int byte_passed[8];
|
||
u64 bytemask;
|
||
u64 bitmask;
|
||
int wl_offset;
|
||
int bytes_todo;
|
||
int sw_wl_offset = 1;
|
||
int delay;
|
||
int b;
|
||
|
||
for (b = 0; b < 8; ++b)
|
||
byte_passed[b] = 0;
|
||
|
||
bytes_todo = if_bytemask;
|
||
|
||
for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) {
|
||
debug("Starting wl_offset for-loop: %d\n", wl_offset);
|
||
|
||
bytemask = 0;
|
||
|
||
for (b = 0; b < 8; ++b) {
|
||
byte_delay[b] = 0;
|
||
// this does not contain fully passed bytes
|
||
if (!(bytes_todo & (1 << b)))
|
||
continue;
|
||
|
||
// reset across passes if not fully passed
|
||
byte_passed[b] = 0;
|
||
upd_wl_rank(&wl_rank, b, 0); // all delays start at 0
|
||
bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff;
|
||
// set the bytes bits in the bytemask
|
||
bytemask |= bitmask << (8 * b);
|
||
} /* for (b = 0; b < 8; ++b) */
|
||
|
||
// start a pass if there is any byte lane to test
|
||
while (bytemask != 0) {
|
||
debug("Starting bytemask while-loop: 0x%llx\n",
|
||
bytemask);
|
||
|
||
// write this set of WL delays
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
|
||
wl_rank.u64);
|
||
wl_rank.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_WLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
// do the test
|
||
if (sw_wl_hw) {
|
||
errors = run_best_hw_patterns(priv, if_num,
|
||
rank_addr,
|
||
DBTRAIN_TEST,
|
||
NULL) & 0xff;
|
||
} else {
|
||
errors = test_dram_byte64(priv, if_num,
|
||
rank_addr, bytemask,
|
||
NULL);
|
||
}
|
||
|
||
debug("test_dram_byte returned 0x%x\n", errors);
|
||
|
||
// check errors by byte
|
||
for (b = 0; b < 8; ++b) {
|
||
if (!(bytes_todo & (1 << b)))
|
||
continue;
|
||
|
||
delay = byte_delay[b];
|
||
if (errors & (1 << b)) { // yes, an error
|
||
debug(" byte %d delay %2d Errors\n",
|
||
b, delay);
|
||
byte_passed[b] = 0;
|
||
} else { // no error
|
||
byte_passed[b] += 1;
|
||
// Look for consecutive working settings
|
||
if (byte_passed[b] == (1 + wl_offset)) {
|
||
debug(" byte %d delay %2d FULLY Passed\n",
|
||
b, delay);
|
||
if (wl_offset == 1) {
|
||
byte_test_status[b] =
|
||
WL_SOFTWARE;
|
||
} else if (wl_offset == 0) {
|
||
byte_test_status[b] =
|
||
WL_SOFTWARE1;
|
||
}
|
||
|
||
// test no longer, remove
|
||
// from byte mask this pass
|
||
bytemask &= ~(0xffULL <<
|
||
(8 * b));
|
||
// remove completely from
|
||
// concern
|
||
bytes_todo &= ~(1 << b);
|
||
// on to the next byte, bypass
|
||
// delay updating!!
|
||
continue;
|
||
} else {
|
||
debug(" byte %d delay %2d Passed\n",
|
||
b, delay);
|
||
}
|
||
}
|
||
|
||
// error or no, here we move to the next delay
|
||
// value for this byte, unless done all delays
|
||
// only a byte that has "fully passed" will
|
||
// bypass around this,
|
||
delay += 2;
|
||
if (delay < 32) {
|
||
upd_wl_rank(&wl_rank, b, delay);
|
||
debug(" byte %d delay %2d New\n",
|
||
b, delay);
|
||
byte_delay[b] = delay;
|
||
} else {
|
||
// reached max delay, done with this
|
||
// byte
|
||
debug(" byte %d delay %2d Exhausted\n",
|
||
b, delay);
|
||
// test no longer, remove from byte
|
||
// mask this pass
|
||
bytemask &= ~(0xffULL << (8 * b));
|
||
}
|
||
} /* for (b = 0; b < 8; ++b) */
|
||
debug("End of for-loop: bytemask 0x%llx\n", bytemask);
|
||
} /* while (bytemask != 0) */
|
||
}
|
||
|
||
for (b = 0; b < 8; ++b) {
|
||
// any bytes left in bytes_todo did not pass
|
||
if (bytes_todo & (1 << b)) {
|
||
union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank;
|
||
|
||
/*
|
||
* Last resort. Use Rlevel settings to estimate
|
||
* Wlevel if software write-leveling fails
|
||
*/
|
||
debug("Using RLEVEL as WLEVEL estimate for byte %d\n",
|
||
b);
|
||
lmc_rlevel_rank.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b);
|
||
}
|
||
} /* for (b = 0; b < 8; ++b) */
|
||
}
|
||
|
||
static int lmc_sw_write_leveling(struct ddr_priv *priv)
|
||
{
|
||
/* Try to determine/optimize write-level delays experimentally. */
|
||
union cvmx_lmcx_wlevel_rankx wl_rank_hw_res;
|
||
union cvmx_lmcx_config cfg;
|
||
int rankx;
|
||
int byte;
|
||
char *s;
|
||
int i;
|
||
|
||
int active_rank;
|
||
int sw_wl_enable = 1; /* FIX... Should be customizable. */
|
||
int interfaces;
|
||
|
||
static const char * const wl_status_strings[] = {
|
||
"(e)",
|
||
" ",
|
||
" ",
|
||
"(1)"
|
||
};
|
||
|
||
// FIXME: make HW-assist the default now?
|
||
int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT;
|
||
int dram_connection = c_cfg->dram_connection;
|
||
|
||
s = lookup_env(priv, "ddr_sw_wlevel_hw");
|
||
if (s)
|
||
sw_wl_hw_default = !!strtoul(s, NULL, 0);
|
||
if (!if_64b) // must use SW algo if 32-bit mode
|
||
sw_wl_hw_default = 0;
|
||
|
||
// can never use hw-assist
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
|
||
sw_wl_hw_default = 0;
|
||
|
||
s = lookup_env(priv, "ddr_software_wlevel");
|
||
if (s)
|
||
sw_wl_enable = strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr%d_dram_connection", if_num);
|
||
if (s)
|
||
dram_connection = !!strtoul(s, NULL, 0);
|
||
|
||
cvmx_rng_enable();
|
||
|
||
/*
|
||
* Get the measured_vref setting from the config, check for an
|
||
* override...
|
||
*/
|
||
/* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */
|
||
// NOTE: measured VREF can only be done for DDR4
|
||
if (ddr_type == DDR4_DRAM) {
|
||
measured_vref_flag = c_cfg->measured_vref;
|
||
s = lookup_env(priv, "ddr_measured_vref");
|
||
if (s)
|
||
measured_vref_flag = !!strtoul(s, NULL, 0);
|
||
} else {
|
||
measured_vref_flag = 0; // OFF for DDR3
|
||
}
|
||
|
||
/*
|
||
* Ensure disabled ECC for DRAM tests using the SW algo, else leave
|
||
* it untouched
|
||
*/
|
||
if (!sw_wl_hw_default) {
|
||
cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
cfg.cn78xx.ecc_ena = 0;
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
|
||
}
|
||
|
||
/*
|
||
* We need to track absolute rank number, as well as how many
|
||
* active ranks we have. Two single rank DIMMs show up as
|
||
* ranks 0 and 2, but only 2 ranks are active.
|
||
*/
|
||
active_rank = 0;
|
||
|
||
interfaces = __builtin_popcount(if_mask);
|
||
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++) {
|
||
final_vref_range = 0;
|
||
start_vref_val = 0;
|
||
computed_final_vref_val = -1;
|
||
sw_wl_rank_status = WL_HARDWARE;
|
||
sw_wl_failed = 0;
|
||
sw_wl_hw = sw_wl_hw_default;
|
||
|
||
if (!sw_wl_enable)
|
||
break;
|
||
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
|
||
node, if_num, rankx,
|
||
(sw_wl_hw) ? "with H/W assist" :
|
||
"with S/W algorithm");
|
||
|
||
if (ddr_type == DDR4_DRAM && num_ranks != 4) {
|
||
// always compute when we can...
|
||
computed_final_vref_val =
|
||
compute_vref_val(priv, if_num, rankx, dimm_count,
|
||
num_ranks, imp_val,
|
||
is_stacked_die, dram_connection);
|
||
|
||
// but only use it if allowed
|
||
if (!measured_vref_flag) {
|
||
// skip all the measured vref processing,
|
||
// just the final setting
|
||
start_vref_val = VREF_FINAL;
|
||
}
|
||
}
|
||
|
||
/* Save off the h/w wl results */
|
||
wl_rank_hw_res.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_WLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
vref_val_count = 0;
|
||
vref_val_start = 0;
|
||
best_vref_val_count = 0;
|
||
best_vref_val_start = 0;
|
||
|
||
/* Loop one extra time using the Final vref value. */
|
||
for (vref_val = start_vref_val; vref_val < VREF_LIMIT;
|
||
++vref_val) {
|
||
if (ddr_type == DDR4_DRAM)
|
||
ddr4_vref_loop(priv, rankx);
|
||
|
||
/* Restore the saved value */
|
||
wl_rank.u64 = wl_rank_hw_res.u64;
|
||
|
||
for (byte = 0; byte < 9; ++byte)
|
||
byte_test_status[byte] = WL_ESTIMATED;
|
||
|
||
if (wl_mask_err == 0) {
|
||
/*
|
||
* Determine address of DRAM to test for
|
||
* pass 1 of software write leveling.
|
||
*/
|
||
rank_addr = active_rank *
|
||
(1ull << (pbank_lsb - bunk_enable +
|
||
(interfaces / 2)));
|
||
|
||
/*
|
||
* Adjust address for boot bus hole in memory
|
||
* map.
|
||
*/
|
||
if (rank_addr > 0x10000000)
|
||
rank_addr += 0x10000000;
|
||
|
||
debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
|
||
node, if_num, rankx, active_rank,
|
||
rank_addr);
|
||
|
||
// start parallel write-leveling block for
|
||
// delay high-order bits
|
||
errors = 0;
|
||
no_errors_count = 0;
|
||
sum_dram_dclk = 0;
|
||
sum_dram_ops = 0;
|
||
|
||
if (if_64b) {
|
||
bytes_todo = (sw_wl_hw) ?
|
||
if_bytemask : 0xFF;
|
||
bytemask = ~0ULL;
|
||
} else {
|
||
// 32-bit, must be using SW algo,
|
||
// only data bytes
|
||
bytes_todo = 0x0f;
|
||
bytemask = 0x00000000ffffffffULL;
|
||
}
|
||
|
||
for (byte = 0; byte < 9; ++byte) {
|
||
if (!(bytes_todo & (1 << byte))) {
|
||
byte_delay[byte] = 0;
|
||
} else {
|
||
byte_delay[byte] =
|
||
get_wl_rank(&wl_rank, byte);
|
||
}
|
||
} /* for (byte = 0; byte < 9; ++byte) */
|
||
|
||
do {
|
||
lmc_sw_write_leveling_loop(priv, rankx);
|
||
} while (no_errors_count <
|
||
WL_MIN_NO_ERRORS_COUNT);
|
||
|
||
if (!sw_wl_hw) {
|
||
u64 percent_x10;
|
||
|
||
if (sum_dram_dclk == 0)
|
||
sum_dram_dclk = 1;
|
||
percent_x10 = sum_dram_ops * 1000 /
|
||
sum_dram_dclk;
|
||
debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n",
|
||
node, if_num, rankx, sum_dram_ops,
|
||
sum_dram_dclk, percent_x10 / 10,
|
||
percent_x10 % 10);
|
||
}
|
||
if (errors) {
|
||
debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n",
|
||
vref_val, vref_val, errors);
|
||
}
|
||
// end parallel write-leveling block for
|
||
// delay high-order bits
|
||
|
||
// if we used HW-assist, we did the ECC byte
|
||
// when approp.
|
||
if (sw_wl_hw) {
|
||
if (wl_print) {
|
||
debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n",
|
||
node, if_num, rankx);
|
||
}
|
||
goto no_ecc_estimate;
|
||
}
|
||
|
||
if ((if_bytemask & 0xff) == 0xff) {
|
||
if (use_ecc) {
|
||
sw_write_lvl_use_ecc(priv,
|
||
rankx);
|
||
} else {
|
||
/* H/W delay value */
|
||
byte_test_status[8] =
|
||
WL_HARDWARE;
|
||
/* ECC is not used */
|
||
wl_rank.s.byte8 =
|
||
wl_rank.s.byte0;
|
||
}
|
||
} else {
|
||
if (use_ecc) {
|
||
/* Estimate the ECC byte dly */
|
||
// add hi-order to b4
|
||
wl_rank.s.byte4 |=
|
||
(wl_rank.s.byte3 &
|
||
0x38);
|
||
if ((wl_rank.s.byte4 & 0x06) <
|
||
(wl_rank.s.byte3 & 0x06)) {
|
||
// must be next clock
|
||
wl_rank.s.byte4 += 8;
|
||
}
|
||
} else {
|
||
/* ECC is not used */
|
||
wl_rank.s.byte4 =
|
||
wl_rank.s.byte0;
|
||
}
|
||
|
||
/*
|
||
* Change the status if s/w adjusted
|
||
* the delay
|
||
*/
|
||
/* Estimated delay */
|
||
byte_test_status[4] = WL_SOFTWARE;
|
||
} /* if ((if_bytemask & 0xff) == 0xff) */
|
||
} /* if (wl_mask_err == 0) */
|
||
|
||
no_ecc_estimate:
|
||
|
||
bytes_failed = 0;
|
||
for (byte = 0; byte < 9; ++byte) {
|
||
/* Don't accumulate errors for untested bytes */
|
||
if (!(if_bytemask & (1 << byte)))
|
||
continue;
|
||
bytes_failed +=
|
||
(byte_test_status[byte] == WL_ESTIMATED);
|
||
}
|
||
|
||
/* vref training loop is only used for DDR4 */
|
||
if (ddr_type != DDR4_DRAM)
|
||
break;
|
||
|
||
if (bytes_failed == 0) {
|
||
if (vref_val_count == 0)
|
||
vref_val_start = vref_val;
|
||
|
||
++vref_val_count;
|
||
if (vref_val_count > best_vref_val_count) {
|
||
best_vref_val_count = vref_val_count;
|
||
best_vref_val_start = vref_val_start;
|
||
debug("N%d.LMC%d.R%d: vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n",
|
||
node, if_num, rankx, vref_val,
|
||
best_vref_val_start,
|
||
best_vref_val_start +
|
||
best_vref_val_count - 1);
|
||
}
|
||
} else {
|
||
vref_val_count = 0;
|
||
debug("N%d.LMC%d.R%d: vref Training (%2d) : failed\n",
|
||
node, if_num, rankx, vref_val);
|
||
}
|
||
}
|
||
|
||
/*
|
||
* Determine address of DRAM to test for software write
|
||
* leveling.
|
||
*/
|
||
rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable +
|
||
(interfaces / 2)));
|
||
/* Adjust address for boot bus hole in memory map. */
|
||
if (rank_addr > 0x10000000)
|
||
rank_addr += 0x10000000;
|
||
|
||
debug("Rank Address: 0x%llx\n", rank_addr);
|
||
|
||
if (bytes_failed) {
|
||
// FIXME? the big hammer, did not even try SW WL pass2,
|
||
// assume only chip reset will help
|
||
debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
|
||
node, if_num, rankx);
|
||
sw_wl_failed = 1;
|
||
} else { /* if (bytes_failed) */
|
||
// SW WL pass 1 was OK, write the settings
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
|
||
wl_rank.u64);
|
||
wl_rank.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_WLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
// do validity check on the delay values by running
|
||
// the test 1 more time...
|
||
// FIXME: we really need to check the ECC byte setting
|
||
// here as well, so we need to enable ECC for this test!
|
||
// if there are any errors, claim SW WL failure
|
||
u64 datamask = (if_64b) ? 0xffffffffffffffffULL :
|
||
0x00000000ffffffffULL;
|
||
int errors;
|
||
|
||
// do the test
|
||
if (sw_wl_hw) {
|
||
errors = run_best_hw_patterns(priv, if_num,
|
||
rank_addr,
|
||
DBTRAIN_TEST,
|
||
NULL) & 0xff;
|
||
} else {
|
||
errors = test_dram_byte64(priv, if_num,
|
||
rank_addr, datamask,
|
||
NULL);
|
||
}
|
||
|
||
if (errors) {
|
||
debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n",
|
||
node, if_num, rankx, errors);
|
||
sw_wl_failed = 1;
|
||
}
|
||
} /* if (bytes_failed) */
|
||
|
||
// FIXME? dump the WL settings, so we get more of a clue
|
||
// as to what happened where
|
||
debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
|
||
node, if_num, rankx, wl_rank.s.status, wl_rank.u64,
|
||
wl_rank.s.byte8, wl_status_strings[byte_test_status[8]],
|
||
wl_rank.s.byte7, wl_status_strings[byte_test_status[7]],
|
||
wl_rank.s.byte6, wl_status_strings[byte_test_status[6]],
|
||
wl_rank.s.byte5, wl_status_strings[byte_test_status[5]],
|
||
wl_rank.s.byte4, wl_status_strings[byte_test_status[4]],
|
||
wl_rank.s.byte3, wl_status_strings[byte_test_status[3]],
|
||
wl_rank.s.byte2, wl_status_strings[byte_test_status[2]],
|
||
wl_rank.s.byte1, wl_status_strings[byte_test_status[1]],
|
||
wl_rank.s.byte0, wl_status_strings[byte_test_status[0]],
|
||
(sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)");
|
||
|
||
// finally, check for fatal conditions: either chip reset
|
||
// right here, or return error flag
|
||
if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) ||
|
||
sw_wl_failed) {
|
||
if (!ddr_disable_chip_reset) { // do chip RESET
|
||
printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n",
|
||
node, if_num, rankx);
|
||
mdelay(500);
|
||
do_reset(NULL, 0, 0, NULL);
|
||
} else {
|
||
// return error flag so LMC init can be retried.
|
||
debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n",
|
||
node, if_num, rankx);
|
||
return -EAGAIN; // 0 indicates restart possible.
|
||
}
|
||
}
|
||
active_rank++;
|
||
}
|
||
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++) {
|
||
int parameter_set = 0;
|
||
u64 value;
|
||
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
for (i = 0; i < 9; ++i) {
|
||
s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d",
|
||
if_num, rankx, i);
|
||
if (s) {
|
||
parameter_set |= 1;
|
||
value = strtoul(s, NULL, 0);
|
||
|
||
upd_wl_rank(&wl_rank, i, value);
|
||
}
|
||
}
|
||
|
||
s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx);
|
||
if (s) {
|
||
parameter_set |= 1;
|
||
value = strtoull(s, NULL, 0);
|
||
wl_rank.u64 = value;
|
||
}
|
||
|
||
if (parameter_set) {
|
||
lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
|
||
wl_rank.u64);
|
||
wl_rank.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
|
||
display_wl(if_num, wl_rank, rankx);
|
||
}
|
||
// if there are unused entries to be filled
|
||
if ((rank_mask & 0x0F) != 0x0F) {
|
||
if (rankx < 3) {
|
||
debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
|
||
node, if_num, rankx);
|
||
|
||
// if rank 0, write ranks 1 and 2 here if empty
|
||
if (rankx == 0) {
|
||
// check that rank 1 is empty
|
||
if (!(rank_mask & (1 << 1))) {
|
||
debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
|
||
node, if_num, rankx, 1);
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_WLEVEL_RANKX(1,
|
||
if_num),
|
||
wl_rank.u64);
|
||
}
|
||
|
||
// check that rank 2 is empty
|
||
if (!(rank_mask & (1 << 2))) {
|
||
debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
|
||
node, if_num, rankx, 2);
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_WLEVEL_RANKX(2,
|
||
if_num),
|
||
wl_rank.u64);
|
||
}
|
||
}
|
||
|
||
// if rank 0, 1 or 2, write rank 3 here if empty
|
||
// check that rank 3 is empty
|
||
if (!(rank_mask & (1 << 3))) {
|
||
debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
|
||
node, if_num, rankx, 3);
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_WLEVEL_RANKX(3,
|
||
if_num),
|
||
wl_rank.u64);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Enable 32-bit mode if required. */
|
||
cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
cfg.cn78xx.mode32b = (!if_64b);
|
||
debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
|
||
|
||
/* Restore the ECC configuration */
|
||
if (!sw_wl_hw_default)
|
||
cfg.cn78xx.ecc_ena = use_ecc;
|
||
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
|
||
|
||
return 0;
|
||
}
|
||
|
||
static void lmc_dll(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
|
||
int setting[9];
|
||
int i;
|
||
|
||
ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
|
||
|
||
for (i = 0; i < 9; ++i) {
|
||
SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i));
|
||
lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
|
||
lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
|
||
ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
|
||
setting[i] = GET_DDR_DLL_CTL3(dll90_setting);
|
||
debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num,
|
||
GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64,
|
||
setting[i]);
|
||
}
|
||
|
||
debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
|
||
node, if_num, "DLL90 Setting 8:0",
|
||
setting[8], setting[7], setting[6], setting[5], setting[4],
|
||
setting[3], setting[2], setting[1], setting[0]);
|
||
|
||
process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset",
|
||
c_cfg->dll_write_offset,
|
||
"ddr%d_dll_write_offset_byte%d", 1);
|
||
process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset",
|
||
c_cfg->dll_read_offset,
|
||
"ddr%d_dll_read_offset_byte%d", 2);
|
||
}
|
||
|
||
/*
 * Add @incr to the bitfield csr.chip.field, saturating at 63 instead of
 * wrapping.  @csr and @incr are parenthesised so that expression arguments
 * (e.g. "a + b") keep the intended precedence in "64 - incr".
 * Note: @incr is evaluated more than once - pass side-effect free values.
 */
#define SLOT_CTL_INCR(csr, chip, field, incr)				\
	((csr).chip.field = ((csr).chip.field < (64 - (incr))) ?	\
			    ((csr).chip.field + (incr)) : 63)
|
||
|
||
/*
 * Add @incr to the bitfield csr.chip.field, saturating at 63 instead of
 * wrapping.  @csr and @incr are parenthesised so that expression arguments
 * keep the intended precedence in "64 - incr".
 * Note: @incr is evaluated more than once - pass side-effect free values.
 */
#define INCR(csr, chip, field, incr)					\
	((csr).chip.field = ((csr).chip.field < (64 - (incr))) ?	\
			    ((csr).chip.field + (incr)) : 63)
|
||
|
||
static void lmc_workaround_2(struct ddr_priv *priv)
|
||
{
|
||
/* Workaround Errata 21063 */
|
||
if (octeon_is_cpuid(OCTEON_CN78XX) ||
|
||
octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
|
||
union cvmx_lmcx_slot_ctl0 slot_ctl0;
|
||
union cvmx_lmcx_slot_ctl1 slot_ctl1;
|
||
union cvmx_lmcx_slot_ctl2 slot_ctl2;
|
||
union cvmx_lmcx_ext_config ext_cfg;
|
||
|
||
slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num));
|
||
slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
|
||
slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
|
||
|
||
ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
|
||
|
||
/* When ext_cfg.s.read_ena_bprch is set add 1 */
|
||
if (ext_cfg.s.read_ena_bprch) {
|
||
SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1);
|
||
SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1);
|
||
SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1);
|
||
SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1);
|
||
}
|
||
|
||
/* Always add 2 */
|
||
SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2);
|
||
SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64);
|
||
lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
|
||
lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
|
||
}
|
||
|
||
/* Workaround Errata 21216 */
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) ||
|
||
octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
|
||
union cvmx_lmcx_slot_ctl1 slot_ctl1;
|
||
union cvmx_lmcx_slot_ctl2 slot_ctl2;
|
||
|
||
slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
|
||
slot_ctl1.cn78xx.w2w_xrank_init =
|
||
max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init);
|
||
lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
|
||
|
||
slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
|
||
slot_ctl2.cn78xx.w2w_xdimm_init =
|
||
max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init);
|
||
lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
|
||
}
|
||
}
|
||
|
||
static void lmc_final(struct ddr_priv *priv)
|
||
{
|
||
/*
|
||
* 4.8.11 Final LMC Initialization
|
||
*
|
||
* Early LMC initialization, LMC write-leveling, and LMC read-leveling
|
||
* must be completed prior to starting this final LMC initialization.
|
||
*
|
||
* LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
|
||
* LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
|
||
* readleveling and write-leveling settings. Software should not write
|
||
* the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
|
||
* values until after the final read-leveling and write-leveling
|
||
* settings are written.
|
||
*
|
||
* Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
|
||
* LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
|
||
* select the minimum gaps between read operations and write operations
|
||
* of various types.
|
||
*
|
||
* Software must not reduce the values in these CSR fields below the
|
||
* values previously selected by the LMC hardware (during write-leveling
|
||
* and read-leveling steps above).
|
||
*
|
||
* All sections in this chapter may be used to derive proper settings
|
||
* for these registers.
|
||
*
|
||
* For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
|
||
* properly. This should be done prior to the first read.
|
||
*/
|
||
|
||
/* Clear any residual ECC errors */
|
||
int num_tads = 1;
|
||
int tad;
|
||
int num_mcis = 1;
|
||
int mci;
|
||
|
||
if (octeon_is_cpuid(OCTEON_CN78XX)) {
|
||
num_tads = 8;
|
||
num_mcis = 4;
|
||
} else if (octeon_is_cpuid(OCTEON_CN70XX)) {
|
||
num_tads = 1;
|
||
num_mcis = 1;
|
||
} else if (octeon_is_cpuid(OCTEON_CN73XX) ||
|
||
octeon_is_cpuid(OCTEON_CNF75XX)) {
|
||
num_tads = 4;
|
||
num_mcis = 3;
|
||
}
|
||
|
||
lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL);
|
||
lmc_rd(priv, CVMX_LMCX_INT(if_num));
|
||
|
||
for (tad = 0; tad < num_tads; tad++) {
|
||
l2c_wr(priv, CVMX_L2C_TADX_INT(tad),
|
||
l2c_rd(priv, CVMX_L2C_TADX_INT(tad)));
|
||
debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad,
|
||
l2c_rd(priv, CVMX_L2C_TADX_INT(tad)));
|
||
}
|
||
|
||
for (mci = 0; mci < num_mcis; mci++) {
|
||
l2c_wr(priv, CVMX_L2C_MCIX_INT(mci),
|
||
l2c_rd(priv, CVMX_L2C_MCIX_INT(mci)));
|
||
debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci,
|
||
l2c_rd(priv, CVMX_L2C_MCIX_INT(mci)));
|
||
}
|
||
|
||
debug("%-45s : 0x%08llx\n", "LMC_INT",
|
||
lmc_rd(priv, CVMX_LMCX_INT(if_num)));
|
||
}
|
||
|
||
static void lmc_scrambling(struct ddr_priv *priv)
|
||
{
|
||
// Make sure scrambling is disabled during init...
|
||
union cvmx_lmcx_control ctrl;
|
||
union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0;
|
||
union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1;
|
||
union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2;
|
||
union cvmx_lmcx_ns_ctl lmc_ns_ctl;
|
||
int use_scramble = 0; // default OFF
|
||
char *s;
|
||
|
||
ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num));
|
||
lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num));
|
||
lmc_scramble_cfg2.u64 = 0; // quiet compiler
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
|
||
lmc_scramble_cfg2.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num));
|
||
}
|
||
lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num));
|
||
|
||
s = lookup_env_ull(priv, "ddr_use_scramble");
|
||
if (s)
|
||
use_scramble = simple_strtoull(s, NULL, 0);
|
||
|
||
/* Generate random values if scrambling is needed */
|
||
if (use_scramble) {
|
||
lmc_scramble_cfg0.u64 = cvmx_rng_get_random64();
|
||
lmc_scramble_cfg1.u64 = cvmx_rng_get_random64();
|
||
lmc_scramble_cfg2.u64 = cvmx_rng_get_random64();
|
||
lmc_ns_ctl.s.ns_scramble_dis = 0;
|
||
lmc_ns_ctl.s.adr_offset = 0;
|
||
ctrl.s.scramble_ena = 1;
|
||
}
|
||
|
||
s = lookup_env_ull(priv, "ddr_scramble_cfg0");
|
||
if (s) {
|
||
lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0);
|
||
ctrl.s.scramble_ena = 1;
|
||
}
|
||
debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0",
|
||
lmc_scramble_cfg0.u64);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64);
|
||
|
||
s = lookup_env_ull(priv, "ddr_scramble_cfg1");
|
||
if (s) {
|
||
lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0);
|
||
ctrl.s.scramble_ena = 1;
|
||
}
|
||
debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1",
|
||
lmc_scramble_cfg1.u64);
|
||
lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64);
|
||
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
|
||
s = lookup_env_ull(priv, "ddr_scramble_cfg2");
|
||
if (s) {
|
||
lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0);
|
||
ctrl.s.scramble_ena = 1;
|
||
}
|
||
debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2",
|
||
lmc_scramble_cfg1.u64);
|
||
lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num),
|
||
lmc_scramble_cfg2.u64);
|
||
}
|
||
|
||
s = lookup_env_ull(priv, "ddr_ns_ctl");
|
||
if (s)
|
||
lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0);
|
||
debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64);
|
||
lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
|
||
}
|
||
|
||
struct rl_score {
|
||
u64 setting;
|
||
int score;
|
||
};
|
||
|
||
static union cvmx_lmcx_rlevel_rankx rl_rank __section(".data");
|
||
static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data");
|
||
static unsigned char rodt_ctl __section(".data");
|
||
|
||
static int rl_rodt_err __section(".data");
|
||
static unsigned char rtt_nom __section(".data");
|
||
static unsigned char rtt_idx __section(".data");
|
||
static char min_rtt_nom_idx __section(".data");
|
||
static char max_rtt_nom_idx __section(".data");
|
||
static char min_rodt_ctl __section(".data");
|
||
static char max_rodt_ctl __section(".data");
|
||
static int rl_dbg_loops __section(".data");
|
||
static unsigned char save_ddr2t __section(".data");
|
||
static int rl_samples __section(".data");
|
||
static char rl_compute __section(".data");
|
||
static char saved_ddr__ptune __section(".data");
|
||
static char saved_ddr__ntune __section(".data");
|
||
static char rl_comp_offs __section(".data");
|
||
static char saved_int_zqcs_dis __section(".data");
|
||
static int max_adj_rl_del_inc __section(".data");
|
||
static int print_nom_ohms __section(".data");
|
||
static int rl_print __section(".data");
|
||
|
||
#ifdef ENABLE_HARDCODED_RLEVEL
|
||
static char part_number[21] __section(".data");
|
||
#endif /* ENABLE_HARDCODED_RLEVEL */
|
||
|
||
struct perfect_counts {
|
||
u16 count[9][32]; // 8+ECC by 64 values
|
||
u32 mask[9]; // 8+ECC, bitmask of perfect delays
|
||
};
|
||
|
||
static struct perfect_counts rank_perf[4] __section(".data");
|
||
static struct perfect_counts rodt_perfect_counts __section(".data");
|
||
static int pbm_lowsum_limit __section(".data");
|
||
// FIXME: PBM skip for RODT 240 and 34
|
||
static u32 pbm_rodt_skip __section(".data");
|
||
|
||
// control rank majority processing
|
||
static int disable_rank_majority __section(".data");
|
||
|
||
// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
|
||
// for DDR3
|
||
static int enable_rldelay_bump __section(".data");
|
||
static int rldelay_bump_incr __section(".data");
|
||
static int disable_rlv_bump_this_byte __section(".data");
|
||
static u64 value_mask __section(".data");
|
||
|
||
static struct rlevel_byte_data rl_byte[9] __section(".data");
|
||
static int sample_loops __section(".data");
|
||
static int max_samples __section(".data");
|
||
static int rl_rank_errors __section(".data");
|
||
static int rl_mask_err __section(".data");
|
||
static int rl_nonseq_err __section(".data");
|
||
static struct rlevel_bitmask rl_mask[9] __section(".data");
|
||
static int rl_best_rank_score __section(".data");
|
||
|
||
static int rodt_row_skip_mask __section(".data");
|
||
|
||
static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score
|
||
rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
|
||
{
|
||
union cvmx_lmcx_comp_ctl2 cc2;
|
||
const int rl_separate_ab = 1;
|
||
int i;
|
||
|
||
rl_best_rank_score = DEFAULT_BEST_RANK_SCORE;
|
||
rl_rodt_err = 0;
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
cc2.cn78xx.rodt_ctl = rodt_ctl;
|
||
lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
udelay(1); /* Give it a little time to take affect */
|
||
if (rl_print > 1) {
|
||
debug("Read ODT_CTL : 0x%x (%d ohms)\n",
|
||
cc2.cn78xx.rodt_ctl,
|
||
imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
|
||
}
|
||
|
||
memset(rl_byte, 0, sizeof(rl_byte));
|
||
memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts));
|
||
|
||
// when iter RODT is the target RODT, take more samples...
|
||
max_samples = rl_samples;
|
||
if (rodt_ctl == default_rodt_ctl)
|
||
max_samples += rl_samples + 1;
|
||
|
||
for (sample_loops = 0; sample_loops < max_samples; sample_loops++) {
|
||
int redoing_nonseq_errs = 0;
|
||
|
||
rl_mask_err = 0;
|
||
|
||
if (!(rl_separate_ab && spd_rdimm &&
|
||
ddr_type == DDR4_DRAM)) {
|
||
/* Clear read-level delays */
|
||
lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
|
||
|
||
/* read-leveling */
|
||
oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
|
||
|
||
do {
|
||
rl_rank.u64 =
|
||
lmc_rd(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
} while (rl_rank.cn78xx.status != 3);
|
||
}
|
||
|
||
rl_rank.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
|
||
|
||
// start bitmask interpretation block
|
||
|
||
memset(rl_mask, 0, sizeof(rl_mask));
|
||
|
||
if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) {
|
||
union cvmx_lmcx_rlevel_rankx rl_rank_aside;
|
||
union cvmx_lmcx_modereg_params0 mp0;
|
||
|
||
/* A-side */
|
||
mp0.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
|
||
mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_MODEREG_PARAMS0(if_num),
|
||
mp0.u64);
|
||
|
||
/* Clear read-level delays */
|
||
lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
|
||
|
||
/* read-leveling */
|
||
oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
|
||
|
||
do {
|
||
rl_rank.u64 =
|
||
lmc_rd(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
} while (rl_rank.cn78xx.status != 3);
|
||
|
||
rl_rank.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
rl_rank_aside.u64 = rl_rank.u64;
|
||
|
||
rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0);
|
||
rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1);
|
||
rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2);
|
||
rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3);
|
||
rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8);
|
||
/* A-side complete */
|
||
|
||
/* B-side */
|
||
mp0.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
|
||
mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
|
||
mp0.u64);
|
||
|
||
/* Clear read-level delays */
|
||
lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
|
||
|
||
/* read-leveling */
|
||
oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
|
||
|
||
do {
|
||
rl_rank.u64 =
|
||
lmc_rd(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
} while (rl_rank.cn78xx.status != 3);
|
||
|
||
rl_rank.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4);
|
||
rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5);
|
||
rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6);
|
||
rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7);
|
||
/* B-side complete */
|
||
|
||
upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0);
|
||
upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1);
|
||
upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2);
|
||
upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3);
|
||
/* ECC A-side */
|
||
upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8);
|
||
|
||
mp0.u64 =
|
||
lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
|
||
mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
|
||
mp0.u64);
|
||
}
|
||
|
||
/*
|
||
* Evaluate the quality of the read-leveling delays from the
|
||
* bitmasks. Also save off a software computed read-leveling
|
||
* mask that may be used later to qualify the delay results
|
||
* from Octeon.
|
||
*/
|
||
for (i = 0; i < (8 + ecc_ena); ++i) {
|
||
int bmerr;
|
||
|
||
if (!(if_bytemask & (1 << i)))
|
||
continue;
|
||
if (!(rl_separate_ab && spd_rdimm &&
|
||
ddr_type == DDR4_DRAM)) {
|
||
rl_mask[i].bm =
|
||
lmc_ddr3_rl_dbg_read(priv, if_num, i);
|
||
}
|
||
bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i],
|
||
ddr_type);
|
||
rl_mask[i].errs = bmerr;
|
||
rl_mask_err += bmerr;
|
||
// count only the "perfect" bitmasks
|
||
if (ddr_type == DDR4_DRAM && !bmerr) {
|
||
int delay;
|
||
// FIXME: for now, simple filtering:
|
||
// do NOT count PBMs for RODTs in skip mask
|
||
if ((1U << rodt_ctl) & pbm_rodt_skip)
|
||
continue;
|
||
// FIXME: could optimize this a bit?
|
||
delay = get_rl_rank(&rl_rank, i);
|
||
rank_perf[rankx].count[i][delay] += 1;
|
||
rank_perf[rankx].mask[i] |=
|
||
(1ULL << delay);
|
||
rodt_perfect_counts.count[i][delay] += 1;
|
||
rodt_perfect_counts.mask[i] |= (1ULL << delay);
|
||
}
|
||
}
|
||
|
||
/* Set delays for unused bytes to match byte 0. */
|
||
for (i = 0; i < 9; ++i) {
|
||
if (if_bytemask & (1 << i))
|
||
continue;
|
||
upd_rl_rank(&rl_rank, i, rl_rank.s.byte0);
|
||
}
|
||
|
||
/*
|
||
* Save a copy of the byte delays in physical
|
||
* order for sequential evaluation.
|
||
*/
|
||
unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank);
|
||
|
||
redo_nonseq_errs:
|
||
|
||
rl_nonseq_err = 0;
|
||
if (!disable_sequential_delay_check) {
|
||
for (i = 0; i < 9; ++i)
|
||
rl_byte[i].sqerrs = 0;
|
||
|
||
if ((if_bytemask & 0xff) == 0xff) {
|
||
/*
|
||
* Evaluate delay sequence across the whole
|
||
* range of bytes for standard dimms.
|
||
*/
|
||
/* 1=RDIMM, 5=Mini-RDIMM */
|
||
if (spd_dimm_type == 1 || spd_dimm_type == 5) {
|
||
int reg_adj_del = abs(rl_byte[4].delay -
|
||
rl_byte[5].delay);
|
||
|
||
/*
|
||
* Registered dimm topology routes
|
||
* from the center.
|
||
*/
|
||
rl_nonseq_err +=
|
||
nonseq_del(rl_byte, 0,
|
||
3 + ecc_ena,
|
||
max_adj_rl_del_inc);
|
||
rl_nonseq_err +=
|
||
nonseq_del(rl_byte, 5,
|
||
7 + ecc_ena,
|
||
max_adj_rl_del_inc);
|
||
// byte 5 sqerrs never gets cleared
|
||
// for RDIMMs
|
||
rl_byte[5].sqerrs = 0;
|
||
if (reg_adj_del > 1) {
|
||
/*
|
||
* Assess proximity of bytes on
|
||
* opposite sides of register
|
||
*/
|
||
rl_nonseq_err += (reg_adj_del -
|
||
1) *
|
||
RLEVEL_ADJACENT_DELAY_ERROR;
|
||
// update byte 5 error
|
||
rl_byte[5].sqerrs +=
|
||
(reg_adj_del - 1) *
|
||
RLEVEL_ADJACENT_DELAY_ERROR;
|
||
}
|
||
}
|
||
|
||
/* 2=UDIMM, 6=Mini-UDIMM */
|
||
if (spd_dimm_type == 2 || spd_dimm_type == 6) {
|
||
/*
|
||
* Unbuffered dimm topology routes
|
||
* from end to end.
|
||
*/
|
||
rl_nonseq_err += nonseq_del(rl_byte, 0,
|
||
7 + ecc_ena,
|
||
max_adj_rl_del_inc);
|
||
}
|
||
} else {
|
||
rl_nonseq_err += nonseq_del(rl_byte, 0,
|
||
3 + ecc_ena,
|
||
max_adj_rl_del_inc);
|
||
}
|
||
} /* if (! disable_sequential_delay_check) */
|
||
|
||
rl_rank_errors = rl_mask_err + rl_nonseq_err;
|
||
|
||
// print original sample here only if we are not really
|
||
// averaging or picking best
|
||
// also do not print if we were redoing the NONSEQ score
|
||
// for using COMPUTED
|
||
if (!redoing_nonseq_errs && rl_samples < 2) {
|
||
if (rl_print > 1) {
|
||
display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
|
||
display_rl_bm_scores(if_num, rankx, rl_mask,
|
||
ecc_ena);
|
||
display_rl_seq_scores(if_num, rankx, rl_byte,
|
||
ecc_ena);
|
||
}
|
||
display_rl_with_score(if_num, rl_rank, rankx,
|
||
rl_rank_errors);
|
||
}
|
||
|
||
if (rl_compute) {
|
||
if (!redoing_nonseq_errs) {
|
||
/* Recompute the delays based on the bitmask */
|
||
for (i = 0; i < (8 + ecc_ena); ++i) {
|
||
if (!(if_bytemask & (1 << i)))
|
||
continue;
|
||
|
||
upd_rl_rank(&rl_rank, i,
|
||
compute_ddr3_rlevel_delay(
|
||
rl_mask[i].mstart,
|
||
rl_mask[i].width,
|
||
rl_ctl));
|
||
}
|
||
|
||
/*
|
||
* Override the copy of byte delays with the
|
||
* computed results.
|
||
*/
|
||
unpack_rlevel_settings(if_bytemask, ecc_ena,
|
||
rl_byte, rl_rank);
|
||
|
||
redoing_nonseq_errs = 1;
|
||
goto redo_nonseq_errs;
|
||
|
||
} else {
|
||
/*
|
||
* now print this if already printed the
|
||
* original sample
|
||
*/
|
||
if (rl_samples < 2 || rl_print) {
|
||
display_rl_with_computed(if_num,
|
||
rl_rank, rankx,
|
||
rl_rank_errors);
|
||
}
|
||
}
|
||
} /* if (rl_compute) */
|
||
|
||
// end bitmask interpretation block
|
||
|
||
// if it is a better (lower) score, then keep it
|
||
if (rl_rank_errors < rl_best_rank_score) {
|
||
rl_best_rank_score = rl_rank_errors;
|
||
|
||
// save the new best delays and best errors
|
||
for (i = 0; i < (8 + ecc_ena); ++i) {
|
||
rl_byte[i].best = rl_byte[i].delay;
|
||
rl_byte[i].bestsq = rl_byte[i].sqerrs;
|
||
// save bitmasks and their scores as well
|
||
// xlate UNPACKED index to PACKED index to
|
||
// get from rl_mask
|
||
rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm;
|
||
rl_byte[i].bmerrs =
|
||
rl_mask[XUP(i, !!ecc_ena)].errs;
|
||
}
|
||
}
|
||
|
||
rl_rodt_err += rl_rank_errors;
|
||
}
|
||
|
||
/* We recorded the best score across the averaging loops */
|
||
rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score;
|
||
|
||
/*
|
||
* Restore the delays from the best fields that go with the best
|
||
* score
|
||
*/
|
||
for (i = 0; i < 9; ++i) {
|
||
rl_byte[i].delay = rl_byte[i].best;
|
||
rl_byte[i].sqerrs = rl_byte[i].bestsq;
|
||
}
|
||
|
||
rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
|
||
|
||
pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank);
|
||
|
||
if (rl_samples > 1) {
|
||
// restore the "best" bitmasks and their scores for printing
|
||
for (i = 0; i < 9; ++i) {
|
||
if ((if_bytemask & (1 << i)) == 0)
|
||
continue;
|
||
// xlate PACKED index to UNPACKED index to get from
|
||
// rl_byte
|
||
rl_mask[i].bm = rl_byte[XPU(i, !!ecc_ena)].bm;
|
||
rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs;
|
||
}
|
||
|
||
// maybe print bitmasks/scores here
|
||
if (rl_print > 1) {
|
||
display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
|
||
display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena);
|
||
display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena);
|
||
|
||
display_rl_with_rodt(if_num, rl_rank, rankx,
|
||
rl_score[rtt_nom][rodt_ctl][rankx].score,
|
||
print_nom_ohms,
|
||
imp_val->rodt_ohms[rodt_ctl],
|
||
WITH_RODT_BESTSCORE);
|
||
|
||
debug("-----------\n");
|
||
}
|
||
}
|
||
|
||
rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64;
|
||
|
||
// print out the PBMs for the current RODT
|
||
if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity?
|
||
// FIXME: change verbosity level after debug complete...
|
||
|
||
for (i = 0; i < 9; i++) {
|
||
u64 temp_mask;
|
||
int num_values;
|
||
|
||
// FIXME: PBM skip for RODTs in mask
|
||
if ((1U << rodt_ctl) & pbm_rodt_skip)
|
||
continue;
|
||
|
||
temp_mask = rodt_perfect_counts.mask[i];
|
||
num_values = __builtin_popcountll(temp_mask);
|
||
i = __builtin_ffsll(temp_mask) - 1;
|
||
|
||
debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ",
|
||
node, if_num, rankx,
|
||
imp_val->rodt_ohms[rodt_ctl],
|
||
i, temp_mask >> i, num_values);
|
||
|
||
while (temp_mask != 0) {
|
||
i = __builtin_ffsll(temp_mask) - 1;
|
||
debug("%2d(%2d) ", i,
|
||
rodt_perfect_counts.count[i][i]);
|
||
temp_mask &= ~(1UL << i);
|
||
} /* while (temp_mask != 0) */
|
||
debug("\n");
|
||
}
|
||
}
|
||
}
|
||
|
||
static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score
|
||
rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
|
||
{
|
||
/* Start with an arbitrarily high score */
|
||
int best_rank_score = DEFAULT_BEST_RANK_SCORE;
|
||
int best_rank_rtt_nom = 0;
|
||
int best_rank_ctl = 0;
|
||
int best_rank_ohms = 0;
|
||
int best_rankx = 0;
|
||
int dimm_rank_mask;
|
||
int max_rank_score;
|
||
union cvmx_lmcx_rlevel_rankx saved_rl_rank;
|
||
int next_ohms;
|
||
int orankx;
|
||
int next_score = 0;
|
||
int best_byte, new_byte, temp_byte, orig_best_byte;
|
||
int rank_best_bytes[9];
|
||
int byte_sh;
|
||
int avg_byte;
|
||
int avg_diff;
|
||
int i;
|
||
|
||
if (!(rank_mask & (1 << rankx)))
|
||
return;
|
||
|
||
// some of the rank-related loops below need to operate only on
|
||
// the ranks of a single DIMM,
|
||
// so create a mask for their use here
|
||
if (num_ranks == 4) {
|
||
dimm_rank_mask = rank_mask; // should be 1111
|
||
} else {
|
||
dimm_rank_mask = rank_mask & 3; // should be 01 or 11
|
||
if (rankx >= 2) {
|
||
// doing a rank on the second DIMM, should be
|
||
// 0100 or 1100
|
||
dimm_rank_mask <<= 2;
|
||
}
|
||
}
|
||
debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n",
|
||
dimm_rank_mask, rank_mask, rankx);
|
||
|
||
// this is the start of the BEST ROW SCORE LOOP
|
||
|
||
for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
|
||
rtt_nom = imp_val->rtt_nom_table[rtt_idx];
|
||
|
||
debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
|
||
node, if_num, rankx, rtt_nom,
|
||
imp_val->rtt_nom_ohms[rtt_nom]);
|
||
|
||
for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
|
||
--rodt_ctl) {
|
||
next_ohms = imp_val->rodt_ohms[rodt_ctl];
|
||
|
||
// skip RODT rows in mask, but *NOT* rows with too
|
||
// high a score;
|
||
// we will not use the skipped ones for printing or
|
||
// evaluating, but we need to allow all the
|
||
// non-skipped ones to be candidates for "best"
|
||
if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
|
||
debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n",
|
||
node, if_num, rankx, rodt_ctl,
|
||
next_ohms, next_score);
|
||
continue;
|
||
}
|
||
|
||
// this is ROFFIX-0528
|
||
for (orankx = 0; orankx < dimm_count * 4; orankx++) {
|
||
// stay on the same DIMM
|
||
if (!(dimm_rank_mask & (1 << orankx)))
|
||
continue;
|
||
|
||
next_score = rl_score[rtt_nom][rodt_ctl][orankx].score;
|
||
|
||
// always skip a higher score
|
||
if (next_score > best_rank_score)
|
||
continue;
|
||
|
||
// if scores are equal
|
||
if (next_score == best_rank_score) {
|
||
// always skip lower ohms
|
||
if (next_ohms < best_rank_ohms)
|
||
continue;
|
||
|
||
// if same ohms
|
||
if (next_ohms == best_rank_ohms) {
|
||
// always skip the other rank(s)
|
||
if (orankx != rankx)
|
||
continue;
|
||
}
|
||
// else next_ohms are greater,
|
||
// always choose it
|
||
}
|
||
// else next_score is less than current best,
|
||
// so always choose it
|
||
debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
|
||
node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
|
||
best_rank_score, best_rank_ohms);
|
||
best_rank_score = next_score;
|
||
best_rank_rtt_nom = rtt_nom;
|
||
//best_rank_nom_ohms = rtt_nom_ohms;
|
||
best_rank_ctl = rodt_ctl;
|
||
best_rank_ohms = next_ohms;
|
||
best_rankx = orankx;
|
||
rl_rank.u64 =
|
||
rl_score[rtt_nom][rodt_ctl][orankx].setting;
|
||
}
|
||
}
|
||
}
|
||
|
||
// this is the end of the BEST ROW SCORE LOOP
|
||
|
||
// DANGER, Will Robinson!! Abort now if we did not find a best
|
||
// score at all...
|
||
if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
|
||
printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n",
|
||
node, if_num, rankx);
|
||
mdelay(500);
|
||
do_reset(NULL, 0, 0, NULL);
|
||
}
|
||
|
||
// FIXME: relative now, but still arbitrary...
|
||
max_rank_score = best_rank_score;
|
||
if (ddr_type == DDR4_DRAM) {
|
||
// halve the range if 2 DIMMs unless they are single rank...
|
||
max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ?
|
||
dimm_count : 1));
|
||
} else {
|
||
// Since DDR3 typically has a wider score range,
|
||
// keep more of them always
|
||
max_rank_score += MAX_RANK_SCORE_LIMIT;
|
||
}
|
||
|
||
if (!ecc_ena) {
|
||
/* ECC is not used */
|
||
rl_rank.s.byte8 = rl_rank.s.byte0;
|
||
}
|
||
|
||
// at the end, write the best row settings to the current rank
|
||
lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64);
|
||
rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
|
||
|
||
saved_rl_rank.u64 = rl_rank.u64;
|
||
|
||
// this is the start of the PRINT LOOP
|
||
int pass;
|
||
|
||
// for pass==0, print current rank, pass==1 print other rank(s)
|
||
// this is done because we want to show each ranks RODT values
|
||
// together, not interlaced
|
||
// keep separates for ranks - pass=0 target rank, pass=1 other
|
||
// rank on DIMM
|
||
int mask_skipped[2] = {0, 0};
|
||
int score_skipped[2] = {0, 0};
|
||
int selected_rows[2] = {0, 0};
|
||
int zero_scores[2] = {0, 0};
|
||
for (pass = 0; pass < 2; pass++) {
|
||
for (orankx = 0; orankx < dimm_count * 4; orankx++) {
|
||
// stay on the same DIMM
|
||
if (!(dimm_rank_mask & (1 << orankx)))
|
||
continue;
|
||
|
||
if ((pass == 0 && orankx != rankx) ||
|
||
(pass != 0 && orankx == rankx))
|
||
continue;
|
||
|
||
for (rtt_idx = min_rtt_nom_idx;
|
||
rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
|
||
rtt_nom = imp_val->rtt_nom_table[rtt_idx];
|
||
if (dyn_rtt_nom_mask == 0) {
|
||
print_nom_ohms = -1;
|
||
} else {
|
||
print_nom_ohms =
|
||
imp_val->rtt_nom_ohms[rtt_nom];
|
||
}
|
||
|
||
// cycle through all the RODT values...
|
||
for (rodt_ctl = max_rodt_ctl;
|
||
rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
|
||
union cvmx_lmcx_rlevel_rankx
|
||
temp_rl_rank;
|
||
int temp_score =
|
||
rl_score[rtt_nom][rodt_ctl][orankx].score;
|
||
int skip_row;
|
||
|
||
temp_rl_rank.u64 =
|
||
rl_score[rtt_nom][rodt_ctl][orankx].setting;
|
||
|
||
// skip RODT rows in mask, or rows
|
||
// with too high a score;
|
||
// we will not use them for printing
|
||
// or evaluating...
|
||
if ((1 << rodt_ctl) &
|
||
rodt_row_skip_mask) {
|
||
skip_row = WITH_RODT_SKIPPING;
|
||
++mask_skipped[pass];
|
||
} else if (temp_score >
|
||
max_rank_score) {
|
||
skip_row = WITH_RODT_SKIPPING;
|
||
++score_skipped[pass];
|
||
} else {
|
||
skip_row = WITH_RODT_BLANK;
|
||
++selected_rows[pass];
|
||
if (temp_score == 0)
|
||
++zero_scores[pass];
|
||
}
|
||
|
||
// identify and print the BEST ROW
|
||
// when it comes up
|
||
if (skip_row == WITH_RODT_BLANK &&
|
||
best_rankx == orankx &&
|
||
best_rank_rtt_nom == rtt_nom &&
|
||
best_rank_ctl == rodt_ctl)
|
||
skip_row = WITH_RODT_BESTROW;
|
||
|
||
if (rl_print) {
|
||
display_rl_with_rodt(if_num,
|
||
temp_rl_rank, orankx, temp_score,
|
||
print_nom_ohms,
|
||
imp_val->rodt_ohms[rodt_ctl],
|
||
skip_row);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
|
||
node, if_num, rankx, selected_rows[0], selected_rows[1],
|
||
zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1],
|
||
score_skipped[0], score_skipped[1]);
|
||
// this is the end of the PRINT LOOP
|
||
|
||
// now evaluate which bytes need adjusting
|
||
// collect the new byte values; first init with current best for
|
||
// neighbor use
|
||
for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
|
||
rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) &
|
||
RLEVEL_BYTE_MSK;
|
||
}
|
||
|
||
// this is the start of the BEST BYTE LOOP
|
||
|
||
for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
|
||
int sum = 0, count = 0;
|
||
int count_less = 0, count_same = 0, count_more = 0;
|
||
int count_byte; // save the value we counted around
|
||
// for rank majority use
|
||
int rank_less = 0, rank_same = 0, rank_more = 0;
|
||
int neighbor;
|
||
int neigh_byte;
|
||
|
||
best_byte = rank_best_bytes[i];
|
||
orig_best_byte = rank_best_bytes[i];
|
||
|
||
// this is the start of the BEST BYTE AVERAGING LOOP
|
||
|
||
// validate the initial "best" byte by looking at the
|
||
// average of the unskipped byte-column entries
|
||
// we want to do this before we go further, so we can
|
||
// try to start with a better initial value
|
||
// this is the so-called "BESTBUY" patch set
|
||
|
||
for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
|
||
++rtt_idx) {
|
||
rtt_nom = imp_val->rtt_nom_table[rtt_idx];
|
||
|
||
for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
|
||
--rodt_ctl) {
|
||
union cvmx_lmcx_rlevel_rankx temp_rl_rank;
|
||
int temp_score;
|
||
|
||
// average over all the ranks
|
||
for (orankx = 0; orankx < dimm_count * 4;
|
||
orankx++) {
|
||
// stay on the same DIMM
|
||
if (!(dimm_rank_mask & (1 << orankx)))
|
||
continue;
|
||
|
||
temp_score =
|
||
rl_score[rtt_nom][rodt_ctl][orankx].score;
|
||
// skip RODT rows in mask, or rows with
|
||
// too high a score;
|
||
// we will not use them for printing or
|
||
// evaluating...
|
||
|
||
if (!((1 << rodt_ctl) &
|
||
rodt_row_skip_mask) &&
|
||
temp_score <= max_rank_score) {
|
||
temp_rl_rank.u64 =
|
||
rl_score[rtt_nom][rodt_ctl][orankx].setting;
|
||
temp_byte =
|
||
(int)(temp_rl_rank.u64 >> byte_sh) &
|
||
RLEVEL_BYTE_MSK;
|
||
sum += temp_byte;
|
||
count++;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// this is the end of the BEST BYTE AVERAGING LOOP
|
||
|
||
// FIXME: validate count and sum??
|
||
avg_byte = (int)divide_nint(sum, count);
|
||
avg_diff = best_byte - avg_byte;
|
||
new_byte = best_byte;
|
||
if (avg_diff != 0) {
|
||
// bump best up/dn by 1, not necessarily all the
|
||
// way to avg
|
||
new_byte = best_byte + ((avg_diff > 0) ? -1 : 1);
|
||
}
|
||
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n",
|
||
node, if_num, rankx,
|
||
i, best_byte, avg_diff, avg_byte, new_byte);
|
||
}
|
||
best_byte = new_byte;
|
||
count_byte = new_byte; // save the value we will count around
|
||
|
||
// At this point best_byte is either:
|
||
// 1. the original byte-column value from the best scoring
|
||
// RODT row, OR
|
||
// 2. that value bumped toward the average of all the
|
||
// byte-column values
|
||
//
|
||
// best_byte will not change from here on...
|
||
|
||
// this is the start of the BEST BYTE COUNTING LOOP
|
||
|
||
// NOTE: we do this next loop separately from above, because
|
||
// we count relative to "best_byte"
|
||
// which may have been modified by the above averaging
|
||
// operation...
|
||
|
||
for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
|
||
++rtt_idx) {
|
||
rtt_nom = imp_val->rtt_nom_table[rtt_idx];
|
||
|
||
for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
|
||
--rodt_ctl) {
|
||
union cvmx_lmcx_rlevel_rankx temp_rl_rank;
|
||
int temp_score;
|
||
|
||
for (orankx = 0; orankx < dimm_count * 4;
|
||
orankx++) { // count over all the ranks
|
||
// stay on the same DIMM
|
||
if (!(dimm_rank_mask & (1 << orankx)))
|
||
continue;
|
||
|
||
temp_score =
|
||
rl_score[rtt_nom][rodt_ctl][orankx].score;
|
||
// skip RODT rows in mask, or rows
|
||
// with too high a score;
|
||
// we will not use them for printing
|
||
// or evaluating...
|
||
if (((1 << rodt_ctl) &
|
||
rodt_row_skip_mask) ||
|
||
temp_score > max_rank_score)
|
||
continue;
|
||
|
||
temp_rl_rank.u64 =
|
||
rl_score[rtt_nom][rodt_ctl][orankx].setting;
|
||
temp_byte = (temp_rl_rank.u64 >>
|
||
byte_sh) & RLEVEL_BYTE_MSK;
|
||
|
||
if (temp_byte == 0)
|
||
; // do not count it if illegal
|
||
else if (temp_byte == best_byte)
|
||
count_same++;
|
||
else if (temp_byte == best_byte - 1)
|
||
count_less++;
|
||
else if (temp_byte == best_byte + 1)
|
||
count_more++;
|
||
// else do not count anything more
|
||
// than 1 away from the best
|
||
|
||
// no rank counting if disabled
|
||
if (disable_rank_majority)
|
||
continue;
|
||
|
||
// FIXME? count is relative to
|
||
// best_byte; should it be rank-based?
|
||
// rank counts only on main rank
|
||
if (orankx != rankx)
|
||
continue;
|
||
else if (temp_byte == best_byte)
|
||
rank_same++;
|
||
else if (temp_byte == best_byte - 1)
|
||
rank_less++;
|
||
else if (temp_byte == best_byte + 1)
|
||
rank_more++;
|
||
}
|
||
}
|
||
}
|
||
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
|
||
node, if_num, rankx,
|
||
i, orig_best_byte, best_byte,
|
||
count_more, count_same, count_less,
|
||
rank_more, rank_same, rank_less);
|
||
}
|
||
|
||
// this is the end of the BEST BYTE COUNTING LOOP
|
||
|
||
// choose the new byte value
|
||
// we need to check that there is no gap greater than 2
|
||
// between adjacent bytes (adjacency depends on DIMM type)
|
||
// use the neighbor value to help decide
|
||
// initially, the rank_best_bytes[] will contain values from
|
||
// the chosen lowest score rank
|
||
new_byte = 0;
|
||
|
||
// neighbor is index-1 unless we are index 0 or index 8 (ECC)
|
||
neighbor = (i == 8) ? 3 : ((i == 0) ? 1 : i - 1);
|
||
neigh_byte = rank_best_bytes[neighbor];
|
||
|
||
// can go up or down or stay the same, so look at a numeric
|
||
// average to help
|
||
new_byte = (int)divide_nint(((count_more * (best_byte + 1)) +
|
||
(count_same * (best_byte + 0)) +
|
||
(count_less * (best_byte - 1))),
|
||
max(1, (count_more + count_same +
|
||
count_less)));
|
||
|
||
// use neighbor to help choose with average
|
||
if (i > 0 && (abs(neigh_byte - new_byte) > 2) &&
|
||
!disable_sequential_delay_check) {
|
||
// but not for byte 0
|
||
int avg_pick = new_byte;
|
||
|
||
if ((new_byte - best_byte) != 0) {
|
||
// back to best, average did not get better
|
||
new_byte = best_byte;
|
||
} else {
|
||
// avg was the same, still too far, now move
|
||
// it towards the neighbor
|
||
new_byte += (neigh_byte > new_byte) ? 1 : -1;
|
||
}
|
||
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
|
||
node, if_num, rankx,
|
||
i, neighbor, neigh_byte, avg_pick,
|
||
new_byte);
|
||
}
|
||
} else {
|
||
// NOTE:
|
||
// For now, we let the neighbor processing above trump
|
||
// the new simple majority processing here.
|
||
// This is mostly because we have seen no smoking gun
|
||
// for a neighbor bad choice (yet?).
|
||
// Also note that we will ALWAYS be using byte 0
|
||
// majority, because of the if clause above.
|
||
|
||
// majority is dependent on the counts, which are
|
||
// relative to best_byte, so start there
|
||
int maj_byte = best_byte;
|
||
int rank_maj;
|
||
int rank_sum;
|
||
|
||
if (count_more > count_same &&
|
||
count_more > count_less) {
|
||
maj_byte++;
|
||
} else if (count_less > count_same &&
|
||
count_less > count_more) {
|
||
maj_byte--;
|
||
}
|
||
|
||
if (maj_byte != new_byte) {
|
||
// print only when majority choice is
|
||
// different from average
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
|
||
node, if_num, rankx, i, maj_byte,
|
||
new_byte);
|
||
}
|
||
new_byte = maj_byte;
|
||
} else {
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
|
||
node, if_num, rankx, i, new_byte);
|
||
}
|
||
}
|
||
|
||
if (!disable_rank_majority) {
|
||
// rank majority is dependent on the rank
|
||
// counts, which are relative to best_byte,
|
||
// so start there, and adjust according to the
|
||
// rank counts majority
|
||
rank_maj = best_byte;
|
||
if (rank_more > rank_same &&
|
||
rank_more > rank_less) {
|
||
rank_maj++;
|
||
} else if (rank_less > rank_same &&
|
||
rank_less > rank_more) {
|
||
rank_maj--;
|
||
}
|
||
rank_sum = rank_more + rank_same + rank_less;
|
||
|
||
// now, let rank majority possibly rule over
|
||
// the current new_byte however we got it
|
||
if (rank_maj != new_byte) { // only if different
|
||
// Here is where we decide whether to
|
||
// completely apply RANK_MAJORITY or not
|
||
// ignore if less than
|
||
if (rank_maj < new_byte) {
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n",
|
||
node, if_num,
|
||
rankx, i,
|
||
rank_maj,
|
||
new_byte);
|
||
}
|
||
} else {
|
||
// For the moment, we do it
|
||
// ONLY when running 2-slot
|
||
// configs
|
||
// OR when rank_sum is big
|
||
// enough
|
||
if (dimm_count > 1 ||
|
||
rank_sum > 2) {
|
||
// print only when rank
|
||
// majority choice is
|
||
// selected
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
|
||
node,
|
||
if_num,
|
||
rankx,
|
||
i,
|
||
rank_maj,
|
||
new_byte);
|
||
}
|
||
new_byte = rank_maj;
|
||
} else {
|
||
// FIXME: print some
|
||
// info when we could
|
||
// have chosen RANKMAJ
|
||
// but did not
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
|
||
node,
|
||
if_num,
|
||
rankx,
|
||
i,
|
||
rank_maj,
|
||
new_byte,
|
||
best_byte,
|
||
rank_sum);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
} /* if (!disable_rank_majority) */
|
||
}
|
||
// one last check:
|
||
// if new_byte is still count_byte, BUT there was no count
|
||
// for that value, DO SOMETHING!!!
|
||
// FIXME: go back to original best byte from the best row
|
||
if (new_byte == count_byte && count_same == 0) {
|
||
new_byte = orig_best_byte;
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
|
||
node, if_num, rankx, i, new_byte);
|
||
}
|
||
}
|
||
// Look at counts for "perfect" bitmasks (PBMs) if we had
|
||
// any for this byte-lane.
|
||
// Remember, we only counted for DDR4, so zero means none
|
||
// or DDR3, and we bypass this...
|
||
value_mask = rank_perf[rankx].mask[i];
|
||
disable_rlv_bump_this_byte = 0;
|
||
|
||
if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) {
|
||
int i, delay_count, delay_max = 0, del_val = 0;
|
||
int num_values = __builtin_popcountll(value_mask);
|
||
int sum_counts = 0;
|
||
u64 temp_mask = value_mask;
|
||
|
||
disable_rlv_bump_this_byte = 1;
|
||
i = __builtin_ffsll(temp_mask) - 1;
|
||
if (rl_print)
|
||
debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ",
|
||
node, if_num, rankx, i, value_mask >> i,
|
||
num_values);
|
||
|
||
while (temp_mask != 0) {
|
||
i = __builtin_ffsll(temp_mask) - 1;
|
||
delay_count = rank_perf[rankx].count[i][i];
|
||
sum_counts += delay_count;
|
||
if (rl_print)
|
||
debug("%2d(%2d) ", i, delay_count);
|
||
if (delay_count >= delay_max) {
|
||
delay_max = delay_count;
|
||
del_val = i;
|
||
}
|
||
temp_mask &= ~(1UL << i);
|
||
} /* while (temp_mask != 0) */
|
||
|
||
// if sum_counts is small, just use NEW_BYTE
|
||
if (sum_counts < pbm_lowsum_limit) {
|
||
if (rl_print)
|
||
debug(": LOWSUM (%2d), choose ORIG ",
|
||
sum_counts);
|
||
del_val = new_byte;
|
||
delay_max = rank_perf[rankx].count[i][del_val];
|
||
}
|
||
|
||
// finish printing here...
|
||
if (rl_print) {
|
||
debug(": USING %2d (%2d) D%d\n", del_val,
|
||
delay_max, disable_rlv_bump_this_byte);
|
||
}
|
||
|
||
new_byte = del_val; // override with best PBM choice
|
||
|
||
} else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) {
|
||
// if (value_mask != 0) {
|
||
int i, delay_count, del_val;
|
||
int num_values = __builtin_popcountll(value_mask);
|
||
int sum_counts = 0;
|
||
u64 temp_mask = value_mask;
|
||
|
||
i = __builtin_ffsll(temp_mask) - 1;
|
||
if (rl_print)
|
||
debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ",
|
||
node, if_num, rankx, i, value_mask >> i,
|
||
num_values);
|
||
while (temp_mask != 0) {
|
||
i = __builtin_ffsll(temp_mask) - 1;
|
||
delay_count = rank_perf[rankx].count[i][i];
|
||
sum_counts += delay_count;
|
||
if (rl_print)
|
||
debug("%2d(%2d) ", i, delay_count);
|
||
temp_mask &= ~(1UL << i);
|
||
} /* while (temp_mask != 0) */
|
||
|
||
del_val = __builtin_ffsll(value_mask) - 1;
|
||
delay_count =
|
||
rank_perf[rankx].count[i][del_val];
|
||
|
||
// overkill, normally only 1-4 bits
|
||
i = (value_mask >> del_val) & 0x1F;
|
||
|
||
// if sum_counts is small, treat as special and use
|
||
// NEW_BYTE
|
||
if (sum_counts < pbm_lowsum_limit) {
|
||
if (rl_print)
|
||
debug(": LOWSUM (%2d), choose ORIG",
|
||
sum_counts);
|
||
i = 99; // SPECIAL case...
|
||
}
|
||
|
||
switch (i) {
|
||
case 0x01 /* 00001b */:
|
||
// allow BUMP
|
||
break;
|
||
|
||
case 0x13 /* 10011b */:
|
||
case 0x0B /* 01011b */:
|
||
case 0x03 /* 00011b */:
|
||
del_val += 1; // take the second
|
||
disable_rlv_bump_this_byte = 1; // allow no BUMP
|
||
break;
|
||
|
||
case 0x0D /* 01101b */:
|
||
case 0x05 /* 00101b */:
|
||
// test count of lowest and all
|
||
if (delay_count >= 5 || sum_counts <= 5)
|
||
del_val += 1; // take the hole
|
||
else
|
||
del_val += 2; // take the next set
|
||
disable_rlv_bump_this_byte = 1; // allow no BUMP
|
||
break;
|
||
|
||
case 0x0F /* 01111b */:
|
||
case 0x17 /* 10111b */:
|
||
case 0x07 /* 00111b */:
|
||
del_val += 1; // take the second
|
||
if (delay_count < 5) { // lowest count is small
|
||
int second =
|
||
rank_perf[rankx].count[i][del_val];
|
||
int third =
|
||
rank_perf[rankx].count[i][del_val + 1];
|
||
// test if middle is more than 1 OR
|
||
// top is more than 1;
|
||
// this means if they are BOTH 1,
|
||
// then we keep the second...
|
||
if (second > 1 || third > 1) {
|
||
// if middle is small OR top
|
||
// is large
|
||
if (second < 5 ||
|
||
third > 1) {
|
||
// take the top
|
||
del_val += 1;
|
||
if (rl_print)
|
||
debug(": TOP7 ");
|
||
}
|
||
}
|
||
}
|
||
disable_rlv_bump_this_byte = 1; // allow no BUMP
|
||
break;
|
||
|
||
default: // all others...
|
||
if (rl_print)
|
||
debug(": ABNORMAL, choose ORIG");
|
||
|
||
case 99: // special
|
||
// FIXME: choose original choice?
|
||
del_val = new_byte;
|
||
disable_rlv_bump_this_byte = 1; // allow no BUMP
|
||
break;
|
||
}
|
||
delay_count =
|
||
rank_perf[rankx].count[i][del_val];
|
||
|
||
// finish printing here...
|
||
if (rl_print)
|
||
debug(": USING %2d (%2d) D%d\n", del_val,
|
||
delay_count, disable_rlv_bump_this_byte);
|
||
new_byte = del_val; // override with best PBM choice
|
||
} else {
|
||
if (ddr_type == DDR4_DRAM) { // only report when DDR4
|
||
// FIXME: remove or increase VBL for this
|
||
// output...
|
||
if (rl_print)
|
||
debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n",
|
||
node, if_num, rankx, i,
|
||
new_byte);
|
||
// prevent ODD bump, rely on original
|
||
disable_rlv_bump_this_byte = 1;
|
||
}
|
||
} /* if (value_mask != 0) */
|
||
|
||
// optionally bump the delay value
|
||
if (enable_rldelay_bump && !disable_rlv_bump_this_byte) {
|
||
if ((new_byte & enable_rldelay_bump) ==
|
||
enable_rldelay_bump) {
|
||
int bump_value = new_byte + rldelay_bump_incr;
|
||
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n",
|
||
node, if_num, rankx, i,
|
||
new_byte, bump_value,
|
||
(value_mask &
|
||
(1 << bump_value)) ?
|
||
"PBM" : "NOPBM");
|
||
}
|
||
new_byte = bump_value;
|
||
}
|
||
}
|
||
|
||
// last checks for count-related purposes
|
||
if (new_byte == best_byte && count_more > 0 &&
|
||
count_less == 0) {
|
||
// we really should take best_byte + 1
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n",
|
||
node, if_num, rankx, i,
|
||
new_byte, best_byte + 1);
|
||
new_byte = best_byte + 1;
|
||
}
|
||
} else if ((new_byte < best_byte) && (count_same > 0)) {
|
||
// we really should take best_byte
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n",
|
||
node, if_num, rankx, i,
|
||
new_byte, best_byte);
|
||
new_byte = best_byte;
|
||
}
|
||
} else if (new_byte > best_byte) {
|
||
if ((new_byte == (best_byte + 1)) &&
|
||
count_more == 0 && count_less > 0) {
|
||
// we really should take best_byte
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n",
|
||
node, if_num, rankx, i,
|
||
new_byte, best_byte);
|
||
new_byte = best_byte;
|
||
}
|
||
} else if ((new_byte >= (best_byte + 2)) &&
|
||
((count_more > 0) || (count_same > 0))) {
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n",
|
||
node, if_num, rankx, i,
|
||
new_byte, best_byte + 1);
|
||
new_byte = best_byte + 1;
|
||
}
|
||
}
|
||
}
|
||
|
||
if (rl_print) {
|
||
debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n",
|
||
node, if_num, rankx, i, orig_best_byte,
|
||
best_byte, count_more, count_same, count_less,
|
||
new_byte);
|
||
}
|
||
|
||
// update the byte with the new value (NOTE: orig value in
|
||
// the CSR may not be current "best")
|
||
upd_rl_rank(&rl_rank, i, new_byte);
|
||
|
||
// save new best for neighbor use
|
||
rank_best_bytes[i] = new_byte;
|
||
} /* for (i = 0; i < 8+ecc_ena; i++) */
|
||
|
||
////////////////// this is the end of the BEST BYTE LOOP
|
||
|
||
if (saved_rl_rank.u64 != rl_rank.u64) {
|
||
lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
|
||
rl_rank.u64);
|
||
rl_rank.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
|
||
debug("Adjusting Read-Leveling per-RANK settings.\n");
|
||
} else {
|
||
debug("Not Adjusting Read-Leveling per-RANK settings.\n");
|
||
}
|
||
display_rl_with_final(if_num, rl_rank, rankx);
|
||
|
||
// FIXME: does this help make the output a little easier to focus?
|
||
if (rl_print > 0)
|
||
debug("-----------\n");
|
||
|
||
#define RLEVEL_RANKX_EXTRAS_INCR 0
|
||
// if there are unused entries to be filled
|
||
if ((rank_mask & 0x0f) != 0x0f) {
|
||
// copy the current rank
|
||
union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank;
|
||
|
||
if (rankx < 3) {
|
||
#if RLEVEL_RANKX_EXTRAS_INCR > 0
|
||
int byte, delay;
|
||
|
||
// modify the copy in prep for writing to empty slot(s)
|
||
for (byte = 0; byte < 9; byte++) {
|
||
delay = get_rl_rank(&temp_rl_rank, byte) +
|
||
RLEVEL_RANKX_EXTRAS_INCR;
|
||
if (delay > RLEVEL_BYTE_MSK)
|
||
delay = RLEVEL_BYTE_MSK;
|
||
upd_rl_rank(&temp_rl_rank, byte, delay);
|
||
}
|
||
#endif
|
||
|
||
// if rank 0, write rank 1 and rank 2 here if empty
|
||
if (rankx == 0) {
|
||
// check that rank 1 is empty
|
||
if (!(rank_mask & (1 << 1))) {
|
||
debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
|
||
node, if_num, rankx, 1);
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(1,
|
||
if_num),
|
||
temp_rl_rank.u64);
|
||
}
|
||
|
||
// check that rank 2 is empty
|
||
if (!(rank_mask & (1 << 2))) {
|
||
debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
|
||
node, if_num, rankx, 2);
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(2,
|
||
if_num),
|
||
temp_rl_rank.u64);
|
||
}
|
||
}
|
||
|
||
// if ranks 0, 1 or 2, write rank 3 here if empty
|
||
// check that rank 3 is empty
|
||
if (!(rank_mask & (1 << 3))) {
|
||
debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
|
||
node, if_num, rankx, 3);
|
||
lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num),
|
||
temp_rl_rank.u64);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
static void lmc_read_leveling(struct ddr_priv *priv)
|
||
{
|
||
struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
|
||
union cvmx_lmcx_control ctl;
|
||
union cvmx_lmcx_config cfg;
|
||
int rankx;
|
||
char *s;
|
||
int i;
|
||
|
||
/*
|
||
* 4.8.10 LMC Read Leveling
|
||
*
|
||
* LMC supports an automatic read-leveling separately per byte-lane
|
||
* using the DDR3 multipurpose register predefined pattern for system
|
||
* calibration defined in the JEDEC DDR3 specifications.
|
||
*
|
||
* All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations
|
||
* must be completed prior to starting this LMC read-leveling sequence.
|
||
*
|
||
* Software could simply write the desired read-leveling values into
|
||
* LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
|
||
* LMC's autoread-leveling capabilities.
|
||
*
|
||
* When LMC does the read-leveling sequence for a rank, it first enables
|
||
* the DDR3 multipurpose register predefined pattern for system
|
||
* calibration on the selected DRAM rank via a DDR3 MR3 write, then
|
||
* executes 64 RD operations at different internal delay settings, then
|
||
* disables the predefined pattern via another DDR3 MR3 write
|
||
* operation. LMC determines the pass or fail of each of the 64 settings
|
||
* independently for each byte lane, then writes appropriate
|
||
* LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
|
||
*
|
||
* After read-leveling for a rank, software can read the 64 pass/fail
|
||
* indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK].
|
||
* Software can observe all pass/fail results for all byte lanes in a
|
||
* rank via separate read-leveling sequences on the rank with different
|
||
* LMC(0)_RLEVEL_CTL[BYTE] values.
|
||
*
|
||
* The 64 pass/fail results will typically have failures for the low
|
||
* delays, followed by a run of some passing settings, followed by more
|
||
* failures in the remaining high delays. LMC sets
|
||
* LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
|
||
* First, LMC selects the longest run of successes in the 64 results.
|
||
* (In the unlikely event that there is more than one longest run, LMC
|
||
* selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
|
||
* the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
|
||
* LMC selects the last passing setting in the run minus
|
||
* LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting
|
||
* in the run (rounding earlier when necessary). We expect the
|
||
* read-leveling sequence to produce good results with the reset values
|
||
* LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
|
||
*
|
||
* The read-leveling sequence has the following steps:
|
||
*
|
||
* 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
|
||
* Do the remaining substeps 2-4 separately for each rank i with
|
||
* attached DRAM.
|
||
*
|
||
* 2. Without changing any other fields in LMC(0)_CONFIG,
|
||
*
|
||
* o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
|
||
*
|
||
* o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
|
||
*
|
||
* o write LMC(0)_SEQ_CTL[INIT_START] = 1
|
||
*
|
||
* This initiates the previously-described read-leveling.
|
||
*
|
||
* 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
|
||
*
|
||
* LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte
|
||
* lanes at this point.
|
||
*
|
||
* If ECC DRAM is not present (i.e. when DRAM is not attached to the
|
||
* DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
|
||
* DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
|
||
* LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
|
||
* LMC(0)_RLEVEL_RANK*[BYTE0].
|
||
*
|
||
* 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
|
||
* LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
|
||
* LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify
|
||
* LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all
|
||
* BITMASKs can be observed.
|
||
*
|
||
* 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
|
||
*
|
||
* Let rank i be a rank with attached DRAM.
|
||
*
|
||
* For all ranks j that do not have attached DRAM, set
|
||
* LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
|
||
*
|
||
* This read-leveling sequence can help select the proper CN70XX ODT
|
||
* resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
|
||
* LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
|
||
* drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
|
||
* (for a used byte lane k) can indicate that the CN70XX ODT value is
|
||
* bad. It is possible to simultaneously optimize both
|
||
* LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
|
||
* performing this read-leveling sequence for several
|
||
* LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the
|
||
* best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
|
||
*/
|
||
|
||
rl_rodt_err = 0;
|
||
rl_dbg_loops = 1;
|
||
saved_int_zqcs_dis = 0;
|
||
max_adj_rl_del_inc = 0;
|
||
rl_print = RLEVEL_PRINTALL_DEFAULT;
|
||
|
||
#ifdef ENABLE_HARDCODED_RLEVEL
|
||
part_number[21] = {0};
|
||
#endif /* ENABLE_HARDCODED_RLEVEL */
|
||
|
||
pbm_lowsum_limit = 5; // FIXME: is this a good default?
|
||
// FIXME: PBM skip for RODT 240 and 34
|
||
pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) |
|
||
(1U << ddr4_rodt_ctl_34_ohm);
|
||
|
||
disable_rank_majority = 0; // control rank majority processing
|
||
|
||
// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
|
||
// for DDR3
|
||
rldelay_bump_incr = 0;
|
||
disable_rlv_bump_this_byte = 0;
|
||
|
||
enable_rldelay_bump = (ddr_type == DDR4_DRAM) ?
|
||
((octeon_is_cpuid(OCTEON_CN73XX)) ? 1 : 3) : 0;
|
||
|
||
s = lookup_env(priv, "ddr_disable_rank_majority");
|
||
if (s)
|
||
disable_rank_majority = !!simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_pbm_lowsum_limit");
|
||
if (s)
|
||
pbm_lowsum_limit = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_pbm_rodt_skip");
|
||
if (s)
|
||
pbm_rodt_skip = simple_strtoul(s, NULL, 0);
|
||
memset(rank_perf, 0, sizeof(rank_perf));
|
||
|
||
ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
save_ddr2t = ctl.cn78xx.ddr2t;
|
||
|
||
cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
|
||
ecc_ena = cfg.cn78xx.ecc_ena;
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_2t");
|
||
if (s)
|
||
ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
|
||
|
||
debug("LMC%d: Performing Read-Leveling\n", if_num);
|
||
|
||
rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
|
||
|
||
rl_samples = c_cfg->rlevel_average_loops;
|
||
if (rl_samples == 0) {
|
||
rl_samples = RLEVEL_SAMPLES_DEFAULT;
|
||
// up the samples for these cases
|
||
if (dimm_count == 1 || num_ranks == 1)
|
||
rl_samples = rl_samples * 2 + 1;
|
||
}
|
||
|
||
rl_compute = c_cfg->rlevel_compute;
|
||
rl_ctl.cn78xx.offset_en = c_cfg->offset_en;
|
||
rl_ctl.cn78xx.offset = spd_rdimm
|
||
? c_cfg->offset_rdimm
|
||
: c_cfg->offset_udimm;
|
||
|
||
int value = 1; // should ALWAYS be set
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_delay_unload");
|
||
if (s)
|
||
value = !!simple_strtoul(s, NULL, 0);
|
||
rl_ctl.cn78xx.delay_unload_0 = value;
|
||
rl_ctl.cn78xx.delay_unload_1 = value;
|
||
rl_ctl.cn78xx.delay_unload_2 = value;
|
||
rl_ctl.cn78xx.delay_unload_3 = value;
|
||
|
||
// use OR_DIS=1 to try for better results
|
||
rl_ctl.cn78xx.or_dis = 1;
|
||
|
||
/*
|
||
* If we will be switching to 32bit mode, level based on only
|
||
* four bits because there are only 4 ECC bits.
|
||
*/
|
||
rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F;
|
||
|
||
// allow overrides
|
||
s = lookup_env(priv, "ddr_rlevel_ctl_or_dis");
|
||
if (s)
|
||
rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_ctl_bitmask");
|
||
if (s)
|
||
rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0);
|
||
|
||
rl_comp_offs = spd_rdimm
|
||
? c_cfg->rlevel_comp_offset_rdimm
|
||
: c_cfg->rlevel_comp_offset_udimm;
|
||
s = lookup_env(priv, "ddr_rlevel_comp_offset");
|
||
if (s)
|
||
rl_comp_offs = strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_offset");
|
||
if (s)
|
||
rl_ctl.cn78xx.offset = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_offset_en");
|
||
if (s)
|
||
rl_ctl.cn78xx.offset_en = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_ctl");
|
||
if (s)
|
||
rl_ctl.u64 = simple_strtoul(s, NULL, 0);
|
||
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_RLEVEL_CTL(if_num),
|
||
rl_ctl.u64);
|
||
|
||
// do this here so we can look at final RLEVEL_CTL[offset] setting...
|
||
s = lookup_env(priv, "ddr_enable_rldelay_bump");
|
||
if (s) {
|
||
// also use as mask bits
|
||
enable_rldelay_bump = strtoul(s, NULL, 0);
|
||
}
|
||
|
||
if (enable_rldelay_bump != 0)
|
||
rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? -1 : 1;
|
||
|
||
s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num);
|
||
if (s)
|
||
rl_dbg_loops = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rtt_nom_auto");
|
||
if (s)
|
||
ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_average");
|
||
if (s)
|
||
rl_samples = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_compute");
|
||
if (s)
|
||
rl_compute = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_rlevel_printall");
|
||
if (s)
|
||
rl_print = simple_strtoul(s, NULL, 0);
|
||
|
||
debug("RLEVEL_CTL : 0x%016llx\n",
|
||
rl_ctl.u64);
|
||
debug("RLEVEL_OFFSET : %6d\n",
|
||
rl_ctl.cn78xx.offset);
|
||
debug("RLEVEL_OFFSET_EN : %6d\n",
|
||
rl_ctl.cn78xx.offset_en);
|
||
|
||
/*
|
||
* The purpose for the indexed table is to sort the settings
|
||
* by the ohm value to simplify the testing when incrementing
|
||
* through the settings. (index => ohms) 1=120, 2=60, 3=40,
|
||
* 4=30, 5=20
|
||
*/
|
||
min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ?
|
||
1 : c_cfg->min_rtt_nom_idx;
|
||
max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ?
|
||
5 : c_cfg->max_rtt_nom_idx;
|
||
|
||
min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl;
|
||
max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl;
|
||
|
||
s = lookup_env(priv, "ddr_min_rodt_ctl");
|
||
if (s)
|
||
min_rodt_ctl = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_max_rodt_ctl");
|
||
if (s)
|
||
max_rodt_ctl = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_min_rtt_nom_idx");
|
||
if (s)
|
||
min_rtt_nom_idx = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_max_rtt_nom_idx");
|
||
if (s)
|
||
max_rtt_nom_idx = simple_strtoul(s, NULL, 0);
|
||
|
||
#ifdef ENABLE_HARDCODED_RLEVEL
|
||
if (c_cfg->rl_tbl) {
|
||
/* Check for hard-coded read-leveling settings */
|
||
get_dimm_part_number(part_number, &dimm_config_table[0],
|
||
0, ddr_type);
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
rl_rank.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
i = 0;
|
||
while (c_cfg->rl_tbl[i].part) {
|
||
debug("DIMM part number:\"%s\", SPD: \"%s\"\n",
|
||
c_cfg->rl_tbl[i].part, part_number);
|
||
if ((strcmp(part_number,
|
||
c_cfg->rl_tbl[i].part) == 0) &&
|
||
(abs(c_cfg->rl_tbl[i].speed -
|
||
2 * ddr_hertz / (1000 * 1000)) < 10)) {
|
||
debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n",
|
||
part_number);
|
||
rl_rank.u64 =
|
||
c_cfg->rl_tbl[i].rl_rank[if_num][rankx];
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num),
|
||
rl_rank.u64);
|
||
rl_rank.u64 =
|
||
lmc_rd(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
display_rl(if_num, rl_rank, rankx);
|
||
/* Disable h/w read-leveling */
|
||
rl_dbg_loops = 0;
|
||
break;
|
||
}
|
||
++i;
|
||
}
|
||
}
|
||
}
|
||
#endif /* ENABLE_HARDCODED_RLEVEL */
|
||
|
||
max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment;
|
||
s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment");
|
||
if (s)
|
||
max_adj_rl_del_inc = strtoul(s, NULL, 0);
|
||
|
||
while (rl_dbg_loops--) {
|
||
union cvmx_lmcx_modereg_params1 mp1;
|
||
union cvmx_lmcx_comp_ctl2 cc2;
|
||
|
||
/* Initialize the error scoreboard */
|
||
memset(rl_score, 0, sizeof(rl_score));
|
||
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
saved_ddr__ptune = cc2.cn78xx.ddr__ptune;
|
||
saved_ddr__ntune = cc2.cn78xx.ddr__ntune;
|
||
|
||
/* Disable dynamic compensation settings */
|
||
if (rl_comp_offs != 0) {
|
||
cc2.cn78xx.ptune = saved_ddr__ptune;
|
||
cc2.cn78xx.ntune = saved_ddr__ntune;
|
||
|
||
/*
|
||
* Round up the ptune calculation to bias the odd
|
||
* cases toward ptune
|
||
*/
|
||
cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2);
|
||
cc2.cn78xx.ntune -= rl_comp_offs / 2;
|
||
|
||
ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
|
||
/* Disable ZQCS while in bypass. */
|
||
ctl.s.int_zqcs_dis = 1;
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
|
||
|
||
cc2.cn78xx.byp = 1; /* Enable bypass mode */
|
||
lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
|
||
lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
/* Read again */
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
|
||
cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
|
||
}
|
||
|
||
mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
|
||
|
||
for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
|
||
++rtt_idx) {
|
||
rtt_nom = imp_val->rtt_nom_table[rtt_idx];
|
||
|
||
/*
|
||
* When the read ODT mask is zero the dyn_rtt_nom_mask
|
||
* is zero, then RTT_NOM will not be changing during
|
||
* read-leveling. Since the value is fixed we only need
|
||
* to test it once.
|
||
*/
|
||
if (dyn_rtt_nom_mask == 0) {
|
||
// flag not to print NOM ohms
|
||
print_nom_ohms = -1;
|
||
} else {
|
||
if (dyn_rtt_nom_mask & 1)
|
||
mp1.s.rtt_nom_00 = rtt_nom;
|
||
if (dyn_rtt_nom_mask & 2)
|
||
mp1.s.rtt_nom_01 = rtt_nom;
|
||
if (dyn_rtt_nom_mask & 4)
|
||
mp1.s.rtt_nom_10 = rtt_nom;
|
||
if (dyn_rtt_nom_mask & 8)
|
||
mp1.s.rtt_nom_11 = rtt_nom;
|
||
// FIXME? rank 0 ohms always?
|
||
print_nom_ohms =
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00];
|
||
}
|
||
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
|
||
mp1.u64);
|
||
|
||
if (print_nom_ohms >= 0 && rl_print > 1) {
|
||
debug("\n");
|
||
debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
|
||
mp1.s.rtt_nom_11,
|
||
mp1.s.rtt_nom_10,
|
||
mp1.s.rtt_nom_01,
|
||
mp1.s.rtt_nom_00);
|
||
}
|
||
|
||
ddr_init_seq(priv, rank_mask, if_num);
|
||
|
||
// Try RANK outside RODT to rearrange the output...
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
for (rodt_ctl = max_rodt_ctl;
|
||
rodt_ctl >= min_rodt_ctl; --rodt_ctl)
|
||
rodt_loop(priv, rankx, rl_score);
|
||
}
|
||
}
|
||
|
||
/* Re-enable dynamic compensation settings. */
|
||
if (rl_comp_offs != 0) {
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
|
||
cc2.cn78xx.ptune = 0;
|
||
cc2.cn78xx.ntune = 0;
|
||
cc2.cn78xx.byp = 0; /* Disable bypass mode */
|
||
lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
|
||
/* Read once */
|
||
lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
|
||
/* Read again */
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
|
||
cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
|
||
|
||
ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
/* Restore original setting */
|
||
ctl.s.int_zqcs_dis = saved_int_zqcs_dis;
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
|
||
}
|
||
|
||
int override_compensation = 0;
|
||
|
||
s = lookup_env(priv, "ddr__ptune");
|
||
if (s)
|
||
saved_ddr__ptune = strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr__ntune");
|
||
if (s) {
|
||
saved_ddr__ntune = strtoul(s, NULL, 0);
|
||
override_compensation = 1;
|
||
}
|
||
|
||
if (override_compensation) {
|
||
cc2.cn78xx.ptune = saved_ddr__ptune;
|
||
cc2.cn78xx.ntune = saved_ddr__ntune;
|
||
|
||
ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
|
||
/* Disable ZQCS while in bypass. */
|
||
ctl.s.int_zqcs_dis = 1;
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
|
||
|
||
cc2.cn78xx.byp = 1; /* Enable bypass mode */
|
||
lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
|
||
/* Read again */
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
|
||
debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
|
||
cc2.cn78xx.ptune, cc2.cn78xx.ntune);
|
||
}
|
||
|
||
/* Evaluation block */
|
||
/* Still at initial value? */
|
||
int best_rodt_score = DEFAULT_BEST_RANK_SCORE;
|
||
int auto_rodt_ctl = 0;
|
||
int auto_rtt_nom = 0;
|
||
int rodt_score;
|
||
|
||
rodt_row_skip_mask = 0;
|
||
|
||
// just add specific RODT rows to the skip mask for DDR4
|
||
// at this time...
|
||
if (ddr_type == DDR4_DRAM) {
|
||
// skip RODT row 34 ohms for all DDR4 types
|
||
rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm);
|
||
// skip RODT row 40 ohms for all DDR4 types
|
||
rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm);
|
||
// For now, do not skip RODT row 40 or 48 ohm when
|
||
// ddr_hertz is above 1075 MHz
|
||
if (ddr_hertz > 1075000000) {
|
||
// noskip RODT row 40 ohms
|
||
rodt_row_skip_mask &=
|
||
~(1 << ddr4_rodt_ctl_40_ohm);
|
||
// noskip RODT row 48 ohms
|
||
rodt_row_skip_mask &=
|
||
~(1 << ddr4_rodt_ctl_48_ohm);
|
||
}
|
||
// For now, do not skip RODT row 48 ohm for 2Rx4
|
||
// stacked die DIMMs
|
||
if (is_stacked_die && num_ranks == 2 &&
|
||
dram_width == 4) {
|
||
// noskip RODT row 48 ohms
|
||
rodt_row_skip_mask &=
|
||
~(1 << ddr4_rodt_ctl_48_ohm);
|
||
}
|
||
// for now, leave all rows eligible when we have
|
||
// mini-DIMMs...
|
||
if (spd_dimm_type == 5 || spd_dimm_type == 6)
|
||
rodt_row_skip_mask = 0;
|
||
// for now, leave all rows eligible when we have
|
||
// a 2-slot 1-rank config
|
||
if (dimm_count == 2 && num_ranks == 1)
|
||
rodt_row_skip_mask = 0;
|
||
|
||
debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
|
||
for (rtt_idx = min_rtt_nom_idx;
|
||
rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
|
||
rtt_nom = imp_val->rtt_nom_table[rtt_idx];
|
||
|
||
for (rodt_ctl = max_rodt_ctl;
|
||
rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
|
||
rodt_score = 0;
|
||
for (rankx = 0; rankx < dimm_count * 4;
|
||
rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
|
||
rtt_nom, rodt_ctl, rankx,
|
||
rl_score[rtt_nom][rodt_ctl][rankx].score);
|
||
rodt_score +=
|
||
rl_score[rtt_nom][rodt_ctl][rankx].score;
|
||
}
|
||
// FIXME: do we need to skip RODT rows
|
||
// here, like we do below in the
|
||
// by-RANK settings?
|
||
|
||
/*
|
||
* When using automatic ODT settings use
|
||
* the ODT settings associated with the
|
||
* best score for all of the tested ODT
|
||
* combinations.
|
||
*/
|
||
|
||
if (rodt_score < best_rodt_score ||
|
||
(rodt_score == best_rodt_score &&
|
||
(imp_val->rodt_ohms[rodt_ctl] >
|
||
imp_val->rodt_ohms[auto_rodt_ctl]))) {
|
||
debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n",
|
||
rodt_ctl,
|
||
imp_val->rodt_ohms[rodt_ctl],
|
||
rodt_score,
|
||
best_rodt_score);
|
||
best_rodt_score = rodt_score;
|
||
auto_rodt_ctl = rodt_ctl;
|
||
auto_rtt_nom = rtt_nom;
|
||
}
|
||
}
|
||
}
|
||
|
||
mp1.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_MODEREG_PARAMS1(if_num));
|
||
|
||
if (ddr_rtt_nom_auto) {
|
||
/* Store the automatically set RTT_NOM value */
|
||
if (dyn_rtt_nom_mask & 1)
|
||
mp1.s.rtt_nom_00 = auto_rtt_nom;
|
||
if (dyn_rtt_nom_mask & 2)
|
||
mp1.s.rtt_nom_01 = auto_rtt_nom;
|
||
if (dyn_rtt_nom_mask & 4)
|
||
mp1.s.rtt_nom_10 = auto_rtt_nom;
|
||
if (dyn_rtt_nom_mask & 8)
|
||
mp1.s.rtt_nom_11 = auto_rtt_nom;
|
||
} else {
|
||
/*
|
||
* restore the manual settings to the register
|
||
*/
|
||
mp1.s.rtt_nom_00 = default_rtt_nom[0];
|
||
mp1.s.rtt_nom_01 = default_rtt_nom[1];
|
||
mp1.s.rtt_nom_10 = default_rtt_nom[2];
|
||
mp1.s.rtt_nom_11 = default_rtt_nom[3];
|
||
}
|
||
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
|
||
mp1.u64);
|
||
debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
|
||
imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
|
||
mp1.s.rtt_nom_11,
|
||
mp1.s.rtt_nom_10,
|
||
mp1.s.rtt_nom_01,
|
||
mp1.s.rtt_nom_00);
|
||
|
||
debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
|
||
imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
|
||
imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
|
||
imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
|
||
extr_wr(mp1.u64, 3),
|
||
extr_wr(mp1.u64, 2),
|
||
extr_wr(mp1.u64, 1),
|
||
extr_wr(mp1.u64, 0));
|
||
|
||
debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->dic_ohms[mp1.s.dic_11],
|
||
imp_val->dic_ohms[mp1.s.dic_10],
|
||
imp_val->dic_ohms[mp1.s.dic_01],
|
||
imp_val->dic_ohms[mp1.s.dic_00],
|
||
mp1.s.dic_11,
|
||
mp1.s.dic_10,
|
||
mp1.s.dic_01,
|
||
mp1.s.dic_00);
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
union cvmx_lmcx_modereg_params2 mp2;
|
||
/*
|
||
* We must read the CSR, and not depend on
|
||
* odt_config[odt_idx].odt_mask2, since we could
|
||
* have overridden values with envvars.
|
||
* NOTE: this corrects the printout, since the
|
||
* CSR is not written with the old values...
|
||
*/
|
||
mp2.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_MODEREG_PARAMS2(if_num));
|
||
|
||
debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
|
||
imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
|
||
imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
|
||
imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
|
||
imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
|
||
mp2.s.rtt_park_11,
|
||
mp2.s.rtt_park_10,
|
||
mp2.s.rtt_park_01,
|
||
mp2.s.rtt_park_00);
|
||
|
||
debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n",
|
||
"VREF_RANGE",
|
||
mp2.s.vref_range_11,
|
||
mp2.s.vref_range_10,
|
||
mp2.s.vref_range_01,
|
||
mp2.s.vref_range_00);
|
||
|
||
debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n",
|
||
"VREF_VALUE",
|
||
mp2.s.vref_value_11,
|
||
mp2.s.vref_value_10,
|
||
mp2.s.vref_value_01,
|
||
mp2.s.vref_value_00);
|
||
}
|
||
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
if (ddr_rodt_ctl_auto) {
|
||
cc2.cn78xx.rodt_ctl = auto_rodt_ctl;
|
||
} else {
|
||
// back to the original setting
|
||
cc2.cn78xx.rodt_ctl = default_rodt_ctl;
|
||
}
|
||
lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
|
||
cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
|
||
debug("Read ODT_CTL : 0x%x (%d ohms)\n",
|
||
cc2.cn78xx.rodt_ctl,
|
||
imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
|
||
|
||
/*
|
||
* Use the delays associated with the best score for
|
||
* each individual rank
|
||
*/
|
||
debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n");
|
||
|
||
// this is the RANK MAJOR LOOP
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++)
|
||
rank_major_loop(priv, rankx, rl_score);
|
||
} /* Evaluation block */
|
||
} /* while(rl_dbg_loops--) */
|
||
|
||
ctl.cn78xx.ddr2t = save_ddr2t;
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
|
||
ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
/* Display final 2T value */
|
||
debug("DDR2T : %6d\n",
|
||
ctl.cn78xx.ddr2t);
|
||
|
||
ddr_init_seq(priv, rank_mask, if_num);
|
||
|
||
for (rankx = 0; rankx < dimm_count * 4; rankx++) {
|
||
u64 value;
|
||
int parameter_set = 0;
|
||
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
|
||
for (i = 0; i < 9; ++i) {
|
||
s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d",
|
||
if_num, rankx, i);
|
||
if (s) {
|
||
parameter_set |= 1;
|
||
value = simple_strtoul(s, NULL, 0);
|
||
|
||
upd_rl_rank(&rl_rank, i, value);
|
||
}
|
||
}
|
||
|
||
s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx);
|
||
if (s) {
|
||
parameter_set |= 1;
|
||
value = simple_strtoull(s, NULL, 0);
|
||
rl_rank.u64 = value;
|
||
}
|
||
|
||
if (parameter_set) {
|
||
lmc_wr(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
|
||
rl_rank.u64);
|
||
rl_rank.u64 = lmc_rd(priv,
|
||
CVMX_LMCX_RLEVEL_RANKX(rankx,
|
||
if_num));
|
||
display_rl(if_num, rl_rank, rankx);
|
||
}
|
||
}
|
||
}
|
||
|
||
int init_octeon3_ddr3_interface(struct ddr_priv *priv,
|
||
struct ddr_conf *_ddr_conf, u32 _ddr_hertz,
|
||
u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num,
|
||
u32 _if_mask)
|
||
{
|
||
union cvmx_lmcx_control ctrl;
|
||
int ret;
|
||
char *s;
|
||
int i;
|
||
|
||
if_num = _if_num;
|
||
ddr_hertz = _ddr_hertz;
|
||
ddr_conf = _ddr_conf;
|
||
if_mask = _if_mask;
|
||
odt_1rank_config = ddr_conf->odt_1rank_config;
|
||
odt_2rank_config = ddr_conf->odt_2rank_config;
|
||
odt_4rank_config = ddr_conf->odt_4rank_config;
|
||
dimm_config_table = ddr_conf->dimm_config_table;
|
||
c_cfg = &ddr_conf->custom_lmc_config;
|
||
|
||
/*
|
||
* Compute clock rates to the nearest picosecond.
|
||
*/
|
||
tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
|
||
eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */
|
||
|
||
dimm_count = 0;
|
||
/* Accumulate and report all the errors before giving up */
|
||
fatal_error = 0;
|
||
|
||
/* Flag that indicates safe DDR settings should be used */
|
||
safe_ddr_flag = 0;
|
||
if_64b = 1; /* Octeon II Default: 64bit interface width */
|
||
mem_size_mbytes = 0;
|
||
bank_bits = 0;
|
||
column_bits_start = 1;
|
||
use_ecc = 1;
|
||
min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
|
||
spd_package = 0;
|
||
spd_rawcard = 0;
|
||
spd_rawcard_aorb = 0;
|
||
spd_rdimm_registers = 0;
|
||
is_stacked_die = 0;
|
||
is_3ds_dimm = 0; // 3DS
|
||
lranks_per_prank = 1; // 3DS: logical ranks per package rank
|
||
lranks_bits = 0; // 3DS: logical ranks bits
|
||
die_capacity = 0; // in Mbits; only used for 3DS
|
||
|
||
wl_mask_err = 0;
|
||
dyn_rtt_nom_mask = 0;
|
||
ddr_disable_chip_reset = 1;
|
||
match_wl_rtt_nom = 0;
|
||
|
||
internal_retries = 0;
|
||
|
||
disable_deskew_training = 0;
|
||
restart_if_dsk_incomplete = 0;
|
||
last_lane = ((if_64b) ? 8 : 4) + use_ecc;
|
||
|
||
disable_sequential_delay_check = 0;
|
||
wl_print = WLEVEL_PRINTALL_DEFAULT;
|
||
|
||
enable_by_rank_init = 1; // FIXME: default by-rank ON
|
||
saved_rank_mask = 0;
|
||
|
||
node = 0;
|
||
|
||
memset(hwl_alts, 0, sizeof(hwl_alts));
|
||
|
||
/*
|
||
* Initialize these to shut up the compiler. They are configured
|
||
* and used only for DDR4
|
||
*/
|
||
ddr4_trrd_lmin = 6000;
|
||
ddr4_tccd_lmin = 6000;
|
||
|
||
debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n",
|
||
node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid());
|
||
|
||
if (dimm_config_table[0].spd_addrs[0] == 0 &&
|
||
!dimm_config_table[0].spd_ptrs[0]) {
|
||
printf("ERROR: No dimms specified in the dimm_config_table.\n");
|
||
return -1;
|
||
}
|
||
|
||
// allow some overrides to be done
|
||
|
||
// this one controls several things related to DIMM geometry: HWL and RL
|
||
disable_sequential_delay_check = c_cfg->disable_sequential_delay_check;
|
||
s = lookup_env(priv, "ddr_disable_sequential_delay_check");
|
||
if (s)
|
||
disable_sequential_delay_check = strtoul(s, NULL, 0);
|
||
|
||
// this one controls whether chip RESET is done, or LMC init restarted
|
||
// from step 6.9.6
|
||
s = lookup_env(priv, "ddr_disable_chip_reset");
|
||
if (s)
|
||
ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
|
||
|
||
// this one controls whether Deskew Training is performed
|
||
s = lookup_env(priv, "ddr_disable_deskew_training");
|
||
if (s)
|
||
disable_deskew_training = !!strtoul(s, NULL, 0);
|
||
|
||
if (ddr_verbose(priv)) {
|
||
printf("DDR SPD Table:");
|
||
for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
|
||
if (dimm_config_table[didx].spd_addrs[0] == 0)
|
||
break;
|
||
|
||
printf(" --ddr%dspd=0x%02x", if_num,
|
||
dimm_config_table[didx].spd_addrs[0]);
|
||
if (dimm_config_table[didx].spd_addrs[1] != 0)
|
||
printf(",0x%02x",
|
||
dimm_config_table[didx].spd_addrs[1]);
|
||
}
|
||
printf("\n");
|
||
}
|
||
|
||
/*
|
||
* Walk the DRAM Socket Configuration Table to see what is installed.
|
||
*/
|
||
for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
|
||
/* Check for lower DIMM socket populated */
|
||
if (validate_dimm(priv, &dimm_config_table[didx], 0)) {
|
||
if (ddr_verbose(priv))
|
||
report_dimm(&dimm_config_table[didx], 0,
|
||
dimm_count, if_num);
|
||
++dimm_count;
|
||
} else {
|
||
break;
|
||
} /* Finished when there is no lower DIMM */
|
||
}
|
||
|
||
initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz,
|
||
ddr_ref_hertz, if_num, if_mask);
|
||
|
||
if (!odt_1rank_config)
|
||
odt_1rank_config = disable_odt_config;
|
||
if (!odt_2rank_config)
|
||
odt_2rank_config = disable_odt_config;
|
||
if (!odt_4rank_config)
|
||
odt_4rank_config = disable_odt_config;
|
||
|
||
s = env_get("ddr_safe");
|
||
if (s) {
|
||
safe_ddr_flag = !!simple_strtoul(s, NULL, 0);
|
||
printf("Parameter found in environment. ddr_safe = %d\n",
|
||
safe_ddr_flag);
|
||
}
|
||
|
||
if (dimm_count == 0) {
|
||
printf("ERROR: DIMM 0 not detected.\n");
|
||
return (-1);
|
||
}
|
||
|
||
if (c_cfg->mode32b)
|
||
if_64b = 0;
|
||
|
||
s = lookup_env(priv, "if_64b");
|
||
if (s)
|
||
if_64b = !!simple_strtoul(s, NULL, 0);
|
||
|
||
if (if_64b == 1) {
|
||
if (octeon_is_cpuid(OCTEON_CN70XX)) {
|
||
printf("64-bit interface width is not supported for this Octeon model\n");
|
||
++fatal_error;
|
||
}
|
||
}
|
||
|
||
/* ddr_type only indicates DDR4 or DDR3 */
|
||
ddr_type = (read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3;
|
||
debug("DRAM Device Type: DDR%d\n", ddr_type);
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
int spd_module_type;
|
||
int asymmetric;
|
||
const char *signal_load[4] = { "", "MLS", "3DS", "RSV" };
|
||
|
||
imp_val = &ddr4_impedence_val;
|
||
|
||
spd_addr =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_ADDRESSING_ROW_COL_BITS);
|
||
spd_org =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MODULE_ORGANIZATION);
|
||
spd_banks =
|
||
0xFF & read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_DENSITY_BANKS);
|
||
|
||
bank_bits =
|
||
(2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
|
||
/* Controller can only address 4 bits. */
|
||
bank_bits = min((int)bank_bits, 4);
|
||
|
||
spd_package =
|
||
0XFF & read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_PACKAGE_TYPE);
|
||
if (spd_package & 0x80) { // non-monolithic device
|
||
is_stacked_die = ((spd_package & 0x73) == 0x11);
|
||
debug("DDR4: Package Type 0x%02x (%s), %d die\n",
|
||
spd_package, signal_load[(spd_package & 3)],
|
||
((spd_package >> 4) & 7) + 1);
|
||
is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
|
||
if (is_3ds_dimm) { // is it 3DS?
|
||
lranks_per_prank = ((spd_package >> 4) & 7) + 1;
|
||
// FIXME: should make sure it is only 2H or 4H
|
||
// or 8H?
|
||
lranks_bits = lranks_per_prank >> 1;
|
||
if (lranks_bits == 4)
|
||
lranks_bits = 3;
|
||
}
|
||
} else if (spd_package != 0) {
|
||
// FIXME: print non-zero monolithic device definition
|
||
debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
|
||
((spd_package >> 4) & 7) + 1, (spd_package & 3));
|
||
}
|
||
|
||
asymmetric = (spd_org >> 6) & 1;
|
||
if (asymmetric) {
|
||
int spd_secondary_pkg =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_SECONDARY_PACKAGE_TYPE);
|
||
debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n",
|
||
spd_secondary_pkg);
|
||
} else {
|
||
u64 bus_width =
|
||
8 << (0x07 &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
|
||
u64 ddr_width = 4 << ((spd_org >> 0) & 0x7);
|
||
u64 module_cap;
|
||
int shift = (spd_banks & 0x0F);
|
||
|
||
die_capacity = (shift < 8) ? (256UL << shift) :
|
||
((12UL << (shift & 1)) << 10);
|
||
debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
|
||
(die_capacity > 512) ? (die_capacity >> 10) :
|
||
die_capacity, (die_capacity > 512) ? 'G' : 'M');
|
||
module_cap = ((u64)die_capacity << 20) / 8UL *
|
||
bus_width / ddr_width *
|
||
(1UL + ((spd_org >> 3) & 0x7));
|
||
|
||
// is it 3DS?
|
||
if (is_3ds_dimm) {
|
||
module_cap *= (u64)(((spd_package >> 4) & 7) +
|
||
1);
|
||
}
|
||
debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n",
|
||
module_cap >> 30);
|
||
}
|
||
|
||
spd_rawcard =
|
||
0xFF & read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_REFERENCE_RAW_CARD);
|
||
debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard);
|
||
|
||
spd_module_type =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_KEY_BYTE_MODULE_TYPE);
|
||
if (spd_module_type & 0x80) { // HYBRID module
|
||
debug("DDR4: HYBRID module, type %s\n",
|
||
((spd_module_type & 0x70) ==
|
||
0x10) ? "NVDIMM" : "UNKNOWN");
|
||
}
|
||
spd_thermal_sensor =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MODULE_THERMAL_SENSOR);
|
||
spd_dimm_type = spd_module_type & 0x0F;
|
||
spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
|
||
(spd_dimm_type == 8);
|
||
if (spd_rdimm) {
|
||
u16 spd_mfgr_id, spd_register_rev, spd_mod_attr;
|
||
static const u16 manu_ids[4] = {
|
||
0xb380, 0x3286, 0x9780, 0xb304
|
||
};
|
||
static const char *manu_names[4] = {
|
||
"XXX", "XXXXXXX", "XX", "XXXXX"
|
||
};
|
||
int mc;
|
||
|
||
spd_mfgr_id =
|
||
(0xFFU &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
|
||
((0xFFU &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB))
|
||
<< 8);
|
||
spd_register_rev =
|
||
0xFFU & read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_REGISTER_REVISION_NUMBER);
|
||
for (mc = 0; mc < 4; mc++)
|
||
if (manu_ids[mc] == spd_mfgr_id)
|
||
break;
|
||
|
||
debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n",
|
||
(mc >= 4) ? "UNKNOWN" : manu_names[mc],
|
||
spd_register_rev);
|
||
|
||
// RAWCARD A or B must be bit 7=0 and bits 4-0
|
||
// either 00000(A) or 00001(B)
|
||
spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1);
|
||
// RDIMM Module Attributes
|
||
spd_mod_attr =
|
||
0xFFU & read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE);
|
||
spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
|
||
debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n",
|
||
spd_mod_attr, (spd_mod_attr >> 4) + 1,
|
||
((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
|
||
spd_rdimm_registers);
|
||
}
|
||
dimm_type_name = ddr4_dimm_types[spd_dimm_type];
|
||
} else { /* if (ddr_type == DDR4_DRAM) */
|
||
const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" };
|
||
|
||
imp_val = &ddr3_impedence_val;
|
||
|
||
spd_addr =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_ADDRESSING_ROW_COL_BITS);
|
||
spd_org =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MODULE_ORGANIZATION);
|
||
spd_banks =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_DENSITY_BANKS) & 0xff;
|
||
|
||
bank_bits = 3 + ((spd_banks >> 4) & 0x7);
|
||
/* Controller can only address 3 bits. */
|
||
bank_bits = min((int)bank_bits, 3);
|
||
spd_dimm_type =
|
||
0x0f & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_KEY_BYTE_MODULE_TYPE);
|
||
spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
|
||
(spd_dimm_type == 9);
|
||
|
||
spd_package =
|
||
0xFF & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_SDRAM_DEVICE_TYPE);
|
||
if (spd_package & 0x80) { // non-standard device
|
||
debug("DDR3: Device Type 0x%02x (%s), %d die\n",
|
||
spd_package, signal_load[(spd_package & 3)],
|
||
((1 << ((spd_package >> 4) & 7)) >> 1));
|
||
} else if (spd_package != 0) {
|
||
// FIXME: print non-zero monolithic device definition
|
||
debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n",
|
||
((1 << (spd_package >> 4) & 7) >> 1),
|
||
(spd_package & 3));
|
||
}
|
||
|
||
spd_rawcard =
|
||
0xFF & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_REFERENCE_RAW_CARD);
|
||
debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard);
|
||
spd_thermal_sensor =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MODULE_THERMAL_SENSOR);
|
||
|
||
if (spd_rdimm) {
|
||
int spd_mfgr_id, spd_register_rev, spd_mod_attr;
|
||
|
||
spd_mfgr_id =
|
||
(0xFFU &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
|
||
((0xFFU &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB))
|
||
<< 8);
|
||
spd_register_rev =
|
||
0xFFU & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_REGISTER_REVISION_NUMBER);
|
||
debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n",
|
||
spd_mfgr_id, spd_register_rev);
|
||
// Module Attributes
|
||
spd_mod_attr =
|
||
0xFFU & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_ADDRESS_MAPPING);
|
||
spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
|
||
debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n",
|
||
spd_mod_attr,
|
||
((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
|
||
spd_rdimm_registers);
|
||
}
|
||
dimm_type_name = ddr3_dimm_types[spd_dimm_type];
|
||
}
|
||
|
||
if (spd_thermal_sensor & 0x80) {
|
||
debug("DDR%d: SPD: Thermal Sensor PRESENT\n",
|
||
(ddr_type == DDR4_DRAM) ? 4 : 3);
|
||
}
|
||
|
||
debug("spd_addr : %#06x\n", spd_addr);
|
||
debug("spd_org : %#06x\n", spd_org);
|
||
debug("spd_banks : %#06x\n", spd_banks);
|
||
|
||
row_bits = 12 + ((spd_addr >> 3) & 0x7);
|
||
col_bits = 9 + ((spd_addr >> 0) & 0x7);
|
||
|
||
num_ranks = 1 + ((spd_org >> 3) & 0x7);
|
||
dram_width = 4 << ((spd_org >> 0) & 0x7);
|
||
num_banks = 1 << bank_bits;
|
||
|
||
s = lookup_env(priv, "ddr_num_ranks");
|
||
if (s)
|
||
num_ranks = simple_strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_enable_by_rank_init");
|
||
if (s)
|
||
enable_by_rank_init = !!simple_strtoul(s, NULL, 0);
|
||
|
||
// FIXME: for now, we can only handle a DDR4 2rank-1slot config
|
||
// FIXME: also, by-rank init does not work correctly if 32-bit mode...
|
||
if (enable_by_rank_init && (ddr_type != DDR4_DRAM ||
|
||
dimm_count != 1 || if_64b != 1 ||
|
||
num_ranks != 2))
|
||
enable_by_rank_init = 0;
|
||
|
||
if (enable_by_rank_init) {
|
||
struct dimm_odt_config *odt_config;
|
||
union cvmx_lmcx_modereg_params1 mp1;
|
||
union cvmx_lmcx_modereg_params2 modereg_params2;
|
||
int by_rank_rodt, by_rank_wr, by_rank_park;
|
||
|
||
// Do ODT settings changes which work best for 2R-1S configs
|
||
debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n");
|
||
|
||
// setup for modifying config table values - 2 ranks and 1 DIMM
|
||
odt_config =
|
||
(struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0];
|
||
|
||
// original was 80, first try was 60
|
||
by_rank_rodt = ddr4_rodt_ctl_48_ohm;
|
||
s = lookup_env(priv, "ddr_by_rank_rodt");
|
||
if (s)
|
||
by_rank_rodt = strtoul(s, NULL, 0);
|
||
|
||
odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt;
|
||
|
||
// this is for MODEREG_PARAMS1 fields
|
||
// fetch the original settings
|
||
mp1.u64 = odt_config->modereg_params1.u64;
|
||
|
||
by_rank_wr = ddr4_rttwr_80ohm; // originals were 240
|
||
s = lookup_env(priv, "ddr_by_rank_wr");
|
||
if (s)
|
||
by_rank_wr = simple_strtoul(s, NULL, 0);
|
||
|
||
// change specific settings here...
|
||
insrt_wr(&mp1.u64, /*rank */ 00, by_rank_wr);
|
||
insrt_wr(&mp1.u64, /*rank */ 01, by_rank_wr);
|
||
|
||
// save final settings
|
||
odt_config->modereg_params1.u64 = mp1.u64;
|
||
|
||
// this is for MODEREG_PARAMS2 fields
|
||
// fetch the original settings
|
||
modereg_params2.u64 = odt_config->modereg_params2.u64;
|
||
|
||
by_rank_park = ddr4_rttpark_none; // originals were 120
|
||
s = lookup_env(priv, "ddr_by_rank_park");
|
||
if (s)
|
||
by_rank_park = simple_strtoul(s, NULL, 0);
|
||
|
||
// change specific settings here...
|
||
modereg_params2.s.rtt_park_00 = by_rank_park;
|
||
modereg_params2.s.rtt_park_01 = by_rank_park;
|
||
|
||
// save final settings
|
||
odt_config->modereg_params2.u64 = modereg_params2.u64;
|
||
}
|
||
|
||
/*
|
||
* FIX
|
||
* Check that values are within some theoretical limits.
|
||
* col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) =
|
||
* 14 - 3 - 4 = 7
|
||
* col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) =
|
||
* 18 - 2 - 3 = 13
|
||
*/
|
||
if (col_bits > 13 || col_bits < 7) {
|
||
printf("Unsupported number of Col Bits: %d\n", col_bits);
|
||
++fatal_error;
|
||
}
|
||
|
||
/*
|
||
* FIX
|
||
* Check that values are within some theoretical limits.
|
||
* row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits =
|
||
* 26 - 18 - 1 = 7
|
||
* row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits =
|
||
* 33 - 14 - 1 = 18
|
||
*/
|
||
if (row_bits > 18 || row_bits < 7) {
|
||
printf("Unsupported number of Row Bits: %d\n", row_bits);
|
||
++fatal_error;
|
||
}
|
||
|
||
s = lookup_env(priv, "ddr_rdimm_ena");
|
||
if (s)
|
||
spd_rdimm = !!simple_strtoul(s, NULL, 0);
|
||
|
||
wl_loops = WLEVEL_LOOPS_DEFAULT;
|
||
// accept generic or interface-specific override
|
||
s = lookup_env(priv, "ddr_wlevel_loops");
|
||
if (!s)
|
||
s = lookup_env(priv, "ddr%d_wlevel_loops", if_num);
|
||
|
||
if (s)
|
||
wl_loops = strtoul(s, NULL, 0);
|
||
|
||
s = lookup_env(priv, "ddr_ranks");
|
||
if (s)
|
||
num_ranks = simple_strtoul(s, NULL, 0);
|
||
|
||
bunk_enable = (num_ranks > 1);
|
||
|
||
if (octeon_is_cpuid(OCTEON_CN7XXX))
|
||
column_bits_start = 3;
|
||
else
|
||
printf("ERROR: Unsupported Octeon model: 0x%x\n",
|
||
read_c0_prid());
|
||
|
||
row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b);
|
||
debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n",
|
||
row_lsb);
|
||
|
||
pbank_lsb = row_lsb + row_bits + bunk_enable;
|
||
debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
|
||
|
||
if (lranks_per_prank > 1) {
|
||
pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
|
||
debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
|
||
row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
|
||
}
|
||
|
||
mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
|
||
if (num_ranks == 4) {
|
||
/*
|
||
* Quad rank dimm capacity is equivalent to two dual-rank
|
||
* dimms.
|
||
*/
|
||
mem_size_mbytes *= 2;
|
||
}
|
||
|
||
/*
|
||
* Mask with 1 bits set for each active rank, allowing 2 bits
|
||
* per dimm. This makes later calculations simpler, as a variety
|
||
* of CSRs use this layout. This init needs to be updated for dual
|
||
* configs (ie non-identical DIMMs).
|
||
*
|
||
* Bit 0 = dimm0, rank 0
|
||
* Bit 1 = dimm0, rank 1
|
||
* Bit 2 = dimm1, rank 0
|
||
* Bit 3 = dimm1, rank 1
|
||
* ...
|
||
*/
|
||
rank_mask = 0x1;
|
||
if (num_ranks > 1)
|
||
rank_mask = 0x3;
|
||
if (num_ranks > 2)
|
||
rank_mask = 0xf;
|
||
|
||
for (i = 1; i < dimm_count; i++)
|
||
rank_mask |= ((rank_mask & 0x3) << (2 * i));
|
||
|
||
/*
|
||
* If we are booting from RAM, the DRAM controller is
|
||
* already set up. Just return the memory size
|
||
*/
|
||
if (priv->flags & FLAG_RAM_RESIDENT) {
|
||
debug("Ram Boot: Skipping LMC config\n");
|
||
return mem_size_mbytes;
|
||
}
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
spd_ecc =
|
||
!!(read_spd
|
||
(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8);
|
||
} else {
|
||
spd_ecc =
|
||
!!(read_spd
|
||
(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MEMORY_BUS_WIDTH) & 8);
|
||
}
|
||
|
||
char rank_spec[8];
|
||
|
||
printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package);
|
||
debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n",
|
||
dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
|
||
rank_spec,
|
||
(spd_ecc) ? "ECC" : "non-ECC", row_bits, col_bits, bank_bits);
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
spd_cas_latency =
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
|
||
spd_cas_latency |=
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
|
||
spd_cas_latency |=
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
|
||
spd_cas_latency |=
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
|
||
} else {
|
||
spd_cas_latency =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_CAS_LATENCIES_LSB);
|
||
spd_cas_latency |=
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
|
||
}
|
||
debug("spd_cas_latency : %#06x\n", spd_cas_latency);
|
||
|
||
if (ddr_type == DDR4_DRAM) {
|
||
/*
|
||
* No other values for DDR4 MTB and FTB are specified at the
|
||
* current time so don't bother reading them. Can't speculate
|
||
* how new values will be represented.
|
||
*/
|
||
int spdmtb = 125;
|
||
int spdftb = 1;
|
||
|
||
taamin = spdmtb * read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) +
|
||
spdftb * (signed char)read_spd(&dimm_config_table[0],
|
||
0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
|
||
|
||
ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) +
|
||
spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
|
||
|
||
ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) +
|
||
spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
|
||
|
||
ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) +
|
||
spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
|
||
|
||
ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) +
|
||
spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
|
||
|
||
ddr4_trasmin = spdmtb *
|
||
(((read_spd
|
||
(&dimm_config_table[0], 0,
|
||
DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
|
||
(read_spd
|
||
(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
|
||
|
||
ddr4_trcmin = spdmtb *
|
||
((((read_spd
|
||
(&dimm_config_table[0], 0,
|
||
DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) <<
|
||
8) + (read_spd
|
||
(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) &
|
||
0xff))
|
||
+ spdftb * (signed char)read_spd(&dimm_config_table[0],
|
||
0,
|
||
DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
|
||
|
||
ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) <<
|
||
8) + (read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
|
||
|
||
ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) <<
|
||
8) + (read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
|
||
|
||
ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) <<
|
||
8) + (read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
|
||
|
||
ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) <<
|
||
8) + (read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
|
||
|
||
ddr4_trrd_smin = spdmtb * read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) +
|
||
spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
|
||
|
||
ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) +
|
||
spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
|
||
|
||
ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) +
|
||
spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
|
||
|
||
debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb);
|
||
debug("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdftb);
|
||
|
||
debug("%-45s : %6d ps (%ld MT/s)\n",
|
||
"SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin,
|
||
pretty_psecs_to_mts(ddr4_tckavgmin));
|
||
debug("%-45s : %6d ps\n",
|
||
"SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax);
|
||
debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)",
|
||
taamin);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Active to Precharge Delay (tRASmin)",
|
||
ddr4_trasmin);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Active to Active/Refr. Delay (tRCmin)",
|
||
ddr4_trcmin);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Refresh Recovery Delay (tRFC1min)",
|
||
ddr4_trfc1min);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Refresh Recovery Delay (tRFC2min)",
|
||
ddr4_trfc2min);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Refresh Recovery Delay (tRFC4min)",
|
||
ddr4_trfc4min);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Four Activate Window Time (tFAWmin)",
|
||
ddr4_tfawmin);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_trrd_lmin);
|
||
debug("%-45s : %6d ps\n",
|
||
"Minimum CAS to CAS Delay Time (tCCD_Lmin)",
|
||
ddr4_tccd_lmin);
|
||
|
||
#define DDR4_TWR 15000
|
||
#define DDR4_TWTR_S 2500
|
||
|
||
tckmin = ddr4_tckavgmin;
|
||
twr = DDR4_TWR;
|
||
trcd = ddr4_trdcmin;
|
||
trrd = ddr4_trrd_smin;
|
||
trp = ddr4_trpmin;
|
||
tras = ddr4_trasmin;
|
||
trc = ddr4_trcmin;
|
||
trfc = ddr4_trfc1min;
|
||
twtr = DDR4_TWTR_S;
|
||
tfaw = ddr4_tfawmin;
|
||
|
||
if (spd_rdimm) {
|
||
spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) &
|
||
0x1;
|
||
} else {
|
||
spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
|
||
DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
|
||
}
|
||
debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
|
||
} else {
|
||
spd_mtb_dividend =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
|
||
spd_mtb_divisor =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
|
||
spd_tck_min =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
|
||
spd_taa_min =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
|
||
|
||
spd_twr =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
|
||
spd_trcd =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
|
||
spd_trrd =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
|
||
spd_trp =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
|
||
spd_tras =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
|
||
spd_tras |=
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8);
|
||
spd_trc =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
|
||
spd_trc |=
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4);
|
||
spd_trfc =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
|
||
spd_trfc |=
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) <<
|
||
8);
|
||
spd_twtr =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
|
||
spd_trtp =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
|
||
spd_tfaw =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
|
||
spd_tfaw |=
|
||
((0xff &
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8);
|
||
spd_addr_mirror =
|
||
0xff & read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_ADDRESS_MAPPING) & 0x1;
|
||
/* Only address mirror unbuffered dimms. */
|
||
spd_addr_mirror = spd_addr_mirror && !spd_rdimm;
|
||
ftb_dividend =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
|
||
ftb_divisor =
|
||
read_spd(&dimm_config_table[0], 0,
|
||
DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
|
||
/* Make sure that it is not 0 */
|
||
ftb_divisor = (ftb_divisor == 0) ? 1 : ftb_divisor;
|
||
|
||
debug("spd_twr : %#06x\n", spd_twr);
|
||
debug("spd_trcd : %#06x\n", spd_trcd);
|
||
debug("spd_trrd : %#06x\n", spd_trrd);
|
||
debug("spd_trp : %#06x\n", spd_trp);
|
||
debug("spd_tras : %#06x\n", spd_tras);
|
||
debug("spd_trc : %#06x\n", spd_trc);
|
||
debug("spd_trfc : %#06x\n", spd_trfc);
|
||
debug("spd_twtr : %#06x\n", spd_twtr);
|
||
debug("spd_trtp : %#06x\n", spd_trtp);
|
||
debug("spd_tfaw : %#06x\n", spd_tfaw);
|
||
debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
|
||
|
||
mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
|
||
taamin = mtb_psec * spd_taa_min;
|
||
taamin += ftb_dividend *
|
||
(signed char)read_spd(&dimm_config_table[0],
|
||
0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) /
|
||
ftb_divisor;
|
||
tckmin = mtb_psec * spd_tck_min;
|
||
tckmin += ftb_dividend *
|
||
(signed char)read_spd(&dimm_config_table[0],
|
||
0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) /
|
||
ftb_divisor;
|
||
|
||
twr = spd_twr * mtb_psec;
|
||
trcd = spd_trcd * mtb_psec;
|
||
trrd = spd_trrd * mtb_psec;
|
||
trp = spd_trp * mtb_psec;
|
||
tras = spd_tras * mtb_psec;
|
||
trc = spd_trc * mtb_psec;
|
||
trfc = spd_trfc * mtb_psec;
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) {
|
||
// default to this - because it works...
|
||
int new_trfc = 260000;
|
||
|
||
s = env_get("ddr_trfc");
|
||
if (s) {
|
||
new_trfc = simple_strtoul(s, NULL, 0);
|
||
printf("Parameter found in environment. ddr_trfc = %d\n",
|
||
new_trfc);
|
||
if (new_trfc < 160000 || new_trfc > 260000) {
|
||
// back to default if out of range
|
||
new_trfc = 260000;
|
||
}
|
||
}
|
||
debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n",
|
||
node, if_num, trfc, new_trfc);
|
||
trfc = new_trfc;
|
||
}
|
||
|
||
twtr = spd_twtr * mtb_psec;
|
||
trtp = spd_trtp * mtb_psec;
|
||
tfaw = spd_tfaw * mtb_psec;
|
||
|
||
debug("Medium Timebase (MTB) : %6d ps\n",
|
||
mtb_psec);
|
||
debug("Minimum Cycle Time (tckmin) : %6d ps (%ld MT/s)\n",
|
||
tckmin, pretty_psecs_to_mts(tckmin));
|
||
debug("Minimum CAS Latency Time (taamin) : %6d ps\n",
|
||
taamin);
|
||
debug("Write Recovery Time (tWR) : %6d ps\n",
|
||
twr);
|
||
debug("Minimum RAS to CAS delay (tRCD) : %6d ps\n",
|
||
trcd);
|
||
debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n",
|
||
trrd);
|
||
debug("Minimum Row Precharge Delay (tRP) : %6d ps\n",
|
||
trp);
|
||
debug("Minimum Active to Precharge (tRAS) : %6d ps\n",
|
||
tras);
|
||
debug("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n",
|
||
trc);
|
||
debug("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n",
|
||
trfc);
|
||
debug("Internal write to read command delay (tWTR) : %6d ps\n",
|
||
twtr);
|
||
debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n",
|
||
trtp);
|
||
debug("Minimum Four Activate Window Delay (tFAW) : %6d ps\n",
|
||
tfaw);
|
||
}
|
||
|
||
/*
|
||
* When the cycle time is within 1 psec of the minimum accept it
|
||
* as a slight rounding error and adjust it to exactly the minimum
|
||
* cycle time. This avoids an unnecessary warning.
|
||
*/
|
||
if (abs(tclk_psecs - tckmin) < 2)
|
||
tclk_psecs = tckmin;
|
||
|
||
if (tclk_psecs < (u64)tckmin) {
|
||
printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n",
|
||
tclk_psecs, (ulong)tckmin);
|
||
}
|
||
|
||
debug("DDR Clock Rate (tCLK) : %6ld ps\n",
|
||
tclk_psecs);
|
||
debug("Core Clock Rate (eCLK) : %6ld ps\n",
|
||
eclk_psecs);
|
||
|
||
s = env_get("ddr_use_ecc");
|
||
if (s) {
|
||
use_ecc = !!simple_strtoul(s, NULL, 0);
|
||
printf("Parameter found in environment. ddr_use_ecc = %d\n",
|
||
use_ecc);
|
||
}
|
||
use_ecc = use_ecc && spd_ecc;
|
||
|
||
if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff)
|
||
: (use_ecc ? 0x01f : 0x0f);
|
||
|
||
debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n",
|
||
if_64b ? 64 : 32, use_ecc ? "+ECC" : "", if_bytemask);
|
||
|
||
debug("\n------ Board Custom Configuration Settings ------\n");
|
||
debug("%-45s : %d\n", "MIN_RTT_NOM_IDX ", c_cfg->min_rtt_nom_idx);
|
||
debug("%-45s : %d\n", "MAX_RTT_NOM_IDX ", c_cfg->max_rtt_nom_idx);
|
||
debug("%-45s : %d\n", "MIN_RODT_CTL ", c_cfg->min_rodt_ctl);
|
||
debug("%-45s : %d\n", "MAX_RODT_CTL ", c_cfg->max_rodt_ctl);
|
||
debug("%-45s : %d\n", "MIN_CAS_LATENCY ", c_cfg->min_cas_latency);
|
||
debug("%-45s : %d\n", "OFFSET_EN ", c_cfg->offset_en);
|
||
debug("%-45s : %d\n", "OFFSET_UDIMM ", c_cfg->offset_udimm);
|
||
debug("%-45s : %d\n", "OFFSET_RDIMM ", c_cfg->offset_rdimm);
|
||
debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", c_cfg->ddr_rtt_nom_auto);
|
||
debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto);
|
||
if (spd_rdimm)
|
||
debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
|
||
c_cfg->rlevel_comp_offset_rdimm);
|
||
else
|
||
debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
|
||
c_cfg->rlevel_comp_offset_udimm);
|
||
debug("%-45s : %d\n", "RLEVEL_COMPUTE ", c_cfg->rlevel_compute);
|
||
debug("%-45s : %d\n", "DDR2T_UDIMM ", c_cfg->ddr2t_udimm);
|
||
debug("%-45s : %d\n", "DDR2T_RDIMM ", c_cfg->ddr2t_rdimm);
|
||
debug("%-45s : %d\n", "FPRCH2 ", c_cfg->fprch2);
|
||
debug("%-45s : %d\n", "PTUNE_OFFSET ", c_cfg->ptune_offset);
|
||
debug("%-45s : %d\n", "NTUNE_OFFSET ", c_cfg->ntune_offset);
|
||
debug("-------------------------------------------------\n");
|
||
|
||
cl = divide_roundup(taamin, tclk_psecs);
|
||
|
||
debug("Desired CAS Latency : %6d\n", cl);
|
||
|
||
min_cas_latency = c_cfg->min_cas_latency;
|
||
|
||
s = lookup_env(priv, "ddr_min_cas_latency");
|
||
if (s)
|
||
min_cas_latency = simple_strtoul(s, NULL, 0);
|
||
|
||
debug("CAS Latencies supported in DIMM :");
|
||
base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4;
|
||
for (i = 0; i < 32; ++i) {
|
||
if ((spd_cas_latency >> i) & 1) {
|
||
debug(" %d", i + base_cl);
|
||
max_cas_latency = i + base_cl;
|
||
if (min_cas_latency == 0)
|
||
min_cas_latency = i + base_cl;
|
||
}
|
||
}
|
||
debug("\n");
|
||
|
||
/*
|
||
* Use relaxed timing when running slower than the minimum
|
||
* supported speed. Adjust timing to match the smallest supported
|
||
* CAS Latency.
|
||
*/
|
||
if (min_cas_latency > cl) {
|
||
ulong adjusted_tclk = taamin / min_cas_latency;
|
||
|
||
cl = min_cas_latency;
|
||
debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n",
|
||
tclk_psecs, adjusted_tclk);
|
||
tclk_psecs = adjusted_tclk;
|
||
}
|
||
|
||
s = env_get("ddr_cas_latency");
|
||
if (s) {
|
||
override_cas_latency = simple_strtoul(s, NULL, 0);
|
||
printf("Parameter found in environment. ddr_cas_latency = %d\n",
|
||
override_cas_latency);
|
||
}
|
||
|
||
/* Make sure that the selected cas latency is legal */
|
||
for (i = (cl - base_cl); i < 32; ++i) {
|
||
if ((spd_cas_latency >> i) & 1) {
|
||
cl = i + base_cl;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (max_cas_latency < cl)
|
||
cl = max_cas_latency;
|
||
|
||
if (override_cas_latency != 0)
|
||
cl = override_cas_latency;
|
||
|
||
debug("CAS Latency : %6d\n", cl);
|
||
|
||
if ((cl * tckmin) > 20000) {
|
||
debug("(CLactual * tckmin) = %d exceeds 20 ns\n",
|
||
(cl * tckmin));
|
||
}
|
||
|
||
if (tclk_psecs < (ulong)tckmin) {
|
||
printf("WARNING!!!!!!: DDR3 Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin:%ld)!!!!!!!!\n",
|
||
tclk_psecs, (ulong)tckmin);
|
||
}
|
||
|
||
if (num_banks != 4 && num_banks != 8 && num_banks != 16) {
|
||
printf("Unsupported number of banks %d. Must be 4 or 8.\n",
|
||
num_banks);
|
||
++fatal_error;
|
||
}
|
||
|
||
if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) {
|
||
printf("Unsupported number of ranks: %d\n", num_ranks);
|
||
++fatal_error;
|
||
}
|
||
|
||
if (octeon_is_cpuid(OCTEON_CN78XX) ||
|
||
octeon_is_cpuid(OCTEON_CN73XX) ||
|
||
octeon_is_cpuid(OCTEON_CNF75XX)) {
|
||
if (dram_width != 8 && dram_width != 16 && dram_width != 4) {
|
||
printf("Unsupported SDRAM Width, %d. Must be 4, 8 or 16.\n",
|
||
dram_width);
|
||
++fatal_error;
|
||
}
|
||
} else if (dram_width != 8 && dram_width != 16) {
|
||
printf("Unsupported SDRAM Width, %d. Must be 8 or 16.\n",
|
||
dram_width);
|
||
++fatal_error;
|
||
}
|
||
|
||
/*
|
||
** Bail out here if things are not copasetic.
|
||
*/
|
||
if (fatal_error)
|
||
return (-1);
|
||
|
||
/*
|
||
* 4.8.4 LMC RESET Initialization
|
||
*
|
||
* The purpose of this step is to assert/deassert the RESET# pin at the
|
||
* DDR3/DDR4 parts.
|
||
*
|
||
* This LMC RESET step is done for all enabled LMCs.
|
||
*/
|
||
perform_lmc_reset(priv, node, if_num);
|
||
|
||
// Make sure scrambling is disabled during init...
|
||
ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
|
||
ctrl.s.scramble_ena = 0;
|
||
lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
|
||
|
||
lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0);
|
||
lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0);
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
|
||
lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0);
|
||
|
||
odt_idx = min(dimm_count - 1, 3);
|
||
|
||
switch (num_ranks) {
|
||
case 1:
|
||
odt_config = odt_1rank_config;
|
||
break;
|
||
case 2:
|
||
odt_config = odt_2rank_config;
|
||
break;
|
||
case 4:
|
||
odt_config = odt_4rank_config;
|
||
break;
|
||
default:
|
||
odt_config = disable_odt_config;
|
||
printf("Unsupported number of ranks: %d\n", num_ranks);
|
||
++fatal_error;
|
||
}
|
||
|
||
/*
|
||
* 4.8.5 Early LMC Initialization
|
||
*
|
||
* All of DDR PLL, LMC CK, and LMC DRESET initializations must be
|
||
* completed prior to starting this LMC initialization sequence.
|
||
*
|
||
* Perform the following five substeps for early LMC initialization:
|
||
*
|
||
* 1. Software must ensure there are no pending DRAM transactions.
|
||
*
|
||
* 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
|
||
* LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
|
||
* LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
|
||
* LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
|
||
* LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
|
||
* appropriate values. All sections in this chapter can be used to
|
||
* derive proper register settings.
|
||
*/
|
||
|
||
/* LMC(0)_CONFIG */
|
||
lmc_config(priv);
|
||
|
||
/* LMC(0)_CONTROL */
|
||
lmc_control(priv);
|
||
|
||
/* LMC(0)_TIMING_PARAMS0 */
|
||
lmc_timing_params0(priv);
|
||
|
||
/* LMC(0)_TIMING_PARAMS1 */
|
||
lmc_timing_params1(priv);
|
||
|
||
/* LMC(0)_TIMING_PARAMS2 */
|
||
lmc_timing_params2(priv);
|
||
|
||
/* LMC(0)_MODEREG_PARAMS0 */
|
||
lmc_modereg_params0(priv);
|
||
|
||
/* LMC(0)_MODEREG_PARAMS1 */
|
||
lmc_modereg_params1(priv);
|
||
|
||
/* LMC(0)_MODEREG_PARAMS2 */
|
||
lmc_modereg_params2(priv);
|
||
|
||
/* LMC(0)_MODEREG_PARAMS3 */
|
||
lmc_modereg_params3(priv);
|
||
|
||
/* LMC(0)_NXM */
|
||
lmc_nxm(priv);
|
||
|
||
/* LMC(0)_WODT_MASK */
|
||
lmc_wodt_mask(priv);
|
||
|
||
/* LMC(0)_RODT_MASK */
|
||
lmc_rodt_mask(priv);
|
||
|
||
/* LMC(0)_COMP_CTL2 */
|
||
lmc_comp_ctl2(priv);
|
||
|
||
/* LMC(0)_PHY_CTL */
|
||
lmc_phy_ctl(priv);
|
||
|
||
/* LMC(0)_EXT_CONFIG */
|
||
lmc_ext_config(priv);
|
||
|
||
/* LMC(0)_EXT_CONFIG2 */
|
||
lmc_ext_config2(priv);
|
||
|
||
/* LMC(0)_DIMM0/1_PARAMS */
|
||
lmc_dimm01_params(priv);
|
||
|
||
ret = lmc_rank_init(priv);
|
||
if (ret < 0)
|
||
return 0; /* 0 indicates problem */
|
||
|
||
lmc_config_2(priv);
|
||
|
||
lmc_write_leveling(priv);
|
||
|
||
lmc_read_leveling(priv);
|
||
|
||
lmc_workaround(priv);
|
||
|
||
ret = lmc_sw_write_leveling(priv);
|
||
if (ret < 0)
|
||
return 0; /* 0 indicates problem */
|
||
|
||
// this sometimes causes stack overflow crashes..
|
||
// display only for DDR4 RDIMMs.
|
||
if (ddr_type == DDR4_DRAM && spd_rdimm) {
|
||
int i;
|
||
|
||
for (i = 0; i < 3; i += 2) // just pages 0 and 2 for now..
|
||
display_mpr_page(priv, rank_mask, if_num, i);
|
||
}
|
||
|
||
lmc_dll(priv);
|
||
|
||
lmc_workaround_2(priv);
|
||
|
||
lmc_final(priv);
|
||
|
||
lmc_scrambling(priv);
|
||
|
||
return mem_size_mbytes;
|
||
}
|
||
|
||
///// HW-assist byte DLL offset tuning //////
|
||
|
||
static int cvmx_dram_get_num_lmc(struct ddr_priv *priv)
|
||
{
|
||
union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
|
||
|
||
if (octeon_is_cpuid(OCTEON_CN70XX))
|
||
return 1;
|
||
|
||
if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) {
|
||
// sample LMC1
|
||
lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1));
|
||
if (lmcx_dll_ctl2.cn78xx.intf_en)
|
||
return 2;
|
||
else
|
||
return 1;
|
||
}
|
||
|
||
// for CN78XX, LMCs are always active in pairs, and always LMC0/1
|
||
// so, we sample LMC2 to see if 2 and 3 are active
|
||
lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2));
|
||
if (lmcx_dll_ctl2.cn78xx.intf_en)
|
||
return 4;
|
||
else
|
||
return 2;
|
||
}
|
||
|
||
// got to do these here, even though already defined in BDK
|
||
|
||
// all DDR3, and DDR4 x16 today, use only 3 bank bits;
|
||
// DDR4 x4 and x8 always have 4 bank bits
|
||
// NOTE: this will change in the future, when DDR4 x16 devices can
|
||
// come with 16 banks!! FIXME!!
|
||
static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc)
|
||
{
|
||
union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
|
||
union cvmx_lmcx_config lmcx_config;
|
||
union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl;
|
||
int bank_width;
|
||
|
||
// can always read this
|
||
lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc));
|
||
|
||
if (lmcx_dll_ctl2.cn78xx.dreset) // check LMCn
|
||
return 0;
|
||
|
||
lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc));
|
||
lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc));
|
||
|
||
bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) &&
|
||
(lmcx_config.s.bg2_enable)) ? 4 : 3;
|
||
|
||
return bank_width;
|
||
}
|
||
|
||
/* Extract the `width`-bit field of `v` whose least-significant bit is `lsb`. */
#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
|
||
/* Addresses at or above this value are adjusted down by it before decoding. */
#define ADDRESS_HOLE 0x10000000ULL
|
||
|
||
static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address,
|
||
int *node, int *lmc, int *dimm,
|
||
int *prank, int *lrank, int *bank,
|
||
int *row, int *col)
|
||
{
|
||
int bank_lsb, xbits;
|
||
union cvmx_l2c_ctl l2c_ctl;
|
||
union cvmx_lmcx_config lmcx_config;
|
||
union cvmx_lmcx_control lmcx_control;
|
||
union cvmx_lmcx_ext_config ext_config;
|
||
int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
|
||
int bank_width;
|
||
int dimm_lsb;
|
||
int dimm_width;
|
||
int prank_lsb, lrank_lsb;
|
||
int prank_width, lrank_width;
|
||
int row_lsb;
|
||
int row_width;
|
||
int col_hi_lsb;
|
||
int col_hi_width;
|
||
int col_hi;
|
||
|
||
if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
|
||
bitno = 18;
|
||
|
||
*node = EXTRACT(address, 40, 2); /* Address bits [41:40] */
|
||
|
||
address &= (1ULL << 40) - 1; // lop off any node bits or above
|
||
if (address >= ADDRESS_HOLE) // adjust down if at HOLE or above
|
||
address -= ADDRESS_HOLE;
|
||
|
||
/* Determine the LMC controllers */
|
||
l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
|
||
|
||
/* xbits depends on number of LMCs */
|
||
xbits = cvmx_dram_get_num_lmc(priv) >> 1; // 4->2, 2->1, 1->0
|
||
bank_lsb = 7 + xbits;
|
||
|
||
/* LMC number is probably aliased */
|
||
if (l2c_ctl.s.disidxalias) {
|
||
*lmc = EXTRACT(address, 7, xbits);
|
||
} else {
|
||
*lmc = EXTRACT(address, 7, xbits) ^
|
||
EXTRACT(address, bitno, xbits) ^
|
||
EXTRACT(address, 12, xbits);
|
||
}
|
||
|
||
/* Figure out the bank field width */
|
||
lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc));
|
||
ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc));
|
||
bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc);
|
||
|
||
/* Extract additional info from the LMC_CONFIG CSR */
|
||
dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits;
|
||
dimm_width = 40 - dimm_lsb;
|
||
prank_lsb = dimm_lsb - lmcx_config.s.rank_ena;
|
||
prank_width = dimm_lsb - prank_lsb;
|
||
lrank_lsb = prank_lsb - ext_config.s.dimm0_cid;
|
||
lrank_width = prank_lsb - lrank_lsb;
|
||
row_lsb = 14 + lmcx_config.s.row_lsb + xbits;
|
||
row_width = lrank_lsb - row_lsb;
|
||
col_hi_lsb = bank_lsb + bank_width;
|
||
col_hi_width = row_lsb - col_hi_lsb;
|
||
|
||
/* Extract the parts of the address */
|
||
*dimm = EXTRACT(address, dimm_lsb, dimm_width);
|
||
*prank = EXTRACT(address, prank_lsb, prank_width);
|
||
*lrank = EXTRACT(address, lrank_lsb, lrank_width);
|
||
*row = EXTRACT(address, row_lsb, row_width);
|
||
|
||
/* bank calculation may be aliased... */
|
||
lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc));
|
||
if (lmcx_control.s.xor_bank) {
|
||
*bank = EXTRACT(address, bank_lsb, bank_width) ^
|
||
EXTRACT(address, 12 + xbits, bank_width);
|
||
} else {
|
||
*bank = EXTRACT(address, bank_lsb, bank_width);
|
||
}
|
||
|
||
/* LMC number already extracted */
|
||
col_hi = EXTRACT(address, col_hi_lsb, col_hi_width);
|
||
*col = EXTRACT(address, 3, 4) | (col_hi << 4);
|
||
/* Bus byte is address bits [2:0]. Unused here */
|
||
}
|
||
|
||
// end of added workarounds
|
||
|
||
// NOTE: "mode" argument:
|
||
// DBTRAIN_TEST: for testing using GP patterns, includes ECC
|
||
// DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns)
|
||
// DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
|
||
// NOTE: trust the caller to specify the correct/supported mode
|
||
//
|
||
static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p,
|
||
int mode, u64 *xor_data)
|
||
{
|
||
u64 p1;
|
||
u64 k;
|
||
int errors = 0;
|
||
|
||
u64 mpr_data0, mpr_data1;
|
||
u64 bad_bits[2] = { 0, 0 };
|
||
|
||
int node_address, lmc, dimm;
|
||
int prank, lrank;
|
||
int bank, row, col;
|
||
int save_or_dis;
|
||
int byte;
|
||
int ba_loop, ba_bits;
|
||
|
||
union cvmx_lmcx_rlevel_ctl rlevel_ctl;
|
||
union cvmx_lmcx_dbtrain_ctl dbtrain_ctl;
|
||
union cvmx_lmcx_phy_ctl phy_ctl;
|
||
|
||
int biter_errs;
|
||
|
||
// FIXME: K iterations set to 4 for now.
|
||
// FIXME: decrement to increase interations.
|
||
// FIXME: must be no less than 22 to stay above an LMC hash field.
|
||
int kshift = 27;
|
||
|
||
const char *s;
|
||
int node = 0;
|
||
|
||
// allow override default setting for kshift
|
||
s = env_get("ddr_tune_set_kshift");
|
||
if (s) {
|
||
int temp = simple_strtoul(s, NULL, 0);
|
||
|
||
if (temp < 22 || temp > 28) {
|
||
debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
|
||
node, if_num, temp, kshift);
|
||
} else {
|
||
debug("N%d.LMC%d: overriding kshift (%d) to %d\n",
|
||
node, if_num, kshift, temp);
|
||
kshift = temp;
|
||
}
|
||
}
|
||
|
||
/*
|
||
* 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
|
||
*/
|
||
rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
|
||
save_or_dis = rlevel_ctl.s.or_dis;
|
||
/* or_dis must be disabled for this sequence */
|
||
rlevel_ctl.s.or_dis = 0;
|
||
lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
|
||
|
||
/*
|
||
* NOTE: this step done in the calling routine(s)...
|
||
* 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
|
||
* of choice.
|
||
* a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
|
||
* (rising edge) 64 bits of data.
|
||
* b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
|
||
* (falling edge) 64 bits of data.
|
||
* c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
|
||
* (rising edge <7:0>) and upper (falling edge <15:8>) ECC data.
|
||
*/
|
||
|
||
// final address must include LMC and node
|
||
p |= (if_num << 7); /* Map address into proper interface */
|
||
p |= (u64)node << CVMX_NODE_MEM_SHIFT; // map to node
|
||
|
||
/*
|
||
* Add base offset to both test regions to not clobber u-boot stuff
|
||
* when running from L2 for NAND boot.
|
||
*/
|
||
p += 0x20000000; // offset to 512MB, ie above THE HOLE!!!
|
||
p |= 1ull << 63; // needed for OCTEON
|
||
|
||
errors = 0;
|
||
|
||
cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm,
|
||
&prank, &lrank, &bank, &row, &col);
|
||
debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n",
|
||
__func__, p, node_address, lmc, dimm, prank, lrank, bank,
|
||
row, col);
|
||
|
||
// only check once per call, and ignore if no match...
|
||
if ((int)node != node_address) {
|
||
printf("ERROR: Node address mismatch\n");
|
||
return 0;
|
||
}
|
||
if (lmc != if_num) {
|
||
printf("ERROR: LMC address mismatch\n");
|
||
return 0;
|
||
}
|
||
|
||
/*
|
||
* 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as
|
||
* it’s a one-shot operation). This is to get into the habit of
|
||
* resetting PHY’s SILO to the original 0 location.
|
||
*/
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
phy_ctl.s.phy_reset = 1;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
/*
|
||
* Walk through a range of addresses avoiding bits that alias
|
||
* interfaces on the CN88XX.
|
||
*/
|
||
|
||
// FIXME: want to try to keep the K increment from affecting the
|
||
// LMC via hash, so keep it above bit 21 we also want to keep k
|
||
// less than the base offset of bit 29 (512MB)
|
||
|
||
for (k = 0; k < (1UL << 29); k += (1UL << kshift)) {
|
||
// FIXME: the sequence will interate over 1/2 cacheline
|
||
// FIXME: for each unit specified in "read_cmd_count",
|
||
// FIXME: so, we setup each sequence to do the max cachelines
|
||
// it can
|
||
|
||
p1 = p + k;
|
||
|
||
cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc,
|
||
&dimm, &prank, &lrank, &bank,
|
||
&row, &col);
|
||
|
||
/*
|
||
* 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
|
||
* a. COL, ROW, BA, BG, PRANK points to the starting point
|
||
* of the address.
|
||
* You can just set them to all 0.
|
||
* b. RW_TRAIN – set this to 1.
|
||
* c. TCCD_L – set this to 0.
|
||
* d. READ_CMD_COUNT – instruct the sequence to the how many
|
||
* writes/reads.
|
||
* It is 5 bits field, so set to 31 of maximum # of r/w.
|
||
*/
|
||
dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num));
|
||
dbtrain_ctl.s.column_a = col;
|
||
dbtrain_ctl.s.row_a = row;
|
||
dbtrain_ctl.s.bg = (bank >> 2) & 3;
|
||
dbtrain_ctl.s.prank = (dimm * 2) + prank; // FIXME?
|
||
dbtrain_ctl.s.lrank = lrank; // FIXME?
|
||
dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
|
||
dbtrain_ctl.s.write_ena = 1;
|
||
dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
|
||
octeon_is_cpuid(OCTEON_CNF75XX)) {
|
||
// max count on chips that support it
|
||
dbtrain_ctl.s.cmd_count_ext = 3;
|
||
} else {
|
||
// max count pass 1.x
|
||
dbtrain_ctl.s.cmd_count_ext = 0;
|
||
}
|
||
|
||
dbtrain_ctl.s.rw_train = 1;
|
||
dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
|
||
// LFSR should only be on when chip supports it...
|
||
dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;
|
||
|
||
biter_errs = 0;
|
||
|
||
// for each address, iterate over the 4 "banks" in the BA
|
||
for (ba_loop = 0, ba_bits = bank & 3;
|
||
ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) {
|
||
dbtrain_ctl.s.ba = ba_bits;
|
||
lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num),
|
||
dbtrain_ctl.u64);
|
||
|
||
/*
|
||
* We will use the RW_TRAINING sequence (14) for
|
||
* this task.
|
||
*
|
||
* 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14,
|
||
* SEQ_CTL[INIT_START] = 1).
|
||
* 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
|
||
*/
|
||
oct3_ddr3_seq(priv, prank, if_num, 14);
|
||
|
||
/*
|
||
* 6) Read MPR_DATA0 and MPR_DATA1 for results.
|
||
* a. MPR_DATA0[MPR_DATA<63:0>] – comparison results
|
||
* for DQ63:DQ0. (1 means MATCH, 0 means FAIL).
|
||
* b. MPR_DATA1[MPR_DATA<7:0>] – comparison results
|
||
* for ECC bit7:0.
|
||
*/
|
||
mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
|
||
mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num));
|
||
|
||
/*
|
||
* 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically
|
||
* clears this as it’s a one-shot operation).
|
||
* This is to get into the habit of resetting PHY’s
|
||
* SILO to the original 0 location.
|
||
*/
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
|
||
phy_ctl.s.phy_reset = 1;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
|
||
|
||
// bypass any error checking or updating when DBI mode
|
||
if (mode == DBTRAIN_DBI)
|
||
continue;
|
||
|
||
// data bytes
|
||
if (~mpr_data0) {
|
||
for (byte = 0; byte < 8; byte++) {
|
||
if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
|
||
biter_errs |= (1 << byte);
|
||
}
|
||
// accumulate bad bits
|
||
bad_bits[0] |= ~mpr_data0;
|
||
}
|
||
|
||
// include ECC byte errors
|
||
if (~mpr_data1 & 0xffUL) {
|
||
biter_errs |= (1 << 8);
|
||
bad_bits[1] |= ~mpr_data1 & 0xffUL;
|
||
}
|
||
}
|
||
|
||
errors |= biter_errs;
|
||
} /* end for (k=...) */
|
||
|
||
rlevel_ctl.s.or_dis = save_or_dis;
|
||
lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
|
||
|
||
// send the bad bits back...
|
||
if (mode != DBTRAIN_DBI && xor_data) {
|
||
xor_data[0] = bad_bits[0];
|
||
xor_data[1] = bad_bits[1];
|
||
}
|
||
|
||
return errors;
|
||
}
|
||
|
||
// setup default for byte test pattern array
|
||
// take these from the HRM section 6.9.13
|
||
static const u64 byte_pattern_0[] = {
|
||
0xFFAAFFFFFF55FFFFULL, // GP0
|
||
0x55555555AAAAAAAAULL, // GP1
|
||
0xAA55AAAAULL, // GP2
|
||
};
|
||
|
||
static const u64 byte_pattern_1[] = {
|
||
0xFBF7EFDFBF7FFEFDULL, // GP0
|
||
0x0F1E3C78F0E1C387ULL, // GP1
|
||
0xF0E1BF7FULL, // GP2
|
||
};
|
||
|
||
// this is from Andrew via LFSR with PRBS=0xFFFFAAAA
|
||
static const u64 byte_pattern_2[] = {
|
||
0xEE55AADDEE55AADDULL, // GP0
|
||
0x55AADDEE55AADDEEULL, // GP1
|
||
0x55EEULL, // GP2
|
||
};
|
||
|
||
// this is from Mike via LFSR with PRBS=0x4A519909
|
||
static const u64 byte_pattern_3[] = {
|
||
0x0088CCEE0088CCEEULL, // GP0
|
||
0xBB552211BB552211ULL, // GP1
|
||
0xBB00ULL, // GP2
|
||
};
|
||
|
||
static const u64 *byte_patterns[4] = {
|
||
byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3
|
||
};
|
||
|
||
static const u32 lfsr_patterns[4] = {
|
||
0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL
|
||
};
|
||
|
||
#define NUM_BYTE_PATTERNS 4
|
||
|
||
#define DEFAULT_BYTE_BURSTS 32 // compromise between time and rigor
|
||
|
||
static void setup_hw_pattern(struct ddr_priv *priv, int lmc,
|
||
const u64 *pattern_p)
|
||
{
|
||
/*
|
||
* 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
|
||
* of choice.
|
||
* a. GENERAL_PURPOSE0[DATA<63:0>] â sets the initial lower
|
||
* (rising edge) 64 bits of data.
|
||
* b. GENERAL_PURPOSE1[DATA<63:0>] â sets the initial upper
|
||
* (falling edge) 64 bits of data.
|
||
* c. GENERAL_PURPOSE2[DATA<15:0>] â sets the initial lower
|
||
* (rising edge <7:0>) and upper
|
||
* (falling edge <15:8>) ECC data.
|
||
*/
|
||
lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]);
|
||
lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]);
|
||
lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]);
|
||
}
|
||
|
||
/*
 * Program CHAR_CTL of an LMC for LFSR/PRBS pattern generation.
 *
 * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
 *    here data comes from the LFSR generating a PRBS pattern
 *    CHAR_CTL.EN = 0
 *    CHAR_CTL.SEL = 0; // for PRBS
 *    CHAR_CTL.DR = 1;
 *    CHAR_CTL.PRBS = setup for whatever type of PRBS to send
 *    CHAR_CTL.SKEW_ON = 1;
 *
 * @priv: driver private data handed to the CSR accessors
 * @lmc:  LMC whose CHAR_CTL is updated
 * @data: PRBS value to use unless the "ddr_lfsr_prbs" environment
 *        variable is set, in which case that value is used instead
 */
static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data)
{
	union cvmx_lmcx_char_ctl char_ctl;
	const char *override = env_get("ddr_lfsr_prbs");
	u32 prbs = override ? simple_strtoul(override, NULL, 0) : data;

	char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc));

	char_ctl.s.en = 0;
	char_ctl.s.sel = 0;
	char_ctl.s.dr = 1;
	char_ctl.s.prbs = prbs;
	char_ctl.s.skew_on = 1;

	lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64);
}
|
||
|
||
static int choose_best_hw_patterns(int lmc, int mode)
|
||
{
|
||
int new_mode = mode;
|
||
const char *s;
|
||
|
||
switch (mode) {
|
||
case DBTRAIN_TEST: // always choose LFSR if chip supports it
|
||
if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
|
||
int lfsr_enable = 1;
|
||
|
||
s = env_get("ddr_allow_lfsr");
|
||
if (s) {
|
||
// override?
|
||
lfsr_enable = !!strtoul(s, NULL, 0);
|
||
}
|
||
|
||
if (lfsr_enable)
|
||
new_mode = DBTRAIN_LFSR;
|
||
}
|
||
break;
|
||
|
||
case DBTRAIN_DBI: // possibly can allow LFSR use?
|
||
break;
|
||
|
||
case DBTRAIN_LFSR: // forced already
|
||
if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
|
||
debug("ERROR: illegal HW assist mode %d\n", mode);
|
||
new_mode = DBTRAIN_TEST;
|
||
}
|
||
break;
|
||
|
||
default:
|
||
debug("ERROR: unknown HW assist mode %d\n", mode);
|
||
}
|
||
|
||
if (new_mode != mode)
|
||
debug("%s: changing mode %d to %d\n", __func__, mode, new_mode);
|
||
|
||
return new_mode;
|
||
}
|
||
|
||
/*
 * Run test_dram_byte_hw() once for each of the NUM_BYTE_PATTERNS patterns
 * and OR the per-pattern results together.
 *
 * @priv:      driver private data
 * @lmc:       LMC to test
 * @phys_addr: base address handed to test_dram_byte_hw()
 * @mode:      requested DBTRAIN_* mode; filtered through
 *             choose_best_hw_patterns() first
 * @xor_data:  passed unchanged to every test_dram_byte_hw() call
 *
 * Return: OR of the values returned by all test_dram_byte_hw() calls.
 */
int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
			 int mode, u64 *xor_data)
{
	int errors = 0;
	int pattern;

	// FIXME? always choose LFSR if chip supports it???
	mode = choose_best_hw_patterns(lmc, mode);

	for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
		int errs;

		/* Load the pattern source that matches the mode in use */
		if (mode != DBTRAIN_LFSR)
			setup_hw_pattern(priv, lmc, byte_patterns[pattern]);
		else
			setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);

		errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data);

		debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n",
		      __func__, pattern, phys_addr, errs);

		errors |= errs;
	}

	return errors;
}
|
||
|
||
static void hw_assist_test_dll_offset(struct ddr_priv *priv,
|
||
int dll_offset_mode, int lmc,
|
||
int bytelane,
|
||
int if_64b,
|
||
u64 dram_tune_rank_offset,
|
||
int dram_tune_byte_bursts)
|
||
{
|
||
int byte_offset, new_best_offset[9];
|
||
int rank_delay_start[4][9];
|
||
int rank_delay_count[4][9];
|
||
int rank_delay_best_start[4][9];
|
||
int rank_delay_best_count[4][9];
|
||
int errors[4], off_errors, tot_errors;
|
||
int rank_mask, rankx, active_ranks;
|
||
int pattern;
|
||
const u64 *pattern_p;
|
||
int byte;
|
||
char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
|
||
int pat_best_offset[9];
|
||
u64 phys_addr;
|
||
int pat_beg, pat_end;
|
||
int rank_beg, rank_end;
|
||
int byte_lo, byte_hi;
|
||
union cvmx_lmcx_config lmcx_config;
|
||
u64 hw_rank_offset;
|
||
int num_lmcs = cvmx_dram_get_num_lmc(priv);
|
||
// FIXME? always choose LFSR if chip supports it???
|
||
int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST);
|
||
int node = 0;
|
||
|
||
if (bytelane == 0x0A) { // all bytelanes
|
||
byte_lo = 0;
|
||
byte_hi = 8;
|
||
} else { // just 1
|
||
byte_lo = bytelane;
|
||
byte_hi = bytelane;
|
||
}
|
||
|
||
lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
|
||
rank_mask = lmcx_config.s.init_status;
|
||
|
||
// this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
|
||
hw_rank_offset =
|
||
1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena +
|
||
(num_lmcs / 2));
|
||
|
||
debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n",
|
||
node, __func__, lmc, (unsigned long long)hw_rank_offset);
|
||
|
||
// start of pattern loop
|
||
// we do the set of tests for each pattern supplied...
|
||
|
||
memset(new_best_offset, 0, sizeof(new_best_offset));
|
||
for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
|
||
memset(pat_best_offset, 0, sizeof(pat_best_offset));
|
||
|
||
if (mode == DBTRAIN_TEST) {
|
||
pattern_p = byte_patterns[pattern];
|
||
setup_hw_pattern(priv, lmc, pattern_p);
|
||
} else {
|
||
setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
|
||
}
|
||
|
||
// now loop through all legal values for the DLL byte offset...
|
||
|
||
#define BYTE_OFFSET_INCR 3 // FIXME: make this tunable?
|
||
|
||
tot_errors = 0;
|
||
|
||
memset(rank_delay_count, 0, sizeof(rank_delay_count));
|
||
memset(rank_delay_start, 0, sizeof(rank_delay_start));
|
||
memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
|
||
memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
|
||
|
||
for (byte_offset = -63; byte_offset < 64;
|
||
byte_offset += BYTE_OFFSET_INCR) {
|
||
// do the setup on the active LMC
|
||
// set the bytelanes DLL offsets
|
||
change_dll_offset_enable(priv, lmc, 0);
|
||
// FIXME? bytelane?
|
||
load_dll_offset(priv, lmc, dll_offset_mode,
|
||
byte_offset, bytelane);
|
||
change_dll_offset_enable(priv, lmc, 1);
|
||
|
||
//bdk_watchdog_poke();
|
||
|
||
// run the test on each rank
|
||
// only 1 call per rank should be enough, let the
|
||
// bursts, loops, etc, control the load...
|
||
|
||
// errors for this byte_offset, all ranks
|
||
off_errors = 0;
|
||
|
||
active_ranks = 0;
|
||
|
||
for (rankx = 0; rankx < 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
phys_addr = hw_rank_offset * active_ranks;
|
||
// FIXME: now done by test_dram_byte_hw()
|
||
//phys_addr |= (lmc << 7);
|
||
//phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT;
|
||
|
||
active_ranks++;
|
||
|
||
// NOTE: return is a now a bitmask of the
|
||
// erroring bytelanes.
|
||
errors[rankx] =
|
||
test_dram_byte_hw(priv, lmc, phys_addr,
|
||
mode, NULL);
|
||
|
||
// process any errors in the bytelane(s) that
|
||
// are being tested
|
||
for (byte = byte_lo; byte <= byte_hi; byte++) {
|
||
// check errors
|
||
// yes, an error in the byte lane in
|
||
// this rank
|
||
if (errors[rankx] & (1 << byte)) {
|
||
off_errors |= (1 << byte);
|
||
|
||
debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n",
|
||
node, lmc, rankx, byte,
|
||
mode_str, byte_offset,
|
||
phys_addr);
|
||
|
||
// had started run
|
||
if (rank_delay_count
|
||
[rankx][byte] > 0) {
|
||
debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n",
|
||
node, lmc, rankx,
|
||
byte, mode_str,
|
||
byte_offset);
|
||
// stop now
|
||
rank_delay_count
|
||
[rankx][byte] =
|
||
0;
|
||
}
|
||
// FIXME: else had not started
|
||
// run - nothing else to do?
|
||
} else {
|
||
// no error in the byte lane
|
||
// first success, set run start
|
||
if (rank_delay_count[rankx]
|
||
[byte] == 0) {
|
||
debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n",
|
||
node, lmc, rankx,
|
||
byte, mode_str,
|
||
byte_offset);
|
||
rank_delay_start[rankx]
|
||
[byte] =
|
||
byte_offset;
|
||
}
|
||
// bump run length
|
||
rank_delay_count[rankx][byte]
|
||
+= BYTE_OFFSET_INCR;
|
||
|
||
// is this now the biggest
|
||
// window?
|
||
if (rank_delay_count[rankx]
|
||
[byte] >
|
||
rank_delay_best_count[rankx]
|
||
[byte]) {
|
||
rank_delay_best_count
|
||
[rankx][byte] =
|
||
rank_delay_count
|
||
[rankx][byte];
|
||
rank_delay_best_start
|
||
[rankx][byte] =
|
||
rank_delay_start
|
||
[rankx][byte];
|
||
debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n",
|
||
node, lmc, rankx,
|
||
byte, mode_str,
|
||
byte_offset,
|
||
rank_delay_best_start
|
||
[rankx][byte],
|
||
rank_delay_best_count
|
||
[rankx][byte]);
|
||
}
|
||
}
|
||
}
|
||
} /* for (rankx = 0; rankx < 4; rankx++) */
|
||
|
||
tot_errors |= off_errors;
|
||
}
|
||
|
||
// set the bytelanes DLL offsets all back to 0
|
||
change_dll_offset_enable(priv, lmc, 0);
|
||
load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane);
|
||
change_dll_offset_enable(priv, lmc, 1);
|
||
|
||
// now choose the best byte_offsets for this pattern
|
||
// according to the best windows of the tested ranks
|
||
// calculate offset by constructing an average window
|
||
// from the rank windows
|
||
for (byte = byte_lo; byte <= byte_hi; byte++) {
|
||
pat_beg = -999;
|
||
pat_end = 999;
|
||
|
||
for (rankx = 0; rankx < 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
rank_beg = rank_delay_best_start[rankx][byte];
|
||
pat_beg = max(pat_beg, rank_beg);
|
||
rank_end = rank_beg +
|
||
rank_delay_best_count[rankx][byte] -
|
||
BYTE_OFFSET_INCR;
|
||
pat_end = min(pat_end, rank_end);
|
||
|
||
debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n",
|
||
node, lmc, rankx, byte, mode_str,
|
||
rank_beg, rank_end);
|
||
|
||
} /* for (rankx = 0; rankx < 4; rankx++) */
|
||
|
||
pat_best_offset[byte] = (pat_end + pat_beg) / 2;
|
||
|
||
// sum the pattern averages
|
||
new_best_offset[byte] += pat_best_offset[byte];
|
||
}
|
||
|
||
// now print them on 1 line, descending order...
|
||
debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :",
|
||
node, lmc, mode_str, pattern);
|
||
for (byte = byte_hi; byte >= byte_lo; --byte)
|
||
debug(" %4d", pat_best_offset[byte]);
|
||
debug("\n");
|
||
}
|
||
// end of pattern loop
|
||
|
||
debug("N%d.LMC%d: HW DLL %s Offset Average : ", node, lmc, mode_str);
|
||
|
||
// print in decending byte index order
|
||
for (byte = byte_hi; byte >= byte_lo; --byte) {
|
||
// create the new average NINT
|
||
new_best_offset[byte] = divide_nint(new_best_offset[byte],
|
||
NUM_BYTE_PATTERNS);
|
||
|
||
// print the best offsets from all patterns
|
||
|
||
// print just the offset of all the bytes
|
||
if (bytelane == 0x0A)
|
||
debug("%4d ", new_best_offset[byte]);
|
||
else // print the bytelanes also
|
||
debug("(byte %d) %4d ", byte, new_best_offset[byte]);
|
||
|
||
// done with testing, load up the best offsets we found...
|
||
// disable offsets while we load...
|
||
change_dll_offset_enable(priv, lmc, 0);
|
||
load_dll_offset(priv, lmc, dll_offset_mode,
|
||
new_best_offset[byte], byte);
|
||
// re-enable the offsets now that we are done loading
|
||
change_dll_offset_enable(priv, lmc, 1);
|
||
}
|
||
|
||
debug("\n");
|
||
}
|
||
|
||
/*
|
||
* Automatically adjust the DLL offset for the selected bytelane using
|
||
* hardware-assist
|
||
*/
|
||
static int perform_HW_dll_offset_tuning(struct ddr_priv *priv,
|
||
int dll_offset_mode, int bytelane)
|
||
{
|
||
int if_64b;
|
||
int save_ecc_ena[4];
|
||
union cvmx_lmcx_config lmc_config;
|
||
int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv);
|
||
const char *s;
|
||
int loops = 1, loop;
|
||
int by;
|
||
u64 dram_tune_rank_offset;
|
||
int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS;
|
||
int node = 0;
|
||
|
||
// see if we want to do the tuning more than once per LMC...
|
||
s = env_get("ddr_tune_ecc_loops");
|
||
if (s)
|
||
loops = strtoul(s, NULL, 0);
|
||
|
||
// allow override of the test repeats (bursts)
|
||
s = env_get("ddr_tune_byte_bursts");
|
||
if (s)
|
||
dram_tune_byte_bursts = strtoul(s, NULL, 10);
|
||
|
||
// print current working values
|
||
debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
|
||
node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS);
|
||
|
||
// FIXME? get flag from LMC0 only
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));
|
||
if_64b = !lmc_config.s.mode32b;
|
||
|
||
// this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
|
||
dram_tune_rank_offset =
|
||
1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena +
|
||
(num_lmcs / 2));
|
||
|
||
// do once for each active LMC
|
||
|
||
for (lmc = 0; lmc < num_lmcs; lmc++) {
|
||
debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n",
|
||
node, lmc, bytelane);
|
||
|
||
/* Enable ECC for the HW tests */
|
||
// NOTE: we do enable ECC, but the HW tests used will not
|
||
// generate "visible" errors
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
|
||
save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
|
||
lmc_config.s.ecc_ena = 1;
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
|
||
|
||
// testing is done on a single LMC at a time
|
||
// FIXME: for now, loop here to show what happens multiple times
|
||
for (loop = 0; loop < loops; loop++) {
|
||
/* Perform DLL offset tuning */
|
||
hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc,
|
||
bytelane,
|
||
if_64b, dram_tune_rank_offset,
|
||
dram_tune_byte_bursts);
|
||
}
|
||
|
||
// perform cleanup on active LMC
|
||
debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n",
|
||
node, lmc, bytelane);
|
||
|
||
/* Restore ECC for DRAM tests */
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
|
||
lmc_config.s.ecc_ena = save_ecc_ena[lmc];
|
||
lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
|
||
lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
|
||
|
||
// finally, see if there are any read offset overrides
|
||
// after tuning
|
||
for (by = 0; by < 9; by++) {
|
||
s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by);
|
||
if (s) {
|
||
int dllro = strtoul(s, NULL, 10);
|
||
|
||
change_dll_offset_enable(priv, lmc, 0);
|
||
load_dll_offset(priv, lmc, 2, dllro, by);
|
||
change_dll_offset_enable(priv, lmc, 1);
|
||
}
|
||
}
|
||
|
||
} /* for (lmc = 0; lmc < num_lmcs; lmc++) */
|
||
|
||
// finish up...
|
||
|
||
return 0;
|
||
|
||
} /* perform_HW_dll_offset_tuning */
|
||
|
||
// this routine simply makes the calls to the tuning routine and returns
|
||
// any errors
|
||
static int cvmx_tune_node(struct ddr_priv *priv)
|
||
{
|
||
int errs, tot_errs;
|
||
int do_dllwo = 0; // default to NO
|
||
const char *str;
|
||
int node = 0;
|
||
|
||
// Automatically tune the data and ECC byte DLL read offsets
|
||
debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
|
||
errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */);
|
||
debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n",
|
||
node, errs);
|
||
tot_errs = errs;
|
||
|
||
// disabled by default for now, does not seem to be needed?
|
||
// Automatically tune the data and ECC byte DLL write offsets
|
||
// allow override of default setting
|
||
str = env_get("ddr_tune_write_offsets");
|
||
if (str)
|
||
do_dllwo = !!strtoul(str, NULL, 0);
|
||
if (do_dllwo) {
|
||
debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
|
||
errs =
|
||
perform_HW_dll_offset_tuning(priv, 1,
|
||
0x0A /* all bytelanes */);
|
||
debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n",
|
||
node, errs);
|
||
tot_errs += errs;
|
||
}
|
||
|
||
return tot_errs;
|
||
}
|
||
|
||
// this routine makes the calls to the tuning routines when criteria are met
|
||
// intended to be called for automated tuning, to apply filtering...
|
||
|
||
#define IS_DDR4 1
|
||
#define IS_DDR3 0
|
||
#define IS_RDIMM 1
|
||
#define IS_UDIMM 0
|
||
#define IS_1SLOT 1
|
||
#define IS_2SLOT 0
|
||
|
||
// FIXME: DDR3 is not tuned
|
||
static const u32 ddr_speed_filter[2][2][2] = {
|
||
[IS_DDR4] = {
|
||
[IS_RDIMM] = {
|
||
[IS_1SLOT] = 940,
|
||
[IS_2SLOT] = 800},
|
||
[IS_UDIMM] = {
|
||
[IS_1SLOT] = 1050,
|
||
[IS_2SLOT] = 940},
|
||
},
|
||
[IS_DDR3] = {
|
||
[IS_RDIMM] = {
|
||
[IS_1SLOT] = 0, // disabled
|
||
[IS_2SLOT] = 0 // disabled
|
||
},
|
||
[IS_UDIMM] = {
|
||
[IS_1SLOT] = 0, // disabled
|
||
[IS_2SLOT] = 0 // disabled
|
||
}
|
||
}
|
||
};
|
||
|
||
/*
 * Run cvmx_tune_node() if the configuration calls for it.
 *
 * When the "ddr_tune_all_configs" environment variable is defined its
 * value alone decides (non-zero forces tuning, zero disables it).
 * Otherwise LMC0 is sampled for DDR type, RDIMM/UDIMM and slot count, and
 * tuning runs only if the speed exceeds the non-zero threshold found in
 * ddr_speed_filter[][][].
 *
 * @priv:      driver private data
 * @ddr_speed: DDR speed in Hz
 */
void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed)
{
	union cvmx_lmcx_config lmc_config;
	union cvmx_lmcx_control lmc_control;
	union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl;
	const char *override;
	u32 ddr_min_speed;
	int is_ddr4, is_rdimm, is_1slot;
	int do_tune = 0;
	int node = 0;

	// scale it down from Hz to MHz
	ddr_speed = divide_nint(ddr_speed, 1000000);

	// FIXME: allow an override here so that all configs can be tuned
	// or none
	// If the envvar is defined, always either force it or avoid it
	// accordingly
	override = env_get("ddr_tune_all_configs");
	if (override) {
		do_tune = !!strtoul(override, NULL, 0);
		printf("N%d: DRAM auto-tuning %s.\n", node,
		       (do_tune) ? "forced" : "disabled");
	} else {
		// filter the tuning calls here...
		// determine if we should/can run automatically for this
		// configuration
		//
		// FIXME: tune only when the configuration indicates it will
		// help: DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
		//
		// all three CSRs are sampled from LMC0
		lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));
		lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0));
		lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));

		is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0);
		is_rdimm = (lmc_control.s.rdimm_ena != 0);
		// HACK, should do better
		is_1slot = (lmc_config.s.init_status < 4);

		ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
		do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed));

		debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
		      node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U',
		      (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not");
	}

	// call the tuning routine, filtering is done...
	if (do_tune)
		cvmx_tune_node(priv);
}
|
||
|
||
/*
|
||
* first pattern example:
|
||
* GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
|
||
* GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
|
||
* GENERAL_PURPOSE0.DATA == 16'h0000;
|
||
*/
|
||
|
||
static const u64 dbi_pattern[3] = {
|
||
0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
|
||
|
||
// Perform switchover to DBI
|
||
static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc)
|
||
{
|
||
union cvmx_lmcx_modereg_params0 modereg_params0;
|
||
union cvmx_lmcx_modereg_params3 modereg_params3;
|
||
union cvmx_lmcx_phy_ctl phy_ctl;
|
||
union cvmx_lmcx_config lmcx_config;
|
||
union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
|
||
int rank_mask, rankx, active_ranks;
|
||
u64 phys_addr, rank_offset;
|
||
int num_lmcs, errors;
|
||
int dbi_settings[9], byte, unlocked, retries;
|
||
int ecc_ena;
|
||
int rank_max = 1; // FIXME: make this 4 to try all the ranks
|
||
int node = 0;
|
||
|
||
ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
|
||
|
||
lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
|
||
rank_mask = lmcx_config.s.init_status;
|
||
ecc_ena = lmcx_config.s.ecc_ena;
|
||
|
||
// FIXME: must filter out any non-supported configs
|
||
// ie, no DDR3, no x4 devices
|
||
if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) {
|
||
debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
|
||
node, lmc);
|
||
return;
|
||
}
|
||
|
||
// this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
|
||
num_lmcs = cvmx_dram_get_num_lmc(priv);
|
||
rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb -
|
||
lmcx_config.s.rank_ena + (num_lmcs / 2));
|
||
|
||
debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
|
||
node, lmc, rank_mask, (unsigned long long)rank_offset);
|
||
|
||
/*
|
||
* 1. conduct the current init sequence as usual all the way
|
||
* after software write leveling.
|
||
*/
|
||
|
||
read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
|
||
|
||
display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
|
||
" INIT");
|
||
|
||
/*
|
||
* 2. set DBI related CSRs as below and issue MR write.
|
||
* MODEREG_PARAMS3.WR_DBI=1
|
||
* MODEREG_PARAMS3.RD_DBI=1
|
||
* PHY_CTL.DBI_MODE_ENA=1
|
||
*/
|
||
modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc));
|
||
|
||
modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc));
|
||
modereg_params3.s.wr_dbi = 1;
|
||
modereg_params3.s.rd_dbi = 1;
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64);
|
||
|
||
phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc));
|
||
phy_ctl.s.dbi_mode_ena = 1;
|
||
lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64);
|
||
|
||
/*
|
||
* there are two options for data to send. Lets start with (1)
|
||
* and could move to (2) in the future:
|
||
*
|
||
* 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where
|
||
* this does not exist) set data directly in these reigsters.
|
||
* this will yield a clk/2 pattern:
|
||
* GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
|
||
* GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
|
||
* GENERAL_PURPOSE0.DATA == 16'h0000;
|
||
* 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
|
||
* here data comes from the LFSR generating a PRBS pattern
|
||
* CHAR_CTL.EN = 0
|
||
* CHAR_CTL.SEL = 0; // for PRBS
|
||
* CHAR_CTL.DR = 1;
|
||
* CHAR_CTL.PRBS = setup for whatever type of PRBS to send
|
||
* CHAR_CTL.SKEW_ON = 1;
|
||
*/
|
||
lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
|
||
lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
|
||
lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
|
||
|
||
/*
|
||
* 3. adjust cas_latency (only necessary if RD_DBI is set).
|
||
* here is my code for doing this:
|
||
*
|
||
* if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
|
||
* case (csr_model.MODEREG_PARAMS0.CL.value)
|
||
* 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2;
|
||
* // CL 9-13 -> 11-15
|
||
* 5: begin
|
||
* // CL=14, CWL=10,12 gets +2, CLW=11,14 gets +3
|
||
* if((csr_model.MODEREG_PARAMS0.CWL.value==1 ||
|
||
* csr_model.MODEREG_PARAMS0.CWL.value==3))
|
||
* csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
|
||
* else
|
||
* csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
|
||
* end
|
||
* 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
|
||
* 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
|
||
* 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
|
||
* default:
|
||
* `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1,
|
||
* I am not sure what to do.",
|
||
* mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
|
||
* endcase
|
||
* end
|
||
*/
|
||
|
||
if (modereg_params3.s.rd_dbi == 1) {
|
||
int old_cl, new_cl, old_cwl;
|
||
|
||
old_cl = modereg_params0.s.cl;
|
||
old_cwl = modereg_params0.s.cwl;
|
||
|
||
switch (old_cl) {
|
||
case 0:
|
||
case 1:
|
||
case 2:
|
||
case 3:
|
||
case 4:
|
||
new_cl = old_cl + 2;
|
||
break; // 9-13->11-15
|
||
// CL=14, CWL=10,12 gets +2, CLW=11,14 gets +3
|
||
case 5:
|
||
new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13;
|
||
break;
|
||
case 6:
|
||
new_cl = 8;
|
||
break; // 15->18
|
||
case 7:
|
||
new_cl = 14;
|
||
break; // 16->19
|
||
case 8:
|
||
new_cl = 15;
|
||
break; // 18->21
|
||
default:
|
||
printf("ERROR: Bad CL value (%d) for DBI switchover.\n",
|
||
old_cl);
|
||
// FIXME: need to error exit here...
|
||
old_cl = -1;
|
||
new_cl = -1;
|
||
break;
|
||
}
|
||
debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
|
||
node, lmc, old_cl, old_cwl, new_cl);
|
||
modereg_params0.s.cl = new_cl;
|
||
lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc),
|
||
modereg_params0.u64);
|
||
}
|
||
|
||
/*
|
||
* 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence
|
||
* SEQ_CTL[SEQ_SEL] = MRW.
|
||
*/
|
||
// Use the default values, from the CSRs fields
|
||
// also, do B-sides for RDIMMs...
|
||
|
||
for (rankx = 0; rankx < 4; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
// for RDIMMs, B-side writes should get done automatically
|
||
// when the A-side is written
|
||
ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
|
||
0 /*MRreg */, 0 /*A-side */); /* MR0 */
|
||
ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
|
||
5 /*MRreg */, 0 /*A-side */); /* MR5 */
|
||
}
|
||
|
||
/*
|
||
* 5. conduct DBI bit deskew training via the General Purpose
|
||
* R/W sequence (dbtrain). may need to run this over and over to get
|
||
* a lock (I need up to 5 in simulation):
|
||
* SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
|
||
* DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
|
||
* DBTRAIN_CTL.READ_CMD_COUNT = all 1's
|
||
* DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
|
||
* DBTRAIN_CTL.RW_TRAIN = 1
|
||
* DBTRAIN_CTL.READ_DQ_COUNT = dont care
|
||
* DBTRAIN_CTL.WRITE_ENA = 1;
|
||
* DBTRAIN_CTL.ACTIVATE = 1;
|
||
* DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a
|
||
* valid address
|
||
*/
|
||
|
||
// NOW - do the training
|
||
debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc);
|
||
|
||
active_ranks = 0;
|
||
for (rankx = 0; rankx < rank_max; rankx++) {
|
||
if (!(rank_mask & (1 << rankx)))
|
||
continue;
|
||
|
||
phys_addr = rank_offset * active_ranks;
|
||
// FIXME: now done by test_dram_byte_hw()
|
||
|
||
active_ranks++;
|
||
|
||
retries = 0;
|
||
|
||
restart_training:
|
||
|
||
// NOTE: return is a bitmask of the erroring bytelanes -
|
||
// we only print it
|
||
errors =
|
||
test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL);
|
||
|
||
debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n",
|
||
node, lmc, rankx, (unsigned long long)phys_addr, errors);
|
||
|
||
// NEXT - check for locking
|
||
unlocked = 0;
|
||
read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
|
||
|
||
for (byte = 0; byte < (8 + ecc_ena); byte++)
|
||
unlocked += (dbi_settings[byte] & 1) ^ 1;
|
||
|
||
// FIXME: print out the DBI settings array after each rank?
|
||
if (rank_max > 1) // only when doing more than 1 rank
|
||
display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena,
|
||
dbi_settings, " RANK");
|
||
|
||
if (unlocked > 0) {
|
||
debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
|
||
node, lmc, unlocked);
|
||
retries++;
|
||
if (retries < 10) {
|
||
goto restart_training;
|
||
} else {
|
||
debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
|
||
node, lmc, retries);
|
||
}
|
||
}
|
||
} /* for (rankx = 0; rankx < 4; rankx++) */
|
||
|
||
// print out the final DBI settings array
|
||
display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
|
||
"FINAL");
|
||
}
|
||
|
||
/*
 * Run the DBI switchover on every LMC reported by
 * cvmx_dram_get_num_lmc(), in ascending index order.
 *
 * @priv:	DDR driver private data
 */
void cvmx_dbi_switchover(struct ddr_priv *priv)
{
	const int lmc_count = cvmx_dram_get_num_lmc(priv);
	int idx = 0;

	while (idx < lmc_count) {
		cvmx_dbi_switchover_interface(priv, idx);
		idx++;
	}
}
|