diff --git a/arch/mips/dts/mrvl,cn73xx.dtsi b/arch/mips/dts/mrvl,cn73xx.dtsi index f5ad4a6213..40eb85ee0c 100644 --- a/arch/mips/dts/mrvl,cn73xx.dtsi +++ b/arch/mips/dts/mrvl,cn73xx.dtsi @@ -72,6 +72,23 @@ <0x0300e 4>, <0x0300f 4>; }; + l2c: l2c@1180080000000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "cavium,octeon-7xxx-l2c"; + reg = <0x11800 0x80000000 0x0 0x01000000>; + u-boot,dm-pre-reloc; + }; + + lmc: lmc@1180088000000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "cavium,octeon-7xxx-ddr4"; + reg = <0x11800 0x88000000 0x0 0x02000000>; // 2 IFs + u-boot,dm-pre-reloc; + l2c-handle = <&l2c>; + }; + reset: reset@1180006001600 { compatible = "mrvl,cn7xxx-rst"; reg = <0x11800 0x06001600 0x0 0x200>; @@ -126,5 +143,65 @@ spi-max-frequency = <25000000>; clocks = <&clk OCTEON_CLK_IO>; }; + + /* USB 0 */ + usb0: uctl@1180068000000 { + compatible = "cavium,octeon-7130-usb-uctl"; + reg = <0x11800 0x68000000 0x0 0x100>; + ranges; /* Direct mapping */ + #address-cells = <2>; + #size-cells = <2>; + /* Only 100MHz allowed */ + refclk-frequency = <100000000>; + /* Only "dlmc_ref_clk0" is supported for 73xx */ + refclk-type-ss = "dlmc_ref_clk0"; + /* Only "dlmc_ref_clk0" is supported for 73xx */ + refclk-type-hs = "dlmc_ref_clk0"; + + /* + * Power is specified by three parts: + * 1) GPIO handle (must be &gpio) + * 2) GPIO pin number + * 3) Active high (0) or active low (1) + */ + xhci@1680000000000 { + compatible = "cavium,octeon-7130-xhci","synopsys,dwc3","snps,dwc3"; + reg = <0x16800 0x00000000 0x10 0x0>; + interrupts = <0x68080 4>; /* UAHC_IMAN, level */ + maximum-speed = "super-speed"; + dr_mode = "host"; + snps,dis_u3_susphy_quirk; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; + }; + }; + + /* USB 1 */ + usb1: uctl@1180069000000 { + compatible = "cavium,octeon-7130-usb-uctl"; + reg = <0x11800 0x69000000 0x0 0x100>; + ranges; /* Direct mapping */ + #address-cells = <2>; + #size-cells = <2>; + /* 50MHz, 100MHz and 125MHz allowed */ + refclk-frequency = <100000000>; + /* Either "dlmc_ref_clk0" or "dlmc_ref_clk0" */ + refclk-type-ss = "dlmc_ref_clk0"; + /* Either "dlmc_ref_clk0" "dlmc_ref_clk1" or "pll_ref_clk" */ + refclk-type-hs = "dlmc_ref_clk0"; + + /* + * Power is specified by three parts: + * 1) GPIO handle (must be &gpio) + * 2) GPIO pin number + * 3) Active high (0) or active low (1) + */ + xhci@1690000000000 { + compatible = "cavium,octeon-7130-xhci","synopsys,dwc3","snps,dwc3"; + reg = <0x16900 0x00000000 0x10 0x0>; + interrupts = <0x69080 4>; /* UAHC_IMAN, level */ + dr_mode = "host"; + }; + }; }; }; diff --git a/arch/mips/dts/mrvl,octeon-ebb7304.dts b/arch/mips/dts/mrvl,octeon-ebb7304.dts index 6b2e5e84bc..993b4f6890 100644 --- a/arch/mips/dts/mrvl,octeon-ebb7304.dts +++ b/arch/mips/dts/mrvl,octeon-ebb7304.dts @@ -113,3 +113,27 @@ reg = <0>; }; }; + +/* USB 0 */ +&usb0 { + status = "okay"; + /* + * Power is specified by three parts: + * 1) GPIO handle (must be &gpio) + * 2) GPIO pin number + * 3) Active high (0) or active low (1) + */ + power = <&gpio 20 0>; +}; + +/* USB 1 */ +&usb1 { + status = "okay"; + /* + * Power is specified by three parts: + * 1) GPIO handle (must be &gpio) + * 2) GPIO pin number + * 3) Active high (0) or active low (1) + */ + power = <&gpio 21 0>; +}; diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile index 2e37ca572c..3486aa9d8b 100644 --- a/arch/mips/mach-octeon/Makefile +++ b/arch/mips/mach-octeon/Makefile @@ -8,3 +8,6 @@ obj-y += cache.o obj-y += clock.o obj-y += cpu.o obj-y += dram.o +obj-y += cvmx-coremask.o +obj-y += cvmx-bootmem.o +obj-y += bootoctlinux.o diff --git a/arch/mips/mach-octeon/bootoctlinux.c b/arch/mips/mach-octeon/bootoctlinux.c new file mode 100644 index 0000000000..75d7e83bd7 --- /dev/null +++ b/arch/mips/mach-octeon/bootoctlinux.c @@ -0,0 +1,661 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2020 Stefan Roese + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +DECLARE_GLOBAL_DATA_PTR; + +/* ToDo: Revisit these settings */ +#define OCTEON_RESERVED_LOW_MEM_SIZE (512 * 1024) +#define OCTEON_RESERVED_LOW_BOOT_MEM_SIZE (1024 * 1024) +#define BOOTLOADER_BOOTMEM_DESC_SPACE (1024 * 1024) + +/* Default stack and heap sizes, in bytes */ +#define DEFAULT_STACK_SIZE (1 * 1024 * 1024) +#define DEFAULT_HEAP_SIZE (3 * 1024 * 1024) + +/** + * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain + * octeon-app-init.h file. + */ +enum { + /* If set, core should do app-wide init, only one core per app will have + * this flag set. + */ + BOOT_FLAG_INIT_CORE = 1, + OCTEON_BL_FLAG_DEBUG = 1 << 1, + OCTEON_BL_FLAG_NO_MAGIC = 1 << 2, + /* If set, use uart1 for console */ + OCTEON_BL_FLAG_CONSOLE_UART1 = 1 << 3, + OCTEON_BL_FLAG_CONSOLE_PCI = 1 << 4, /* If set, use PCI console */ + /* Call exit on break on serial port */ + OCTEON_BL_FLAG_BREAK = 1 << 5, + /* + * Be sure to update OCTEON_APP_INIT_H_VERSION when new fields are added + * and to conditionalize the new flag's usage based on the version. + */ +} octeon_boot_descriptor_flag; + +/** + * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain + * octeon-app-init.h file. + */ +#ifndef OCTEON_CURRENT_DESC_VERSION +# define OCTEON_CURRENT_DESC_VERSION 7 +#endif +/** + * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain + * octeon-app-init.h file. + */ +/* Version 7 changes: Change names of deprecated fields */ +#ifndef OCTEON_ARGV_MAX_ARGS +# define OCTEON_ARGV_MAX_ARGS 64 +#endif + +/** + * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain + * octeon-app-init.h file. + */ +#ifndef OCTEON_SERIAL_LEN +# define OCTEON_SERIAL_LEN 20 +#endif + +/** + * Bootloader structure used to pass info to Octeon executive startup code. + * NOTE: all fields are deprecated except for: + * * desc_version + * * desc_size, + * * heap_base + * * heap_end + * * eclock_hz + * * flags + * * argc + * * argv + * * cvmx_desc_vaddr + * * debugger_flags_base_addr + * + * All other fields have been moved to the cvmx_descriptor, and the new + * fields should be added there. They are left as placeholders in this + * structure for binary compatibility. + * + * NOTE: This structure must match what is in the toolchain octeon-app-init.h + * file. + */ +struct octeon_boot_descriptor { + /* Start of block referenced by assembly code - do not change! */ + u32 desc_version; + u32 desc_size; + u64 stack_top; + u64 heap_base; + u64 heap_end; + u64 deprecated17; + u64 deprecated16; + /* End of block referenced by assembly code - do not change! */ + u32 deprecated18; + u32 deprecated15; + u32 deprecated14; + u32 argc; /* argc for main() */ + u32 argv[OCTEON_ARGV_MAX_ARGS]; /* argv for main() */ + u32 flags; /* Flags for application */ + u32 core_mask; /* Coremask running this image */ + u32 dram_size; /* DEPRECATED, DRAM size in megabyes. Used up to SDK 1.8.1 */ + u32 phy_mem_desc_addr; + u32 debugger_flags_base_addr; /* used to pass flags from app to debugger. */ + u32 eclock_hz; /* CPU clock speed, in hz. */ + u32 deprecated10; + u32 deprecated9; + u16 deprecated8; + u8 deprecated7; + u8 deprecated6; + u16 deprecated5; + u8 deprecated4; + u8 deprecated3; + char deprecated2[OCTEON_SERIAL_LEN]; + u8 deprecated1[6]; + u8 deprecated0; + u64 cvmx_desc_vaddr; /* Address of cvmx descriptor */ +}; + +static struct octeon_boot_descriptor boot_desc[CVMX_MIPS_MAX_CORES]; +static struct cvmx_bootinfo cvmx_bootinfo_array[CVMX_MIPS_MAX_CORES]; + +/** + * Programs the boot bus moveable region + * @param base base address to place the boot bus moveable region + * (bits [31:7]) + * @param region_num Selects which region, 0 or 1 for node 0, + * 2 or 3 for node 1 + * @param enable Set true to enable, false to disable + * @param data Pointer to data to put in the region, up to + * 16 dwords. + * @param num_words Number of data dwords (up to 32) + * + * @return 0 for success, -1 on error + */ +static int octeon_set_moveable_region(u32 base, int region_num, + bool enable, const u64 *data, + unsigned int num_words) +{ + int node = region_num >> 1; + u64 val; + int i; + u8 node_mask = 0x01; /* ToDo: Currently only one node is supported */ + + debug("%s(0x%x, %d, %d, %p, %u)\n", __func__, base, region_num, enable, + data, num_words); + + if (num_words > 32) { + printf("%s: Too many words (%d) for region %d\n", __func__, + num_words, region_num); + return -1; + } + + if (base & 0x7f) { + printf("%s: Error: base address 0x%x must be 128 byte aligned\n", + __func__, base); + return -1; + } + + if (region_num > (node_mask > 1 ? 3 : 1)) { + printf("%s: Region number %d out of range\n", + __func__, region_num); + return -1; + } + + if (!data && num_words > 0) { + printf("%s: Error: NULL data\n", __func__); + return -1; + } + + region_num &= 1; + + val = MIO_BOOT_LOC_CFG_EN | + FIELD_PREP(MIO_BOOT_LOC_CFG_BASE, base >> 7); + debug("%s: Setting MIO_BOOT_LOC_CFG(%d) on node %d to 0x%llx\n", + __func__, region_num, node, val); + csr_wr(CVMX_MIO_BOOT_LOC_CFGX(region_num & 1), val); + + val = FIELD_PREP(MIO_BOOT_LOC_ADR_ADR, (region_num ? 0x80 : 0x00) >> 3); + debug("%s: Setting MIO_BOOT_LOC_ADR start to 0x%llx\n", __func__, val); + csr_wr(CVMX_MIO_BOOT_LOC_ADR, val); + + for (i = 0; i < num_words; i++) { + debug(" 0x%02llx: 0x%016llx\n", + csr_rd(CVMX_MIO_BOOT_LOC_ADR), data[i]); + csr_wr(CVMX_MIO_BOOT_LOC_DAT, data[i]); + } + + return 0; +} + +/** + * Parse comma separated numbers into an array + * + * @param[out] values values read for each node + * @param[in] str string to parse + * @param base 0 for auto, otherwise 8, 10 or 16 for the number base + * + * @return number of values read. + */ +static int octeon_parse_nodes(u64 values[CVMX_MAX_NODES], + const char *str, int base) +{ + int node = 0; + char *sep; + + do { + debug("Parsing node %d: \"%s\"\n", node, str); + values[node] = simple_strtoull(str, &sep, base); + debug(" node %d: 0x%llx\n", node, values[node]); + str = sep + 1; + } while (++node < CVMX_MAX_NODES && *sep == ','); + + debug("%s: returning %d\n", __func__, node); + return node; +} + +/** + * Parse command line arguments + * + * @param argc number of arguments + * @param[in] argv array of argument strings + * @param cmd command type + * @param[out] boot_args parsed values + * + * @return number of arguments parsed + */ +int octeon_parse_bootopts(int argc, char *const argv[], + enum octeon_boot_cmd_type cmd, + struct octeon_boot_args *boot_args) +{ + u64 node_values[CVMX_MAX_NODES]; + int arg, j; + int num_values; + int node; + u8 node_mask = 0x01; /* ToDo: Currently only one node is supported */ + + debug("%s(%d, %p, %d, %p)\n", __func__, argc, argv, cmd, boot_args); + memset(boot_args, 0, sizeof(*boot_args)); + boot_args->stack_size = DEFAULT_STACK_SIZE; + boot_args->heap_size = DEFAULT_HEAP_SIZE; + boot_args->node_mask = 0; + + for (arg = 0; arg < argc; arg++) { + debug(" argv[%d]: %s\n", arg, argv[arg]); + if (cmd == BOOTOCT && !strncmp(argv[arg], "stack=", 6)) { + boot_args->stack_size = simple_strtoul(argv[arg] + 6, + NULL, 0); + } else if (cmd == BOOTOCT && !strncmp(argv[arg], "heap=", 5)) { + boot_args->heap_size = simple_strtoul(argv[arg] + 5, + NULL, 0); + } else if (!strncmp(argv[arg], "debug", 5)) { + puts("setting debug flag!\n"); + boot_args->boot_flags |= OCTEON_BL_FLAG_DEBUG; + } else if (cmd == BOOTOCT && !strncmp(argv[arg], "break", 5)) { + puts("setting break flag!\n"); + boot_args->boot_flags |= OCTEON_BL_FLAG_BREAK; + } else if (!strncmp(argv[arg], "forceboot", 9)) { + boot_args->forceboot = true; + } else if (!strncmp(argv[arg], "nodemask=", 9)) { + boot_args->node_mask = simple_strtoul(argv[arg] + 9, + NULL, 16); + } else if (!strncmp(argv[arg], "numcores=", 9)) { + memset(node_values, 0, sizeof(node_values)); + num_values = octeon_parse_nodes(node_values, + argv[arg] + 9, 0); + for (j = 0; j < num_values; j++) + boot_args->num_cores[j] = node_values[j]; + boot_args->num_cores_set = true; + } else if (!strncmp(argv[arg], "skipcores=", 10)) { + memset(node_values, 0, sizeof(node_values)); + num_values = octeon_parse_nodes(node_values, + argv[arg] + 10, 0); + for (j = 0; j < num_values; j++) + boot_args->num_skipped[j] = node_values[j]; + boot_args->num_skipped_set = true; + } else if (!strncmp(argv[arg], "console_uart=", 13)) { + boot_args->console_uart = simple_strtoul(argv[arg] + 13, + NULL, 0); + if (boot_args->console_uart == 1) { + boot_args->boot_flags |= + OCTEON_BL_FLAG_CONSOLE_UART1; + } else if (!boot_args->console_uart) { + boot_args->boot_flags &= + ~OCTEON_BL_FLAG_CONSOLE_UART1; + } + } else if (!strncmp(argv[arg], "coremask=", 9)) { + memset(node_values, 0, sizeof(node_values)); + num_values = octeon_parse_nodes(node_values, + argv[arg] + 9, 16); + for (j = 0; j < num_values; j++) + cvmx_coremask_set64_node(&boot_args->coremask, + j, node_values[j]); + boot_args->coremask_set = true; + } else if (cmd == BOOTOCTLINUX && + !strncmp(argv[arg], "namedblock=", 11)) { + boot_args->named_block = argv[arg] + 11; + } else if (!strncmp(argv[arg], "endbootargs", 11)) { + boot_args->endbootargs = 1; + arg++; + if (argc >= arg && cmd != BOOTOCTLINUX) + boot_args->app_name = argv[arg]; + break; + } else { + debug(" Unknown argument \"%s\"\n", argv[arg]); + } + } + + if (boot_args->coremask_set && boot_args->num_cores_set) { + puts("Warning: both coremask and numcores are set, using coremask.\n"); + } else if (!boot_args->coremask_set && !boot_args->num_cores_set) { + cvmx_coremask_set_core(&boot_args->coremask, 0); + boot_args->coremask_set = true; + } else if ((!boot_args->coremask_set) && boot_args->num_cores_set) { + cvmx_coremask_for_each_node(node, node_mask) + cvmx_coremask_set64_node(&boot_args->coremask, node, + ((1ull << boot_args->num_cores[node]) - 1) << + boot_args->num_skipped[node]); + boot_args->coremask_set = true; + } + + /* Update the node mask based on the coremask or the number of cores */ + for (j = 0; j < CVMX_MAX_NODES; j++) { + if (cvmx_coremask_get64_node(&boot_args->coremask, j)) + boot_args->node_mask |= 1 << j; + } + + debug("%s: return %d\n", __func__, arg); + return arg; +} + +int do_bootoctlinux(struct cmd_tbl *cmdtp, int flag, int argc, + char *const argv[]) +{ + typedef void __noreturn (*kernel_entry_t)(int, ulong, ulong, ulong); + kernel_entry_t kernel; + struct octeon_boot_args boot_args; + int arg_start = 1; + int arg_count; + u64 addr = 0; /* Address of the ELF image */ + int arg0; + u64 arg1; + u64 arg2; + u64 arg3; + int ret; + struct cvmx_coremask core_mask; + struct cvmx_coremask coremask_to_run; + struct cvmx_coremask avail_coremask; + int first_core; + int core; + struct ram_info ram; + struct udevice *dev; + const u64 *nmi_code; + int num_dwords; + u8 node_mask = 0x01; + int i; + + cvmx_coremask_clear_all(&core_mask); + cvmx_coremask_clear_all(&coremask_to_run); + + if (argc >= 2 && (isxdigit(argv[1][0]) && (isxdigit(argv[1][1]) || + argv[1][1] == 'x' || + argv[1][1] == 'X' || + argv[1][1] == '\0'))) { + addr = simple_strtoul(argv[1], NULL, 16); + if (!addr) + addr = CONFIG_SYS_LOAD_ADDR; + arg_start++; + } + if (addr == 0) + addr = CONFIG_SYS_LOAD_ADDR; + + debug("%s: arg start: %d\n", __func__, arg_start); + arg_count = octeon_parse_bootopts(argc - arg_start, argv + arg_start, + BOOTOCTLINUX, &boot_args); + + debug("%s:\n" + " named block: %s\n" + " node mask: 0x%x\n" + " stack size: 0x%x\n" + " heap size: 0x%x\n" + " boot flags: 0x%x\n" + " force boot: %s\n" + " coremask set: %s\n" + " num cores set: %s\n" + " num skipped set: %s\n" + " endbootargs: %s\n", + __func__, + boot_args.named_block ? boot_args.named_block : "none", + boot_args.node_mask, + boot_args.stack_size, + boot_args.heap_size, + boot_args.boot_flags, + boot_args.forceboot ? "true" : "false", + boot_args.coremask_set ? "true" : "false", + boot_args.num_cores_set ? "true" : "false", + boot_args.num_skipped_set ? "true" : "false", + boot_args.endbootargs ? "true" : "false"); + debug(" num cores: "); + for (i = 0; i < CVMX_MAX_NODES; i++) + debug("%s%d", i > 0 ? ", " : "", boot_args.num_cores[i]); + debug("\n num skipped: "); + for (i = 0; i < CVMX_MAX_NODES; i++) { + debug("%s%d", i > 0 ? ", " : "", boot_args.num_skipped[i]); + debug("\n coremask:\n"); + cvmx_coremask_dprint(&boot_args.coremask); + } + + if (boot_args.endbootargs) { + debug("endbootargs set, adjusting argc from %d to %d, arg_count: %d, arg_start: %d\n", + argc, argc - (arg_count + arg_start), arg_count, + arg_start); + argc -= (arg_count + arg_start); + argv += (arg_count + arg_start); + } + + /* + * numcores specification overrides a coremask on the same command line + */ + cvmx_coremask_copy(&core_mask, &boot_args.coremask); + + /* + * Remove cores from coremask based on environment variable stored in + * flash + */ + if (validate_coremask(&core_mask) != 0) { + puts("Invalid coremask.\n"); + return 1; + } else if (cvmx_coremask_is_empty(&core_mask)) { + puts("Coremask is empty after coremask_override mask. Nothing to do.\n"); + return 0; + } + + if (cvmx_coremask_intersects(&core_mask, &coremask_to_run)) { + puts("ERROR: Can't load code on core twice! Provided coremask:\n"); + cvmx_coremask_print(&core_mask); + puts("overlaps previously loaded coremask:\n"); + cvmx_coremask_print(&coremask_to_run); + return -1; + } + + debug("Setting up boot descriptor block with core mask:\n"); + cvmx_coremask_dprint(&core_mask); + + /* + * Add coremask to global mask of cores that have been set up and are + * runable + */ + cvmx_coremask_or(&coremask_to_run, &coremask_to_run, &core_mask); + + /* Get RAM size */ + ret = uclass_get_device(UCLASS_RAM, 0, &dev); + if (ret) { + debug("DRAM init failed: %d\n", ret); + return ret; + } + + ret = ram_get_info(dev, &ram); + if (ret) { + debug("Cannot get DRAM size: %d\n", ret); + return ret; + } + + /* + * Load kernel ELF image, or try binary if ELF is not detected. + * This way the much smaller vmlinux.bin can also be started but + * has to be loaded at the correct address (ep as parameter). + */ + if (!valid_elf_image(addr)) + printf("Booting binary image instead (vmlinux.bin)...\n"); + else + addr = load_elf_image_shdr(addr); + + /* Set kernel entry point */ + kernel = (kernel_entry_t)addr; + + /* Init bootmem list for Linux kernel booting */ + if (!cvmx_bootmem_phy_mem_list_init( + ram.size, OCTEON_RESERVED_LOW_MEM_SIZE, + (void *)CKSEG0ADDR(BOOTLOADER_BOOTMEM_DESC_SPACE))) { + printf("FATAL: Error initializing free memory list\n"); + return 0; + } + + first_core = cvmx_coremask_get_first_core(&coremask_to_run); + + cvmx_coremask_for_each_core(core, &coremask_to_run) { + debug("%s: Activating core %d\n", __func__, core); + + cvmx_bootinfo_array[core].core_mask = + cvmx_coremask_get32(&coremask_to_run); + cvmx_coremask_copy(&cvmx_bootinfo_array[core].ext_core_mask, + &coremask_to_run); + + if (core == first_core) + cvmx_bootinfo_array[core].flags |= BOOT_FLAG_INIT_CORE; + + cvmx_bootinfo_array[core].dram_size = ram.size / (1024 * 1024); + + cvmx_bootinfo_array[core].dclock_hz = gd->mem_clk * 1000000; + cvmx_bootinfo_array[core].eclock_hz = gd->cpu_clk; + + cvmx_bootinfo_array[core].led_display_base_addr = 0; + cvmx_bootinfo_array[core].phy_mem_desc_addr = + ((u32)(u64)__cvmx_bootmem_internal_get_desc_ptr()) & + 0x7ffffff; + + cvmx_bootinfo_array[core].major_version = CVMX_BOOTINFO_MAJ_VER; + cvmx_bootinfo_array[core].minor_version = CVMX_BOOTINFO_MIN_VER; + cvmx_bootinfo_array[core].fdt_addr = virt_to_phys(gd->fdt_blob); + + boot_desc[core].dram_size = gd->ram_size / (1024 * 1024); + boot_desc[core].cvmx_desc_vaddr = + virt_to_phys(&cvmx_bootinfo_array[core]); + + boot_desc[core].desc_version = OCTEON_CURRENT_DESC_VERSION; + boot_desc[core].desc_size = sizeof(boot_desc[0]); + + boot_desc[core].flags = cvmx_bootinfo_array[core].flags; + boot_desc[core].eclock_hz = cvmx_bootinfo_array[core].eclock_hz; + + boot_desc[core].argc = argc; + for (i = 0; i < argc; i++) + boot_desc[core].argv[i] = (u32)virt_to_phys(argv[i]); + } + + core = 0; + arg0 = argc; + arg1 = (u64)argv; + arg2 = 0x1; /* Core 0 sets init core for Linux */ + arg3 = XKPHYS | virt_to_phys(&boot_desc[core]); + + debug("## Transferring control to Linux (at address %p) ...\n", kernel); + + /* + * Flush cache before jumping to application. Let's flush the + * whole SDRAM area, since we don't know the size of the image + * that was loaded. + */ + flush_cache(gd->ram_base, gd->ram_top - gd->ram_base); + + /* Take all cores out of reset */ + csr_wr(CVMX_CIU_PP_RST, 0); + sync(); + + /* Wait a short while for the other cores... */ + mdelay(100); + + /* Install boot code into moveable bus for NMI (other cores) */ + nmi_code = (const u64 *)nmi_bootvector; + num_dwords = (((u64)&nmi_handler_para[0] - (u64)nmi_code) + 7) / 8; + + ret = octeon_set_moveable_region(0x1fc00000, 0, true, nmi_code, + num_dwords); + if (ret) { + printf("Error installing NMI handler for SMP core startup\n"); + return 0; + } + + /* Write NMI handler parameters for Linux kernel booting */ + nmi_handler_para[0] = (u64)kernel; + nmi_handler_para[1] = arg0; + nmi_handler_para[2] = arg1; + nmi_handler_para[3] = 0; /* Don't set init core for secondary cores */ + nmi_handler_para[4] = arg3; + sync(); + + /* Wait a short while for the other cores... */ + mdelay(100); + + /* + * Cores have already been taken out of reset to conserve power. + * We need to send a NMI to get the cores out of their wait loop + */ + octeon_get_available_coremask(&avail_coremask); + debug("Available coremask:\n"); + cvmx_coremask_dprint(&avail_coremask); + debug("Starting coremask:\n"); + cvmx_coremask_dprint(&coremask_to_run); + debug("Sending NMIs to other cores\n"); + if (octeon_has_feature(OCTEON_FEATURE_CIU3)) { + u64 avail_cm; + int node; + + cvmx_coremask_for_each_node(node, node_mask) { + avail_cm = cvmx_coremask_get64_node(&avail_coremask, + node); + + if (avail_cm != 0) { + debug("Sending NMI to node %d, coremask=0x%llx, CIU3_NMI=0x%llx\n", + node, avail_cm, + (node > 0 ? -1ull : -2ull) & avail_cm); + csr_wr(CVMX_CIU3_NMI, + (node > 0 ? -1ull : -2ull) & avail_cm); + } + } + } else { + csr_wr(CVMX_CIU_NMI, + -2ull & cvmx_coremask_get64(&avail_coremask)); + } + debug("Done sending NMIs\n"); + + /* Wait a short while for the other cores... */ + mdelay(100); + + /* + * pass address parameter as argv[0] (aka command name), + * and all remaining args + * a0 = argc + * a1 = argv (32 bit physical addresses, not pointers) + * a2 = init core + * a3 = boot descriptor address + * a4/t0 = entry point (only used by assembly stub) + */ + kernel(arg0, arg1, arg2, arg3); + + return 0; +} + +U_BOOT_CMD(bootoctlinux, 32, 0, do_bootoctlinux, + "Boot from a linux ELF image in memory", + "elf_address [coremask=mask_to_run | numcores=core_cnt_to_run] " + "[forceboot] [skipcores=core_cnt_to_skip] [namedblock=name] [endbootargs] [app_args ...]\n" + "elf_address - address of ELF image to load. If 0, default load address\n" + " is used.\n" + "coremask - mask of cores to run on. Anded with coremask_override\n" + " environment variable to ensure only working cores are used\n" + "numcores - number of cores to run on. Runs on specified number of cores,\n" + " taking into account the coremask_override.\n" + "skipcores - only meaningful with numcores. Skips this many cores\n" + " (starting from 0) when loading the numcores cores.\n" + " For example, setting skipcores to 1 will skip core 0\n" + " and load the application starting at the next available core.\n" + "forceboot - if set, boots application even if core 0 is not in mask\n" + "namedblock - specifies a named block to load the kernel\n" + "endbootargs - if set, bootloader does not process any further arguments and\n" + " only passes the arguments that follow to the kernel.\n" + " If not set, the kernel gets the entire commnad line as\n" + " arguments.\n" "\n"); diff --git a/arch/mips/mach-octeon/cache.c b/arch/mips/mach-octeon/cache.c index 9a88bb97c7..f293d65dae 100644 --- a/arch/mips/mach-octeon/cache.c +++ b/arch/mips/mach-octeon/cache.c @@ -5,14 +5,13 @@ #include -/* - * The Octeon platform is cache coherent and cache flushes and invalidates - * are not needed. Define some platform specific empty flush_foo() - * functions here to overwrite the _weak common function as a no-op. - * This effectively disables all cache operations. - */ +/* Octeon memory write barrier */ +#define CVMX_SYNCW asm volatile ("syncw\nsyncw\n" : : : "memory") + void flush_dcache_range(ulong start_addr, ulong stop) { + /* Flush all pending writes */ + CVMX_SYNCW; } void flush_cache(ulong start_addr, ulong size) @@ -21,4 +20,5 @@ void flush_cache(ulong start_addr, ulong size) void invalidate_dcache_range(ulong start_addr, ulong stop) { + /* Don't need to do anything for OCTEON */ } diff --git a/arch/mips/mach-octeon/cpu.c b/arch/mips/mach-octeon/cpu.c index 2680a2e6ed..6f87a4ef8c 100644 --- a/arch/mips/mach-octeon/cpu.c +++ b/arch/mips/mach-octeon/cpu.c @@ -13,6 +13,27 @@ DECLARE_GLOBAL_DATA_PTR; +/* + * TRUE for devices having registers with little-endian byte + * order, FALSE for registers with native-endian byte order. + * PCI mandates little-endian, USB and SATA are configurable, + * but we chose little-endian for these. + * + * This table will be referened in the Octeon platform specific + * mangle-port.h header. + */ +const bool octeon_should_swizzle_table[256] = { + [0x00] = true, /* bootbus/CF */ + [0x1b] = true, /* PCI mmio window */ + [0x1c] = true, /* PCI mmio window */ + [0x1d] = true, /* PCI mmio window */ + [0x1e] = true, /* PCI mmio window */ + [0x68] = true, /* OCTEON III USB */ + [0x69] = true, /* OCTEON III USB */ + [0x6c] = true, /* OCTEON III SATA */ + [0x6f] = true, /* OCTEON II USB */ +}; + static int get_clocks(void) { const u64 ref_clock = PLL_REF_CLK; diff --git a/arch/mips/mach-octeon/cvmx-bootmem.c b/arch/mips/mach-octeon/cvmx-bootmem.c new file mode 100644 index 0000000000..80bb7ac6c8 --- /dev/null +++ b/arch/mips/mach-octeon/cvmx-bootmem.c @@ -0,0 +1,1460 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018-2020 Marvell International Ltd. + */ + +/* + * Simple allocate only memory allocator. Used to allocate memory at + * application start time. + */ + +#include + +#include +#include +#include + +#include +#include +#include +#include + +DECLARE_GLOBAL_DATA_PTR; + +#define CVMX_MIPS32_SPACE_KSEG0 1L +#define CVMX_MIPS_SPACE_XKPHYS 2LL + +#define CVMX_ADD_SEG(seg, add) ((((u64)(seg)) << 62) | (add)) +#define CVMX_ADD_SEG32(seg, add) (((u32)(seg) << 31) | (u32)(add)) + +/** + * This is the physical location of a struct cvmx_bootmem_desc + * structure in Octeon's memory. Note that dues to addressing + * limits or runtime environment it might not be possible to + * create a C pointer to this structure. + */ +static u64 cvmx_bootmem_desc_addr; + +/** + * This macro returns the size of a member of a structure. + * Logically it is the same as "sizeof(s::field)" in C++, but + * C lacks the "::" operator. + */ +#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field) + +/** + * This macro returns a member of the struct cvmx_bootmem_desc + * structure. These members can't be directly addressed as + * they might be in memory not directly reachable. In the case + * where bootmem is compiled with LINUX_HOST, the structure + * itself might be located on a remote Octeon. The argument + * "field" is the member name of the struct cvmx_bootmem_desc to read. + * Regardless of the type of the field, the return type is always + * a u64. + */ +#define CVMX_BOOTMEM_DESC_GET_FIELD(field) \ + __cvmx_bootmem_desc_get(cvmx_bootmem_desc_addr, \ + offsetof(struct cvmx_bootmem_desc, field), \ + SIZEOF_FIELD(struct cvmx_bootmem_desc, field)) + +/** + * This macro writes a member of the struct cvmx_bootmem_desc + * structure. These members can't be directly addressed as + * they might be in memory not directly reachable. In the case + * where bootmem is compiled with LINUX_HOST, the structure + * itself might be located on a remote Octeon. The argument + * "field" is the member name of the struct cvmx_bootmem_desc to write. + */ +#define CVMX_BOOTMEM_DESC_SET_FIELD(field, value) \ + __cvmx_bootmem_desc_set(cvmx_bootmem_desc_addr, \ + offsetof(struct cvmx_bootmem_desc, field), \ + SIZEOF_FIELD(struct cvmx_bootmem_desc, field), \ + value) + +/** + * This macro returns a member of the + * struct cvmx_bootmem_named_block_desc structure. These members can't + * be directly addressed as they might be in memory not directly + * reachable. In the case where bootmem is compiled with + * LINUX_HOST, the structure itself might be located on a remote + * Octeon. The argument "field" is the member name of the + * struct cvmx_bootmem_named_block_desc to read. Regardless of the type + * of the field, the return type is always a u64. The "addr" + * parameter is the physical address of the structure. + */ +#define CVMX_BOOTMEM_NAMED_GET_FIELD(addr, field) \ + __cvmx_bootmem_desc_get(addr, \ + offsetof(struct cvmx_bootmem_named_block_desc, field), \ + SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field)) + +/** + * This macro writes a member of the struct cvmx_bootmem_named_block_desc + * structure. These members can't be directly addressed as + * they might be in memory not directly reachable. In the case + * where bootmem is compiled with LINUX_HOST, the structure + * itself might be located on a remote Octeon. The argument + * "field" is the member name of the + * struct cvmx_bootmem_named_block_desc to write. The "addr" parameter + * is the physical address of the structure. + */ +#define CVMX_BOOTMEM_NAMED_SET_FIELD(addr, field, value) \ + __cvmx_bootmem_desc_set(addr, \ + offsetof(struct cvmx_bootmem_named_block_desc, field), \ + SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field), \ + value) + +/** + * This function is the implementation of the get macros defined + * for individual structure members. The argument are generated + * by the macros inorder to read only the needed memory. + * + * @param base 64bit physical address of the complete structure + * @param offset Offset from the beginning of the structure to the member being + * accessed. + * @param size Size of the structure member. + * + * @return Value of the structure member promoted into a u64. + */ +static inline u64 __cvmx_bootmem_desc_get(u64 base, int offset, + int size) +{ + base = (1ull << 63) | (base + offset); + switch (size) { + case 4: + return cvmx_read64_uint32(base); + case 8: + return cvmx_read64_uint64(base); + default: + return 0; + } +} + +/** + * This function is the implementation of the set macros defined + * for individual structure members. The argument are generated + * by the macros in order to write only the needed memory. + * + * @param base 64bit physical address of the complete structure + * @param offset Offset from the beginning of the structure to the member being + * accessed. + * @param size Size of the structure member. + * @param value Value to write into the structure + */ +static inline void __cvmx_bootmem_desc_set(u64 base, int offset, int size, + u64 value) +{ + base = (1ull << 63) | (base + offset); + switch (size) { + case 4: + cvmx_write64_uint32(base, value); + break; + case 8: + cvmx_write64_uint64(base, value); + break; + default: + break; + } +} + +/** + * This function returns the address of the bootmem descriptor lock. + * + * @return 64-bit address in KSEG0 of the bootmem descriptor block + */ +static inline u64 __cvmx_bootmem_get_lock_addr(void) +{ + return (1ull << 63) | + (cvmx_bootmem_desc_addr + offsetof(struct cvmx_bootmem_desc, lock)); +} + +/** + * This function retrieves the string name of a named block. It is + * more complicated than a simple memcpy() since the named block + * descriptor may not be directly accessible. + * + * @param addr Physical address of the named block descriptor + * @param str String to receive the named block string name + * @param len Length of the string buffer, which must match the length + * stored in the bootmem descriptor. + */ +static void CVMX_BOOTMEM_NAMED_GET_NAME(u64 addr, char *str, int len) +{ + int l = len; + char *ptr = str; + + addr |= (1ull << 63); + addr += offsetof(struct cvmx_bootmem_named_block_desc, name); + while (l) { + /* + * With big-endian in memory byte order, this gives uniform + * results for the CPU in either big or Little endian mode. + */ + u64 blob = cvmx_read64_uint64(addr); + int sa = 56; + + addr += sizeof(u64); + while (l && sa >= 0) { + *ptr++ = (char)(blob >> sa); + l--; + sa -= 8; + } + } + str[len] = 0; +} + +/** + * This function stores the string name of a named block. It is + * more complicated than a simple memcpy() since the named block + * descriptor may not be directly accessible. + * + * @param addr Physical address of the named block descriptor + * @param str String to store into the named block string name + * @param len Length of the string buffer, which must match the length + * stored in the bootmem descriptor. + */ +void CVMX_BOOTMEM_NAMED_SET_NAME(u64 addr, const char *str, int len) +{ + int l = len; + + addr |= (1ull << 63); + addr += offsetof(struct cvmx_bootmem_named_block_desc, name); + + while (l) { + /* + * With big-endian in memory byte order, this gives uniform + * results for the CPU in either big or Little endian mode. + */ + u64 blob = 0; + int sa = 56; + + while (l && sa >= 0) { + u64 c = (u8)(*str++); + + l--; + if (l == 0) + c = 0; + blob |= c << sa; + sa -= 8; + } + cvmx_write64_uint64(addr, blob); + addr += sizeof(u64); + } +} + +/* See header file for descriptions of functions */ + +/* + * Wrapper functions are provided for reading/writing the size and next block + * values as these may not be directly addressible (in 32 bit applications, for + * instance.) + * + * Offsets of data elements in bootmem list, must match + * struct cvmx_bootmem_block_header + */ +#define NEXT_OFFSET 0 +#define SIZE_OFFSET 8 + +static void cvmx_bootmem_phy_set_size(u64 addr, u64 size) +{ + cvmx_write64_uint64((addr + SIZE_OFFSET) | (1ull << 63), size); +} + +static void cvmx_bootmem_phy_set_next(u64 addr, u64 next) +{ + cvmx_write64_uint64((addr + NEXT_OFFSET) | (1ull << 63), next); +} + +static u64 cvmx_bootmem_phy_get_size(u64 addr) +{ + return cvmx_read64_uint64((addr + SIZE_OFFSET) | (1ull << 63)); +} + +static u64 cvmx_bootmem_phy_get_next(u64 addr) +{ + return cvmx_read64_uint64((addr + NEXT_OFFSET) | (1ull << 63)); +} + +/** + * Check the version information on the bootmem descriptor + * + * @param exact_match + * Exact major version to check against. A zero means + * check that the version supports named blocks. + * + * @return Zero if the version is correct. Negative if the version is + * incorrect. Failures also cause a message to be displayed. + */ +static int __cvmx_bootmem_check_version(int exact_match) +{ + int major_version; + + major_version = CVMX_BOOTMEM_DESC_GET_FIELD(major_version); + if (major_version > 3 || + (exact_match && major_version) != exact_match) { + debug("ERROR: Incompatible bootmem descriptor version: %d.%d at addr: 0x%llx\n", + major_version, + (int)CVMX_BOOTMEM_DESC_GET_FIELD(minor_version), + CAST_ULL(cvmx_bootmem_desc_addr)); + return -1; + } else { + return 0; + } +} + +/** + * Get the low level bootmem descriptor lock. If no locking + * is specified in the flags, then nothing is done. + * + * @param flags CVMX_BOOTMEM_FLAG_NO_LOCKING means this functions should do + * nothing. This is used to support nested bootmem calls. + */ +static inline void __cvmx_bootmem_lock(u32 flags) +{ + if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING)) { + /* + * Unfortunately we can't use the normal cvmx-spinlock code as + * the memory for the bootmem descriptor may be not accessible + * by a C pointer. We use a 64bit XKPHYS address to access the + * memory directly + */ + u64 lock_addr = (1ull << 63) | + (cvmx_bootmem_desc_addr + offsetof(struct cvmx_bootmem_desc, + lock)); + unsigned int tmp; + + __asm__ __volatile__(".set noreorder\n" + "1: ll %[tmp], 0(%[addr])\n" + " bnez %[tmp], 1b\n" + " li %[tmp], 1\n" + " sc %[tmp], 0(%[addr])\n" + " beqz %[tmp], 1b\n" + " nop\n" + ".set reorder\n" + : [tmp] "=&r"(tmp) + : [addr] "r"(lock_addr) + : "memory"); + } +} + +/** + * Release the low level bootmem descriptor lock. If no locking + * is specified in the flags, then nothing is done. + * + * @param flags CVMX_BOOTMEM_FLAG_NO_LOCKING means this functions should do + * nothing. This is used to support nested bootmem calls. + */ +static inline void __cvmx_bootmem_unlock(u32 flags) +{ + if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING)) { + /* + * Unfortunately we can't use the normal cvmx-spinlock code as + * the memory for the bootmem descriptor may be not accessible + * by a C pointer. We use a 64bit XKPHYS address to access the + * memory directly + */ + u64 lock_addr = __cvmx_bootmem_get_lock_addr(); + + CVMX_SYNCW; + __asm__ __volatile__("sw $0, 0(%[addr])\n" + : : [addr] "r"(lock_addr) + : "memory"); + CVMX_SYNCW; + } +} + +/* + * Some of the cvmx-bootmem functions dealing with C pointers are not + * supported when we are compiling for CVMX_BUILD_FOR_LINUX_HOST. This + * ifndef removes these functions when they aren't needed. + * + * This functions takes an address range and adjusts it as necessary + * to match the ABI that is currently being used. This is required to + * ensure that bootmem_alloc* functions only return valid pointers for + * 32 bit ABIs + */ +static int __cvmx_validate_mem_range(u64 *min_addr_ptr, + u64 *max_addr_ptr) +{ + u64 max_phys = (1ull << 29) - 0x10; /* KSEG0 */ + + *min_addr_ptr = min_t(u64, max_t(u64, *min_addr_ptr, 0x0), max_phys); + if (!*max_addr_ptr) { + *max_addr_ptr = max_phys; + } else { + *max_addr_ptr = max_t(u64, min_t(u64, *max_addr_ptr, + max_phys), 0x0); + } + + return 0; +} + +u64 cvmx_bootmem_phy_alloc_range(u64 size, u64 alignment, + u64 min_addr, u64 max_addr) +{ + s64 address; + + __cvmx_validate_mem_range(&min_addr, &max_addr); + address = cvmx_bootmem_phy_alloc(size, min_addr, max_addr, + alignment, 0); + if (address > 0) + return address; + else + return 0; +} + +void *cvmx_bootmem_alloc_range(u64 size, u64 alignment, + u64 min_addr, u64 max_addr) +{ + s64 address; + + __cvmx_validate_mem_range(&min_addr, &max_addr); + address = cvmx_bootmem_phy_alloc(size, min_addr, max_addr, + alignment, 0); + + if (address > 0) + return cvmx_phys_to_ptr(address); + else + return NULL; +} + +void *cvmx_bootmem_alloc_address(u64 size, u64 address, + u64 alignment) +{ + return cvmx_bootmem_alloc_range(size, alignment, address, + address + size); +} + +void *cvmx_bootmem_alloc_node(u64 node, u64 size, u64 alignment) +{ + return cvmx_bootmem_alloc_range(size, alignment, + node << CVMX_NODE_MEM_SHIFT, + ((node + 1) << CVMX_NODE_MEM_SHIFT) - 1); +} + +void *cvmx_bootmem_alloc(u64 size, u64 alignment) +{ + return cvmx_bootmem_alloc_range(size, alignment, 0, 0); +} + +void *cvmx_bootmem_alloc_named_range_once(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name, + void (*init)(void *)) +{ + u64 named_block_desc_addr; + void *ptr; + s64 addr; + + __cvmx_bootmem_lock(0); + + __cvmx_validate_mem_range(&min_addr, &max_addr); + named_block_desc_addr = + cvmx_bootmem_phy_named_block_find(name, + CVMX_BOOTMEM_FLAG_NO_LOCKING); + + if (named_block_desc_addr) { + addr = CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_desc_addr, + base_addr); + __cvmx_bootmem_unlock(0); + return cvmx_phys_to_ptr(addr); + } + + addr = cvmx_bootmem_phy_named_block_alloc(size, min_addr, max_addr, + align, name, + CVMX_BOOTMEM_FLAG_NO_LOCKING); + + if (addr < 0) { + __cvmx_bootmem_unlock(0); + return NULL; + } + ptr = cvmx_phys_to_ptr(addr); + + if (init) + init(ptr); + else + memset(ptr, 0, size); + + __cvmx_bootmem_unlock(0); + return ptr; +} + +void *cvmx_bootmem_alloc_named_range_flags(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name, u32 flags) +{ + s64 addr; + + __cvmx_validate_mem_range(&min_addr, &max_addr); + addr = cvmx_bootmem_phy_named_block_alloc(size, min_addr, max_addr, + align, name, flags); + if (addr >= 0) + return cvmx_phys_to_ptr(addr); + else + return NULL; +} + +void *cvmx_bootmem_alloc_named_range(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name) +{ + return cvmx_bootmem_alloc_named_range_flags(size, min_addr, max_addr, + align, name, 0); +} + +void *cvmx_bootmem_alloc_named_address(u64 size, u64 address, + const char *name) +{ + return cvmx_bootmem_alloc_named_range(size, address, address + size, + 0, name); +} + +void *cvmx_bootmem_alloc_named(u64 size, u64 alignment, + const char *name) +{ + return cvmx_bootmem_alloc_named_range(size, 0, 0, alignment, name); +} + +void *cvmx_bootmem_alloc_named_flags(u64 size, u64 alignment, + const char *name, u32 flags) +{ + return cvmx_bootmem_alloc_named_range_flags(size, 0, 0, alignment, + name, flags); +} + +int cvmx_bootmem_free_named(const char *name) +{ + return cvmx_bootmem_phy_named_block_free(name, 0); +} + +/** + * Find a named block with flags + * + * @param name is the block name + * @param flags indicates the need to use locking during search + * @return pointer to named block descriptor + * + * Note: this function returns a pointer to a static structure, + * and is therefore not re-entrant. + * Making this function re-entrant will break backward compatibility. + */ +const struct cvmx_bootmem_named_block_desc * +__cvmx_bootmem_find_named_block_flags(const char *name, u32 flags) +{ + static struct cvmx_bootmem_named_block_desc desc; + u64 named_addr = cvmx_bootmem_phy_named_block_find(name, flags); + + if (named_addr) { + desc.base_addr = CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr, + base_addr); + desc.size = CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr, size); + strncpy(desc.name, name, sizeof(desc.name)); + desc.name[sizeof(desc.name) - 1] = 0; + return &desc; + } else { + return NULL; + } +} + +const struct cvmx_bootmem_named_block_desc * +cvmx_bootmem_find_named_block(const char *name) +{ + return __cvmx_bootmem_find_named_block_flags(name, 0); +} + +void cvmx_bootmem_print_named(void) +{ + cvmx_bootmem_phy_named_block_print(); +} + +int cvmx_bootmem_init(u64 mem_desc_addr) +{ + if (!cvmx_bootmem_desc_addr) + cvmx_bootmem_desc_addr = mem_desc_addr; + + return 0; +} + +u64 cvmx_bootmem_available_mem(u64 min_block_size) +{ + return cvmx_bootmem_phy_available_mem(min_block_size); +} + +/* + * The cvmx_bootmem_phy* functions below return 64 bit physical + * addresses, and expose more features that the cvmx_bootmem_functions + * above. These are required for full memory space access in 32 bit + * applications, as well as for using some advance features. Most + * applications should not need to use these. + */ + +s64 cvmx_bootmem_phy_alloc(u64 req_size, u64 address_min, + u64 address_max, u64 alignment, + u32 flags) +{ + u64 head_addr, ent_addr, ent_size; + u64 target_ent_addr = 0, target_prev_addr = 0; + u64 target_size = ~0ull; + u64 free_start, free_end; + u64 next_addr, prev_addr = 0; + u64 new_ent_addr = 0, new_ent_size; + u64 desired_min_addr, usable_max; + u64 align, align_mask; + + debug("%s: req_size: 0x%llx, min_addr: 0x%llx, max_addr: 0x%llx, align: 0x%llx\n", + __func__, CAST_ULL(req_size), CAST_ULL(address_min), + CAST_ULL(address_max), CAST_ULL(alignment)); + + if (__cvmx_bootmem_check_version(0)) + return -1; + + /* + * Do a variety of checks to validate the arguments. The + * allocator code will later assume that these checks have + * been made. We validate that the requested constraints are + * not self-contradictory before we look through the list of + * available memory + */ + + /* 0 is not a valid req_size for this allocator */ + if (!req_size) + return -1; + + /* Round req_size up to multiple of minimum alignment bytes */ + req_size = (req_size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) & + ~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1); + + /* Make sure alignment is power of 2, and at least the minimum */ + for (align = CVMX_BOOTMEM_ALIGNMENT_SIZE; + align < (1ull << 48); + align <<= 1) { + if (align >= alignment) + break; + } + + align_mask = ~(align - 1); + + /* + * Adjust address minimum based on requested alignment (round + * up to meet alignment). Do this here so we can reject + * impossible requests up front. (NOP for address_min == 0) + */ + address_min = (address_min + (align - 1)) & align_mask; + + /* + * Convert !0 address_min and 0 address_max to special case of + * range that specifies an exact memory block to allocate. Do + * this before other checks and adjustments so that this + * tranformation will be validated + */ + if (address_min && !address_max) + address_max = address_min + req_size; + else if (!address_min && !address_max) + address_max = ~0ull; /* If no limits given, use max */ + + /* + * Reject inconsistent args. We have adjusted these, so this + * may fail due to our internal changes even if this check + * would pass for the values the user supplied. + */ + if (req_size > address_max - address_min) + return -1; + + __cvmx_bootmem_lock(flags); + + /* Walk through the list entries to find the right fit */ + head_addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + + for (ent_addr = head_addr; + ent_addr != 0ULL && ent_addr < address_max; + prev_addr = ent_addr, + ent_addr = cvmx_bootmem_phy_get_next(ent_addr)) { + /* Raw free block size */ + ent_size = cvmx_bootmem_phy_get_size(ent_addr); + next_addr = cvmx_bootmem_phy_get_next(ent_addr); + + /* Validate the free list ascending order */ + if (ent_size < CVMX_BOOTMEM_ALIGNMENT_SIZE || + (next_addr && ent_addr > next_addr)) { + debug("ERROR: %s: bad free list ent: %#llx, next: %#llx\n", + __func__, CAST_ULL(ent_addr), + CAST_ULL(next_addr)); + goto error_out; + } + + /* adjust free block edges for alignment */ + free_start = (ent_addr + align - 1) & align_mask; + free_end = (ent_addr + ent_size) & align_mask; + + /* check that free block is large enough */ + if ((free_start + req_size) > free_end) + continue; + + /* check that desired start is within the free block */ + if (free_end < address_min || free_start > address_max) + continue; + if ((free_end - address_min) < req_size) + continue; + if ((address_max - free_start) < req_size) + continue; + + /* Found usebale free block */ + target_ent_addr = ent_addr; + target_prev_addr = prev_addr; + target_size = ent_size; + + /* Continue looking for highest/best block that fits */ + } + + /* Bail if the search has resulted in no eligible free blocks */ + if (target_ent_addr == 0) { + debug("%s: eligible free block not found\n", __func__); + goto error_out; + } + + /* Found the free block to allocate from */ + ent_addr = target_ent_addr; + prev_addr = target_prev_addr; + ent_size = target_size; + + debug("%s: using free block at %#010llx size %#llx\n", + __func__, CAST_ULL(ent_addr), CAST_ULL(ent_size)); + + /* Always allocate from the end of a free block */ + usable_max = min_t(u64, address_max, ent_addr + ent_size); + desired_min_addr = usable_max - req_size; + desired_min_addr &= align_mask; + + /* Split current free block into up to 3 free blocks */ + + /* Check for head room */ + if (desired_min_addr > ent_addr) { + /* Create a new free block at the allocation address */ + new_ent_addr = desired_min_addr; + new_ent_size = ent_size - (desired_min_addr - ent_addr); + + cvmx_bootmem_phy_set_next(new_ent_addr, + cvmx_bootmem_phy_get_next(ent_addr)); + cvmx_bootmem_phy_set_size(new_ent_addr, new_ent_size); + + /* Split out head room into a new free block */ + ent_size -= new_ent_size; + cvmx_bootmem_phy_set_next(ent_addr, new_ent_addr); + cvmx_bootmem_phy_set_size(ent_addr, ent_size); + + debug("%s: splitting head, addr %#llx size %#llx\n", + __func__, CAST_ULL(ent_addr), CAST_ULL(ent_size)); + + /* Make the allocation target the current free block */ + prev_addr = ent_addr; + ent_addr = new_ent_addr; + ent_size = new_ent_size; + } + + /* Check for tail room */ + if ((desired_min_addr + req_size) < (ent_addr + ent_size)) { + new_ent_addr = ent_addr + req_size; + new_ent_size = ent_size - req_size; + + /* Create a new free block from tail room */ + cvmx_bootmem_phy_set_next(new_ent_addr, + cvmx_bootmem_phy_get_next(ent_addr)); + cvmx_bootmem_phy_set_size(new_ent_addr, new_ent_size); + + debug("%s: splitting tail, addr %#llx size %#llx\n", + __func__, CAST_ULL(new_ent_addr), CAST_ULL(new_ent_size)); + + /* Adjust the current block to exclude tail room */ + ent_size = ent_size - new_ent_size; + cvmx_bootmem_phy_set_next(ent_addr, new_ent_addr); + cvmx_bootmem_phy_set_size(ent_addr, ent_size); + } + + /* The current free block IS the allocation target */ + if (desired_min_addr != ent_addr || ent_size != req_size) + debug("ERROR: %s: internal error - addr %#llx %#llx size %#llx %#llx\n", + __func__, CAST_ULL(desired_min_addr), CAST_ULL(ent_addr), + CAST_ULL(ent_size), CAST_ULL(req_size)); + + /* Remove the current free block from list */ + if (prev_addr) { + cvmx_bootmem_phy_set_next(prev_addr, + cvmx_bootmem_phy_get_next(ent_addr)); + } else { + /* head of list being returned, so update head ptr */ + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, + cvmx_bootmem_phy_get_next(ent_addr)); + } + + __cvmx_bootmem_unlock(flags); + debug("%s: allocated size: %#llx, at addr: %#010llx\n", + __func__, + CAST_ULL(req_size), + CAST_ULL(desired_min_addr)); + + return desired_min_addr; + +error_out: + /* Requested memory not found or argument error */ + __cvmx_bootmem_unlock(flags); + return -1; +} + +int __cvmx_bootmem_phy_free(u64 phy_addr, u64 size, u32 flags) +{ + u64 cur_addr; + u64 prev_addr = 0; /* zero is invalid */ + int retval = 0; + + debug("%s addr: %#llx, size: %#llx\n", __func__, + CAST_ULL(phy_addr), CAST_ULL(size)); + + if (__cvmx_bootmem_check_version(0)) + return 0; + + /* 0 is not a valid size for this allocator */ + if (!size || !phy_addr) + return 0; + + /* Round size up to mult of minimum alignment bytes */ + size = (size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) & + ~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1); + + __cvmx_bootmem_lock(flags); + cur_addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + if (cur_addr == 0 || phy_addr < cur_addr) { + /* add at front of list - special case with changing head ptr */ + if (cur_addr && phy_addr + size > cur_addr) + goto bootmem_free_done; /* error, overlapping section */ + else if (phy_addr + size == cur_addr) { + /* Add to front of existing first block */ + cvmx_bootmem_phy_set_next(phy_addr, + cvmx_bootmem_phy_get_next(cur_addr)); + cvmx_bootmem_phy_set_size(phy_addr, + cvmx_bootmem_phy_get_size(cur_addr) + size); + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, phy_addr); + + } else { + /* New block before first block */ + /* OK if cur_addr is 0 */ + cvmx_bootmem_phy_set_next(phy_addr, cur_addr); + cvmx_bootmem_phy_set_size(phy_addr, size); + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, phy_addr); + } + retval = 1; + goto bootmem_free_done; + } + + /* Find place in list to add block */ + while (cur_addr && phy_addr > cur_addr) { + prev_addr = cur_addr; + cur_addr = cvmx_bootmem_phy_get_next(cur_addr); + } + + if (!cur_addr) { + /* + * We have reached the end of the list, add on to end, checking + * to see if we need to combine with last block + */ + if (prev_addr + cvmx_bootmem_phy_get_size(prev_addr) == phy_addr) { + cvmx_bootmem_phy_set_size(prev_addr, + cvmx_bootmem_phy_get_size(prev_addr) + size); + } else { + cvmx_bootmem_phy_set_next(prev_addr, phy_addr); + cvmx_bootmem_phy_set_size(phy_addr, size); + cvmx_bootmem_phy_set_next(phy_addr, 0); + } + retval = 1; + goto bootmem_free_done; + } else { + /* + * insert between prev and cur nodes, checking for merge with + * either/both + */ + if (prev_addr + cvmx_bootmem_phy_get_size(prev_addr) == phy_addr) { + /* Merge with previous */ + cvmx_bootmem_phy_set_size(prev_addr, + cvmx_bootmem_phy_get_size(prev_addr) + size); + if (phy_addr + size == cur_addr) { + /* Also merge with current */ + cvmx_bootmem_phy_set_size(prev_addr, + cvmx_bootmem_phy_get_size(cur_addr) + + cvmx_bootmem_phy_get_size(prev_addr)); + cvmx_bootmem_phy_set_next(prev_addr, + cvmx_bootmem_phy_get_next(cur_addr)); + } + retval = 1; + goto bootmem_free_done; + } else if (phy_addr + size == cur_addr) { + /* Merge with current */ + cvmx_bootmem_phy_set_size(phy_addr, + cvmx_bootmem_phy_get_size(cur_addr) + size); + cvmx_bootmem_phy_set_next(phy_addr, + cvmx_bootmem_phy_get_next(cur_addr)); + cvmx_bootmem_phy_set_next(prev_addr, phy_addr); + retval = 1; + goto bootmem_free_done; + } + + /* It is a standalone block, add in between prev and cur */ + cvmx_bootmem_phy_set_size(phy_addr, size); + cvmx_bootmem_phy_set_next(phy_addr, cur_addr); + cvmx_bootmem_phy_set_next(prev_addr, phy_addr); + } + retval = 1; + +bootmem_free_done: + __cvmx_bootmem_unlock(flags); + return retval; +} + +void cvmx_bootmem_phy_list_print(void) +{ + u64 addr; + + addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + printf("\n\n\nPrinting bootmem block list, descriptor: 0x%llx, head is 0x%llx\n", + CAST_ULL(cvmx_bootmem_desc_addr), CAST_ULL(addr)); + printf("Descriptor version: %d.%d\n", + (int)CVMX_BOOTMEM_DESC_GET_FIELD(major_version), + (int)CVMX_BOOTMEM_DESC_GET_FIELD(minor_version)); + if (CVMX_BOOTMEM_DESC_GET_FIELD(major_version) > 3) + debug("Warning: Bootmem descriptor version is newer than expected\n"); + + if (!addr) + printf("mem list is empty!\n"); + + while (addr) { + printf("Block address: 0x%08llx, size: 0x%08llx, next: 0x%08llx\n", CAST_ULL(addr), + CAST_ULL(cvmx_bootmem_phy_get_size(addr)), + CAST_ULL(cvmx_bootmem_phy_get_next(addr))); + addr = cvmx_bootmem_phy_get_next(addr); + } + printf("\n\n"); +} + +u64 cvmx_bootmem_phy_available_mem(u64 min_block_size) +{ + u64 addr; + + u64 available_mem = 0; + + __cvmx_bootmem_lock(0); + addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + while (addr) { + if (cvmx_bootmem_phy_get_size(addr) >= min_block_size) + available_mem += cvmx_bootmem_phy_get_size(addr); + addr = cvmx_bootmem_phy_get_next(addr); + } + __cvmx_bootmem_unlock(0); + return available_mem; +} + +u64 cvmx_bootmem_phy_named_block_find(const char *name, u32 flags) +{ + u64 result = 0; + + debug("%s: %s\n", __func__, name); + + __cvmx_bootmem_lock(flags); + if (!__cvmx_bootmem_check_version(3)) { + int i; + u64 named_block_array_addr = + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_array_addr); + int num_blocks = + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_num_blocks); + int name_length = + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len); + u64 named_addr = named_block_array_addr; + + for (i = 0; i < num_blocks; i++) { + u64 named_size = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr, size); + if (name && named_size) { + char name_tmp[name_length + 1]; + + CVMX_BOOTMEM_NAMED_GET_NAME(named_addr, + name_tmp, + name_length); + if (!strncmp(name, name_tmp, name_length)) { + result = named_addr; + break; + } + } else if (!name && !named_size) { + result = named_addr; + break; + } + + named_addr += + sizeof(struct cvmx_bootmem_named_block_desc); + } + } + __cvmx_bootmem_unlock(flags); + return result; +} + +int cvmx_bootmem_phy_named_block_free(const char *name, u32 flags) +{ + u64 named_block_addr; + + if (__cvmx_bootmem_check_version(3)) + return 0; + + debug("%s: %s\n", __func__, name); + + /* + * Take lock here, as name lookup/block free/name free need to be + * atomic + */ + __cvmx_bootmem_lock(flags); + + named_block_addr = cvmx_bootmem_phy_named_block_find(name, + CVMX_BOOTMEM_FLAG_NO_LOCKING); + if (named_block_addr) { + u64 named_addr = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, + base_addr); + u64 named_size = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, size); + + debug("%s: %s, base: 0x%llx, size: 0x%llx\n", + __func__, name, CAST_ULL(named_addr), + CAST_ULL(named_size)); + + __cvmx_bootmem_phy_free(named_addr, named_size, + CVMX_BOOTMEM_FLAG_NO_LOCKING); + + /* Set size to zero to indicate block not used. */ + CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_addr, size, 0); + } + + __cvmx_bootmem_unlock(flags); + return !!named_block_addr; /* 0 on failure, 1 on success */ +} + +s64 cvmx_bootmem_phy_named_block_alloc(u64 size, u64 min_addr, + u64 max_addr, + u64 alignment, const char *name, + u32 flags) +{ + s64 addr_allocated; + u64 named_block_desc_addr; + + debug("%s: size: 0x%llx, min: 0x%llx, max: 0x%llx, align: 0x%llx, name: %s\n", + __func__, CAST_ULL(size), CAST_ULL(min_addr), CAST_ULL(max_addr), + CAST_ULL(alignment), name); + + if (__cvmx_bootmem_check_version(3)) + return -1; + + /* + * Take lock here, as name lookup/block alloc/name add need to be + * atomic + */ + __cvmx_bootmem_lock(flags); + + named_block_desc_addr = + cvmx_bootmem_phy_named_block_find(name, flags | + CVMX_BOOTMEM_FLAG_NO_LOCKING); + if (named_block_desc_addr) { + __cvmx_bootmem_unlock(flags); + return -1; + } + + /* Get pointer to first available named block descriptor */ + named_block_desc_addr = + cvmx_bootmem_phy_named_block_find(NULL, flags | + CVMX_BOOTMEM_FLAG_NO_LOCKING); + if (!named_block_desc_addr) { + __cvmx_bootmem_unlock(flags); + return -1; + } + + /* + * Round size up to mult of minimum alignment bytes + * We need the actual size allocated to allow for blocks to be + * coallesced when they are freed. The alloc routine does the + * same rounding up on all allocations. + */ + size = (size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) & + ~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1); + + addr_allocated = cvmx_bootmem_phy_alloc(size, min_addr, max_addr, + alignment, + flags | CVMX_BOOTMEM_FLAG_NO_LOCKING); + if (addr_allocated >= 0) { + CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_desc_addr, base_addr, + addr_allocated); + CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_desc_addr, size, size); + CVMX_BOOTMEM_NAMED_SET_NAME(named_block_desc_addr, name, + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len)); + } + + __cvmx_bootmem_unlock(flags); + return addr_allocated; +} + +void cvmx_bootmem_phy_named_block_print(void) +{ + int i; + int printed = 0; + + u64 named_block_array_addr = + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_array_addr); + int num_blocks = CVMX_BOOTMEM_DESC_GET_FIELD(named_block_num_blocks); + int name_length = CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len); + u64 named_block_addr = named_block_array_addr; + + debug("%s: desc addr: 0x%llx\n", + __func__, CAST_ULL(cvmx_bootmem_desc_addr)); + + if (__cvmx_bootmem_check_version(3)) + return; + + printf("List of currently allocated named bootmem blocks:\n"); + for (i = 0; i < num_blocks; i++) { + u64 named_size = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, size); + if (named_size) { + char name_tmp[name_length + 1]; + u64 named_addr = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, + base_addr); + CVMX_BOOTMEM_NAMED_GET_NAME(named_block_addr, name_tmp, + name_length); + printed++; + printf("Name: %s, address: 0x%08llx, size: 0x%08llx, index: %d\n", name_tmp, + CAST_ULL(named_addr), + CAST_ULL(named_size), i); + } + named_block_addr += + sizeof(struct cvmx_bootmem_named_block_desc); + } + + if (!printed) + printf("No named bootmem blocks exist.\n"); +} + +s64 cvmx_bootmem_phy_mem_list_init(u64 mem_size, + u32 low_reserved_bytes, + struct cvmx_bootmem_desc *desc_buffer) +{ + u64 cur_block_addr; + s64 addr; + int i; + + debug("%s (arg desc ptr: %p, cvmx_bootmem_desc: 0x%llx)\n", + __func__, desc_buffer, CAST_ULL(cvmx_bootmem_desc_addr)); + + /* + * Descriptor buffer needs to be in 32 bit addressable space to be + * compatible with 32 bit applications + */ + if (!desc_buffer) { + debug("ERROR: no memory for cvmx_bootmem descriptor provided\n"); + return 0; + } + + if (mem_size > OCTEON_MAX_PHY_MEM_SIZE) { + mem_size = OCTEON_MAX_PHY_MEM_SIZE; + debug("ERROR: requested memory size too large, truncating to maximum size\n"); + } + + if (cvmx_bootmem_desc_addr) + return 1; + + /* Initialize cvmx pointer to descriptor */ + cvmx_bootmem_init(cvmx_ptr_to_phys(desc_buffer)); + + /* Fill the bootmem descriptor */ + CVMX_BOOTMEM_DESC_SET_FIELD(lock, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(flags, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(major_version, CVMX_BOOTMEM_DESC_MAJ_VER); + CVMX_BOOTMEM_DESC_SET_FIELD(minor_version, CVMX_BOOTMEM_DESC_MIN_VER); + CVMX_BOOTMEM_DESC_SET_FIELD(app_data_addr, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(app_data_size, 0); + + /* + * Set up global pointer to start of list, exclude low 64k for exception + * vectors, space for global descriptor + */ + cur_block_addr = (OCTEON_DDR0_BASE + low_reserved_bytes); + + if (mem_size <= OCTEON_DDR0_SIZE) { + __cvmx_bootmem_phy_free(cur_block_addr, + mem_size - low_reserved_bytes, 0); + goto frees_done; + } + + __cvmx_bootmem_phy_free(cur_block_addr, + OCTEON_DDR0_SIZE - low_reserved_bytes, 0); + + mem_size -= OCTEON_DDR0_SIZE; + + /* Add DDR2 block next if present */ + if (mem_size > OCTEON_DDR1_SIZE) { + __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE, OCTEON_DDR1_SIZE, 0); + __cvmx_bootmem_phy_free(OCTEON_DDR2_BASE, + mem_size - OCTEON_DDR1_SIZE, 0); + } else { + __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE, mem_size, 0); + } +frees_done: + + /* Initialize the named block structure */ + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_name_len, CVMX_BOOTMEM_NAME_LEN); + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_num_blocks, + CVMX_BOOTMEM_NUM_NAMED_BLOCKS); + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, 0); + + /* Allocate this near the top of the low 256 MBytes of memory */ + addr = cvmx_bootmem_phy_alloc(CVMX_BOOTMEM_NUM_NAMED_BLOCKS * + sizeof(struct cvmx_bootmem_named_block_desc), + 0, 0x10000000, 0, + CVMX_BOOTMEM_FLAG_END_ALLOC); + if (addr >= 0) + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, addr); + + debug("%s: named_block_array_addr: 0x%llx)\n", + __func__, CAST_ULL(addr)); + + if (addr < 0) { + debug("FATAL ERROR: unable to allocate memory for bootmem descriptor!\n"); + return 0; + } + + for (i = 0; i < CVMX_BOOTMEM_NUM_NAMED_BLOCKS; i++) { + CVMX_BOOTMEM_NAMED_SET_FIELD(addr, base_addr, 0); + CVMX_BOOTMEM_NAMED_SET_FIELD(addr, size, 0); + addr += sizeof(struct cvmx_bootmem_named_block_desc); + } + + return 1; +} + +s64 cvmx_bootmem_phy_mem_list_init_multi(u8 node_mask, + u32 mem_sizes[], + u32 low_reserved_bytes, + struct cvmx_bootmem_desc *desc_buffer) +{ + u64 cur_block_addr; + u64 mem_size; + s64 addr; + int i; + int node; + u64 node_base; /* Make u64 to reduce type casting */ + + mem_sizes[0] = gd->ram_size / (1024 * 1024); + + debug("cvmx_bootmem_phy_mem_list_init (arg desc ptr: %p, cvmx_bootmem_desc: 0x%llx)\n", + desc_buffer, CAST_ULL(cvmx_bootmem_desc_addr)); + + /* + * Descriptor buffer needs to be in 32 bit addressable space to be + * compatible with 32 bit applications + */ + if (!desc_buffer) { + debug("ERROR: no memory for cvmx_bootmem descriptor provided\n"); + return 0; + } + + cvmx_coremask_for_each_node(node, node_mask) { + if ((mem_sizes[node] * 1024 * 1024) > OCTEON_MAX_PHY_MEM_SIZE) { + mem_sizes[node] = OCTEON_MAX_PHY_MEM_SIZE / + (1024 * 1024); + debug("ERROR node#%lld: requested memory size too large, truncating to maximum size\n", + CAST_ULL(node)); + } + } + + if (cvmx_bootmem_desc_addr) + return 1; + + /* Initialize cvmx pointer to descriptor */ + cvmx_bootmem_init(cvmx_ptr_to_phys(desc_buffer)); + + /* Fill the bootmem descriptor */ + CVMX_BOOTMEM_DESC_SET_FIELD(lock, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(flags, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(major_version, CVMX_BOOTMEM_DESC_MAJ_VER); + CVMX_BOOTMEM_DESC_SET_FIELD(minor_version, CVMX_BOOTMEM_DESC_MIN_VER); + CVMX_BOOTMEM_DESC_SET_FIELD(app_data_addr, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(app_data_size, 0); + + cvmx_coremask_for_each_node(node, node_mask) { + if (node != 0) /* do not reserve memory on remote nodes */ + low_reserved_bytes = 0; + + mem_size = (u64)mem_sizes[node] * (1024 * 1024); /* MBytes */ + + /* + * Set up global pointer to start of list, exclude low 64k + * for exception vectors, space for global descriptor + */ + + node_base = (u64)node << CVMX_NODE_MEM_SHIFT; + cur_block_addr = (OCTEON_DDR0_BASE + low_reserved_bytes) | + node_base; + + if (mem_size <= OCTEON_DDR0_SIZE) { + __cvmx_bootmem_phy_free(cur_block_addr, + mem_size - low_reserved_bytes, + 0); + continue; + } + + __cvmx_bootmem_phy_free(cur_block_addr, + OCTEON_DDR0_SIZE - low_reserved_bytes, + 0); + + mem_size -= OCTEON_DDR0_SIZE; + + /* Add DDR2 block next if present */ + if (mem_size > OCTEON_DDR1_SIZE) { + __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE | + node_base, + OCTEON_DDR1_SIZE, 0); + __cvmx_bootmem_phy_free(OCTEON_DDR2_BASE | + node_base, + mem_size - OCTEON_DDR1_SIZE, 0); + } else { + __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE | + node_base, + mem_size, 0); + } + } + + debug("%s: Initialize the named block\n", __func__); + + /* Initialize the named block structure */ + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_name_len, CVMX_BOOTMEM_NAME_LEN); + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_num_blocks, + CVMX_BOOTMEM_NUM_NAMED_BLOCKS); + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, 0); + + /* Allocate this near the top of the low 256 MBytes of memory */ + addr = cvmx_bootmem_phy_alloc(CVMX_BOOTMEM_NUM_NAMED_BLOCKS * + sizeof(struct cvmx_bootmem_named_block_desc), + 0, 0x10000000, 0, + CVMX_BOOTMEM_FLAG_END_ALLOC); + if (addr >= 0) + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, addr); + + debug("cvmx_bootmem_phy_mem_list_init: named_block_array_addr: 0x%llx)\n", + CAST_ULL(addr)); + + if (addr < 0) { + debug("FATAL ERROR: unable to allocate memory for bootmem descriptor!\n"); + return 0; + } + + for (i = 0; i < CVMX_BOOTMEM_NUM_NAMED_BLOCKS; i++) { + CVMX_BOOTMEM_NAMED_SET_FIELD(addr, base_addr, 0); + CVMX_BOOTMEM_NAMED_SET_FIELD(addr, size, 0); + addr += sizeof(struct cvmx_bootmem_named_block_desc); + } + + // test-only: DEBUG ifdef??? + cvmx_bootmem_phy_list_print(); + + return 1; +} + +int cvmx_bootmem_reserve_memory(u64 start_addr, u64 size, + const char *name, u32 flags) +{ + u64 addr; + int rc = 1; + static unsigned int block_num; + char block_name[CVMX_BOOTMEM_NAME_LEN]; + + debug("%s: start %#llx, size: %#llx, name: %s, flags:%#x)\n", + __func__, CAST_ULL(start_addr), CAST_ULL(size), name, flags); + + if (__cvmx_bootmem_check_version(3)) + return 0; + + addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + if (!addr) + return 0; + + if (!name) + name = "__cvmx_bootmem_reserved"; + + while (addr && rc) { + u64 block_size = cvmx_bootmem_phy_get_size(addr); + u64 reserve_size = 0; + + if (addr >= start_addr && addr < start_addr + size) { + reserve_size = size - (addr - start_addr); + if (block_size < reserve_size) + reserve_size = block_size; + } else if (start_addr > addr && + start_addr < (addr + block_size)) { + reserve_size = block_size - (start_addr - addr); + } + + if (reserve_size) { + snprintf(block_name, sizeof(block_name), + "%.32s_%012llx_%u", + name, (unsigned long long)start_addr, + (unsigned int)block_num); + + debug("%s: Reserving 0x%llx bytes at address 0x%llx with name %s\n", + __func__, CAST_ULL(reserve_size), + CAST_ULL(addr), block_name); + + if (cvmx_bootmem_phy_named_block_alloc(reserve_size, + addr, 0, 0, + block_name, + flags) == -1) { + debug("%s: Failed to reserve 0x%llx bytes at address 0x%llx\n", + __func__, CAST_ULL(reserve_size), + (unsigned long long)addr); + rc = 0; + break; + } + + debug("%s: Reserved 0x%llx bytes at address 0x%llx with name %s\n", + __func__, CAST_ULL(reserve_size), + CAST_ULL(addr), block_name); + } + + addr = cvmx_bootmem_phy_get_next(addr); + block_num++; + } + + return rc; +} + +void cvmx_bootmem_lock(void) +{ + __cvmx_bootmem_lock(0); +} + +void cvmx_bootmem_unlock(void) +{ + __cvmx_bootmem_unlock(0); +} + +void *__cvmx_phys_addr_to_ptr(u64 phys, int size) +{ + void *tmp; + + if (sizeof(void *) == 8) { + tmp = CASTPTR(void, CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, phys)); + } else { + u32 phy32 = (u32)(phys & 0x7fffffffULL); + + tmp = CASTPTR(void, CVMX_ADD_SEG32(CVMX_MIPS32_SPACE_KSEG0, + phy32)); + } + + return tmp; +} + +void *__cvmx_bootmem_internal_get_desc_ptr(void) +{ + return cvmx_phys_to_ptr(cvmx_bootmem_desc_addr); +} diff --git a/arch/mips/mach-octeon/cvmx-coremask.c b/arch/mips/mach-octeon/cvmx-coremask.c new file mode 100644 index 0000000000..cff8c08b97 --- /dev/null +++ b/arch/mips/mach-octeon/cvmx-coremask.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018-2020 Marvell International Ltd. + */ + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +struct cvmx_coremask *get_coremask_override(struct cvmx_coremask *pcm) +{ + struct cvmx_coremask pcm_override = CVMX_COREMASK_MAX; + char *cptr; + + /* The old code sets the number of cores to be to 16 in this case. */ + cvmx_coremask_set_cores(pcm, 0, 16); + + if (OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) + cvmx_coremask_copy(pcm, &pcm_override); + + cptr = env_get("coremask_override"); + if (cptr) { + if (cvmx_coremask_str2bmp(pcm, cptr) < 0) + return NULL; + } + + return pcm; +} + +/* Validate the coremask that is passed to a boot* function. */ +int validate_coremask(struct cvmx_coremask *pcm) +{ + struct cvmx_coremask coremask_override; + struct cvmx_coremask fuse_coremask; + + if (!get_coremask_override(&coremask_override)) + return -1; + + octeon_get_available_coremask(&fuse_coremask); + + if (!cvmx_coremask_is_subset(&fuse_coremask, pcm)) { + puts("ERROR: Can't boot cores that don't exist!\n"); + puts("Available coremask:\n"); + cvmx_coremask_print(&fuse_coremask); + return -1; + } + + if (!cvmx_coremask_is_subset(&coremask_override, pcm)) { + struct cvmx_coremask print_cm; + + puts("Notice: coremask changed from:\n"); + cvmx_coremask_print(pcm); + puts("based on coremask_override of:\n"); + cvmx_coremask_print(&coremask_override); + cvmx_coremask_and(&print_cm, pcm, &coremask_override); + puts("to:\n"); + cvmx_coremask_print(&print_cm); + } + + return 0; +} + +/** + * In CIU_FUSE for the 78XX, odd and even cores are separated out. + * For example, a CIU_FUSE value of 0xfffffefffffe indicates that bits 0 and 1 + * are set. + * This function converts the bit number in the CIU_FUSE register to a + * physical core number. + */ +static int convert_ciu_fuse_to_physical_core(int core, int max_cores) +{ + if (!octeon_has_feature(OCTEON_FEATURE_CIU3)) + return core; + else if (!OCTEON_IS_MODEL(OCTEON_CN78XX)) + return core; + else if (core < (max_cores / 2)) + return core * 2; + else + return ((core - (max_cores / 2)) * 2) + 1; +} + +/** + * Get the total number of fuses blown as well as the number blown per tad. + * + * @param coremask fuse coremask + * @param[out] tad_blown_count number of cores blown for each tad + * @param num_tads number of tads + * @param max_cores maximum number of cores + * + * @return void + */ +void fill_tad_corecount(u64 coremask, int tad_blown_count[], int num_tads, + int max_cores) +{ + int core, physical_core; + + for (core = 0; core < max_cores; core++) { + if (!(coremask & (1ULL << core))) { + int tad; + + physical_core = + convert_ciu_fuse_to_physical_core(core, + max_cores); + tad = physical_core % num_tads; + tad_blown_count[tad]++; + } + } +} + +u64 get_core_pattern(int num_tads, int max_cores) +{ + u64 pattern = 1ULL; + int cnt; + + for (cnt = 1; cnt < (max_cores / num_tads); cnt++) + pattern |= pattern << num_tads; + + return pattern; +} + +/** + * For CN78XX and CN68XX this function returns the logical coremask from the + * CIU_FUSE register value. For other models there is no difference. + * + * @param ciu_fuse_value fuse value from CIU_FUSE register + * @return logical coremask of CIU_FUSE value. + */ +u64 get_logical_coremask(u64 ciu_fuse_value) +{ + int tad_blown_count[MAX_CORE_TADS] = {0}; + int tad; + u64 logical_coremask = 0; + u64 tad_mask, pattern; + int num_tads, max_cores; + + if (OCTEON_IS_MODEL(OCTEON_CN78XX)) { + num_tads = 8; + max_cores = 48; + } else if (OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX)) { + num_tads = 4; + max_cores = 16; + } else if (OCTEON_IS_MODEL(OCTEON_CN68XX)) { + num_tads = 4; + max_cores = 32; + } else { + /* Most Octeon devices don't need any mapping. */ + return ciu_fuse_value; + } + + pattern = get_core_pattern(num_tads, max_cores); + fill_tad_corecount(ciu_fuse_value, tad_blown_count, + num_tads, max_cores); + + for (tad = 0; tad < num_tads; tad++) { + tad_mask = pattern << tad; + logical_coremask |= tad_mask >> (tad_blown_count[tad] * num_tads); + } + return logical_coremask; +} + +/** + * Returns the available coremask either from env or fuses. + * If the fuses are blown and locked, they are the definitive coremask. + * + * @param pcm pointer to coremask to fill in + * @return pointer to coremask + */ +struct cvmx_coremask *octeon_get_available_coremask(struct cvmx_coremask *pcm) +{ + u8 node_mask = 0x01; /* ToDo: Currently only one node is supported */ + u64 ciu_fuse; + u64 cores; + + cvmx_coremask_clear_all(pcm); + + if (octeon_has_feature(OCTEON_FEATURE_CIU3)) { + int node; + + cvmx_coremask_for_each_node(node, node_mask) { + ciu_fuse = (csr_rd(CVMX_CIU_FUSE) & + 0x0000FFFFFFFFFFFFULL); + + ciu_fuse = get_logical_coremask(ciu_fuse); + cvmx_coremask_set64_node(pcm, node, ciu_fuse); + } + + return pcm; + } + + ciu_fuse = (csr_rd(CVMX_CIU_FUSE) & 0x0000FFFFFFFFFFFFULL); + ciu_fuse = get_logical_coremask(ciu_fuse); + + if (OCTEON_IS_MODEL(OCTEON_CN68XX)) + cvmx_coremask_set64(pcm, ciu_fuse); + + /* Get number of cores from fuse register, convert to coremask */ + cores = __builtin_popcountll(ciu_fuse); + + cvmx_coremask_set_cores(pcm, 0, cores); + + return pcm; +} + +int cvmx_coremask_str2bmp(struct cvmx_coremask *pcm, char *hexstr) +{ + int i, j; + int l; /* length of the hexstr in characters */ + int lb; /* number of bits taken by hexstr */ + int hldr_offset;/* holder's offset within the coremask */ + int hldr_xsz; /* holder's size in the number of hex digits */ + u64 h; + char c; + +#define MINUS_ONE (hexstr[0] == '-' && hexstr[1] == '1' && hexstr[2] == 0) + if (MINUS_ONE) { + cvmx_coremask_set_all(pcm); + return 0; + } + + /* Skip '0x' from hexstr */ + if (hexstr[0] == '0' && (hexstr[1] == 'x' || hexstr[1] == 'X')) + hexstr += 2; + + if (!strlen(hexstr)) { + printf("%s: Error: hex string is empty\n", __func__); + return -2; + } + + /* Trim leading zeros */ + while (*hexstr == '0') + hexstr++; + + cvmx_coremask_clear_all(pcm); + l = strlen(hexstr); + + /* If length is 0 then the hex string must be all zeros */ + if (l == 0) + return 0; + + for (i = 0; i < l; i++) { + if (isxdigit((int)hexstr[i]) == 0) { + printf("%s: Non-hex digit within hexstr\n", __func__); + return -2; + } + } + + lb = (l - 1) * 4; + if (hexstr[0] > '7') + lb += 4; + else if (hexstr[0] > '3') + lb += 3; + else if (hexstr[0] > '1') + lb += 2; + else + lb += 1; + if (lb > CVMX_MIPS_MAX_CORES) { + printf("%s: hexstr (%s) is too long\n", __func__, hexstr); + return -1; + } + + hldr_offset = 0; + hldr_xsz = 2 * sizeof(u64); + for (i = l; i > 0; i -= hldr_xsz) { + c = hexstr[i]; + hexstr[i] = 0; + j = i - hldr_xsz; + if (j < 0) + j = 0; + h = simple_strtoull(&hexstr[j], NULL, 16); + if (errno == EINVAL) { + printf("%s: strtou returns w/ EINVAL\n", __func__); + return -2; + } + pcm->coremask_bitmap[hldr_offset] = h; + hexstr[i] = c; + hldr_offset++; + } + + return 0; +} + +void cvmx_coremask_print(const struct cvmx_coremask *pcm) +{ + int i, j; + int start; + int found = 0; + + /* + * Print one node per line. Since the bitmap is stored LSB to MSB + * we reverse the order when printing. + */ + if (!octeon_has_feature(OCTEON_FEATURE_MULTINODE)) { + start = 0; + for (j = CVMX_COREMASK_MAX_CORES_PER_NODE - + CVMX_COREMASK_HLDRSZ; + j >= 0; j -= CVMX_COREMASK_HLDRSZ) { + if (pcm->coremask_bitmap[j / CVMX_COREMASK_HLDRSZ] != 0) + start = 1; + if (start) { + printf(" 0x%llx", + (u64)pcm->coremask_bitmap[j / + CVMX_COREMASK_HLDRSZ]); + } + } + + if (start) + found = 1; + + /* + * If the coremask is empty print so it is not + * confusing + */ + if (!found) + printf(""); + printf("\n"); + + return; + } + + for (i = 0; i < CVMX_MAX_USED_CORES_BMP; + i += CVMX_COREMASK_MAX_CORES_PER_NODE) { + printf("%s node %d:", i > 0 ? "\n" : "", + cvmx_coremask_core_to_node(i)); + start = 0; + + for (j = i + CVMX_COREMASK_MAX_CORES_PER_NODE - + CVMX_COREMASK_HLDRSZ; + j >= i; + j -= CVMX_COREMASK_HLDRSZ) { + /* Don't start printing until we get a non-zero word. */ + if (pcm->coremask_bitmap[j / CVMX_COREMASK_HLDRSZ] != 0) + start = 1; + + if (start) { + printf(" 0x%llx", (u64)pcm->coremask_bitmap[j / + CVMX_COREMASK_HLDRSZ]); + } + } + + if (start) + found = 1; + } + + i /= CVMX_COREMASK_HLDRSZ; + for (; i < CVMX_COREMASK_BMPSZ; i++) { + if (pcm->coremask_bitmap[i]) { + printf(" EXTRA GARBAGE[%i]: %016llx\n", i, + (u64)pcm->coremask_bitmap[i]); + } + } + + /* If the coremask is empty print so it is not confusing */ + if (!found) + printf(""); + + printf("\n"); +} diff --git a/arch/mips/mach-octeon/dram.c b/arch/mips/mach-octeon/dram.c index ff7a59f2ab..6dc08e19da 100644 --- a/arch/mips/mach-octeon/dram.c +++ b/arch/mips/mach-octeon/dram.c @@ -1,28 +1,84 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Copyright (C) Stefan Roese + * Copyright (C) 2020 Stefan Roese */ +#include #include #include #include #include +#include DECLARE_GLOBAL_DATA_PTR; +#define UBOOT_RAM_SIZE_MAX 0x10000000ULL + int dram_init(void) { - /* - * No DDR init yet -> run in L2 cache - */ - gd->ram_size = (4 << 20); - gd->bd->bi_dram[0].size = gd->ram_size; - gd->bd->bi_dram[1].size = 0; + if (IS_ENABLED(CONFIG_RAM_OCTEON)) { + struct ram_info ram; + struct udevice *dev; + int ret; + + ret = uclass_get_device(UCLASS_RAM, 0, &dev); + if (ret) { + debug("DRAM init failed: %d\n", ret); + return ret; + } + + ret = ram_get_info(dev, &ram); + if (ret) { + debug("Cannot get DRAM size: %d\n", ret); + return ret; + } + + gd->ram_size = min_t(size_t, ram.size, UBOOT_RAM_SIZE_MAX); + debug("SDRAM base=%lx, size=%lx\n", + (unsigned long)ram.base, (unsigned long)ram.size); + } else { + /* + * No DDR init yet -> run in L2 cache + */ + gd->ram_size = (4 << 20); + gd->bd->bi_dram[0].size = gd->ram_size; + gd->bd->bi_dram[1].size = 0; + } return 0; } +void board_add_ram_info(int use_default) +{ + if (IS_ENABLED(CONFIG_RAM_OCTEON)) { + struct ram_info ram; + struct udevice *dev; + int ret; + + ret = uclass_get_device(UCLASS_RAM, 0, &dev); + if (ret) { + debug("DRAM init failed: %d\n", ret); + return; + } + + ret = ram_get_info(dev, &ram); + if (ret) { + debug("Cannot get DRAM size: %d\n", ret); + return; + } + + printf(" ("); + print_size(ram.size, " total)"); + } +} + ulong board_get_usable_ram_top(ulong total_size) { - return gd->ram_top; + if (IS_ENABLED(CONFIG_RAM_OCTEON)) { + /* Map a maximum of 256MiB - return not size but address */ + return CONFIG_SYS_SDRAM_BASE + min(gd->ram_size, + UBOOT_RAM_SIZE_MAX); + } else { + return gd->ram_top; + } } diff --git a/arch/mips/mach-octeon/include/mach/bootoct_cmd.h b/arch/mips/mach-octeon/include/mach/bootoct_cmd.h new file mode 100644 index 0000000000..657698ba54 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/bootoct_cmd.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __BOOTOCT_CMD_H__ +#define __BOOTOCT_CMD_H__ + +#include "cvmx-coremask.h" + +enum octeon_boot_cmd_type { + BOOTOCT, + BOOTOCTLINUX, + BOOTOCTELF +}; + +/** Structure to contain results of command line argument parsing */ +struct octeon_boot_args { + struct cvmx_coremask coremask; /** Parsed coremask */ + int num_cores[CVMX_MAX_NODES]; /** number of cores */ + int num_skipped[CVMX_MAX_NODES];/** number of skipped cores */ + const char *app_name; /** Application name */ + const char *named_block; /** Named block to load Linux into */ + u32 stack_size; /** stack size */ + u32 heap_size; /** heap size */ + u32 boot_flags; /** boot flags */ + int node_mask; /** Node mask to use */ + int console_uart; /** serial console number */ + bool forceboot; /** force booting if core 0 not set */ + bool coremask_set; /** set if coremask was set */ + bool num_cores_set; /** Set if num_cores was set */ + bool num_skipped_set; /** Set if num_skipped was set */ + /** Set if endbootargs parameter was passed. */ + bool endbootargs; +}; + +/** + * Parse command line arguments + * + * @param argc number of arguments + * @param[in] argv array of argument strings + * @param cmd command type + * @param[out] boot_args parsed values + * + * @return number of arguments parsed + */ +int octeon_parse_bootopts(int argc, char *const argv[], + enum octeon_boot_cmd_type cmd, + struct octeon_boot_args *boot_args); + +void nmi_bootvector(void); +extern u64 nmi_handler_para[]; + +#endif /* __BOOTOCT_CMD_H__ */ diff --git a/arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h b/arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h new file mode 100644 index 0000000000..337987178f --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +/* + * Header file containing the ABI with the bootloader. + */ + +#ifndef __CVMX_BOOTINFO_H__ +#define __CVMX_BOOTINFO_H__ + +#include "cvmx-coremask.h" + +/* + * Current major and minor versions of the CVMX bootinfo block that is + * passed from the bootloader to the application. This is versioned + * so that applications can properly handle multiple bootloader + * versions. + */ +#define CVMX_BOOTINFO_MAJ_VER 1 +#define CVMX_BOOTINFO_MIN_VER 4 + +#if (CVMX_BOOTINFO_MAJ_VER == 1) +#define CVMX_BOOTINFO_OCTEON_SERIAL_LEN 20 +/* + * This structure is populated by the bootloader. For binary + * compatibility the only changes that should be made are + * adding members to the end of the structure, and the minor + * version should be incremented at that time. + * If an incompatible change is made, the major version + * must be incremented, and the minor version should be reset + * to 0. + */ +struct cvmx_bootinfo { + u32 major_version; + u32 minor_version; + + u64 stack_top; + u64 heap_base; + u64 heap_end; + u64 desc_vaddr; + + u32 exception_base_addr; + u32 stack_size; + u32 flags; + u32 core_mask; + /* DRAM size in megabytes */ + u32 dram_size; + /* physical address of free memory descriptor block*/ + u32 phy_mem_desc_addr; + /* used to pass flags from app to debugger */ + u32 debugger_flags_base_addr; + + /* CPU clock speed, in hz */ + u32 eclock_hz; + + /* DRAM clock speed, in hz */ + u32 dclock_hz; + + u32 reserved0; + u16 board_type; + u8 board_rev_major; + u8 board_rev_minor; + u16 reserved1; + u8 reserved2; + u8 reserved3; + char board_serial_number[CVMX_BOOTINFO_OCTEON_SERIAL_LEN]; + u8 mac_addr_base[6]; + u8 mac_addr_count; +#if (CVMX_BOOTINFO_MIN_VER >= 1) + /* + * Several boards support compact flash on the Octeon boot + * bus. The CF memory spaces may be mapped to different + * addresses on different boards. These are the physical + * addresses, so care must be taken to use the correct + * XKPHYS/KSEG0 addressing depending on the application's + * ABI. These values will be 0 if CF is not present. + */ + u64 compact_flash_common_base_addr; + u64 compact_flash_attribute_base_addr; + /* + * Base address of the LED display (as on EBT3000 board) + * This will be 0 if LED display not present. + */ + u64 led_display_base_addr; +#endif +#if (CVMX_BOOTINFO_MIN_VER >= 2) + /* DFA reference clock in hz (if applicable)*/ + u32 dfa_ref_clock_hz; + + /* + * flags indicating various configuration options. These + * flags supercede the 'flags' variable and should be used + * instead if available. + */ + u32 config_flags; +#endif +#if (CVMX_BOOTINFO_MIN_VER >= 3) + /* + * Address of the OF Flattened Device Tree structure + * describing the board. + */ + u64 fdt_addr; +#endif +#if (CVMX_BOOTINFO_MIN_VER >= 4) + /* + * Coremask used for processors with more than 32 cores + * or with OCI. This replaces core_mask. + */ + struct cvmx_coremask ext_core_mask; +#endif +}; + +#define CVMX_BOOTINFO_CFG_FLAG_PCI_HOST (1ull << 0) +#define CVMX_BOOTINFO_CFG_FLAG_PCI_TARGET (1ull << 1) +#define CVMX_BOOTINFO_CFG_FLAG_DEBUG (1ull << 2) +#define CVMX_BOOTINFO_CFG_FLAG_NO_MAGIC (1ull << 3) +/* + * This flag is set if the TLB mappings are not contained in the + * 0x10000000 - 0x20000000 boot bus region. + */ +#define CVMX_BOOTINFO_CFG_FLAG_OVERSIZE_TLB_MAPPING (1ull << 4) +#define CVMX_BOOTINFO_CFG_FLAG_BREAK (1ull << 5) + +#endif /* (CVMX_BOOTINFO_MAJ_VER == 1) */ + +/* Type defines for board and chip types */ +enum cvmx_board_types_enum { + CVMX_BOARD_TYPE_NULL = 0, + CVMX_BOARD_TYPE_SIM = 1, + CVMX_BOARD_TYPE_EBT3000 = 2, + CVMX_BOARD_TYPE_KODAMA = 3, + CVMX_BOARD_TYPE_NIAGARA = 4, + CVMX_BOARD_TYPE_NAC38 = 5, /* formerly NAO38 */ + CVMX_BOARD_TYPE_THUNDER = 6, + CVMX_BOARD_TYPE_TRANTOR = 7, + CVMX_BOARD_TYPE_EBH3000 = 8, + CVMX_BOARD_TYPE_EBH3100 = 9, + CVMX_BOARD_TYPE_HIKARI = 10, + CVMX_BOARD_TYPE_CN3010_EVB_HS5 = 11, + CVMX_BOARD_TYPE_CN3005_EVB_HS5 = 12, + CVMX_BOARD_TYPE_KBP = 13, + /* Deprecated, CVMX_BOARD_TYPE_CN3010_EVB_HS5 supports the CN3020 */ + CVMX_BOARD_TYPE_CN3020_EVB_HS5 = 14, + CVMX_BOARD_TYPE_EBT5800 = 15, + CVMX_BOARD_TYPE_NICPRO2 = 16, + CVMX_BOARD_TYPE_EBH5600 = 17, + CVMX_BOARD_TYPE_EBH5601 = 18, + CVMX_BOARD_TYPE_EBH5200 = 19, + CVMX_BOARD_TYPE_BBGW_REF = 20, + CVMX_BOARD_TYPE_NIC_XLE_4G = 21, + CVMX_BOARD_TYPE_EBT5600 = 22, + CVMX_BOARD_TYPE_EBH5201 = 23, + CVMX_BOARD_TYPE_EBT5200 = 24, + CVMX_BOARD_TYPE_CB5600 = 25, + CVMX_BOARD_TYPE_CB5601 = 26, + CVMX_BOARD_TYPE_CB5200 = 27, + /* Special 'generic' board type, supports many boards */ + CVMX_BOARD_TYPE_GENERIC = 28, + CVMX_BOARD_TYPE_EBH5610 = 29, + CVMX_BOARD_TYPE_LANAI2_A = 30, + CVMX_BOARD_TYPE_LANAI2_U = 31, + CVMX_BOARD_TYPE_EBB5600 = 32, + CVMX_BOARD_TYPE_EBB6300 = 33, + CVMX_BOARD_TYPE_NIC_XLE_10G = 34, + CVMX_BOARD_TYPE_LANAI2_G = 35, + CVMX_BOARD_TYPE_EBT5810 = 36, + CVMX_BOARD_TYPE_NIC10E = 37, + CVMX_BOARD_TYPE_EP6300C = 38, + CVMX_BOARD_TYPE_EBB6800 = 39, + CVMX_BOARD_TYPE_NIC4E = 40, + CVMX_BOARD_TYPE_NIC2E = 41, + CVMX_BOARD_TYPE_EBB6600 = 42, + CVMX_BOARD_TYPE_REDWING = 43, + CVMX_BOARD_TYPE_NIC68_4 = 44, + CVMX_BOARD_TYPE_NIC10E_66 = 45, + CVMX_BOARD_TYPE_MAX, + + /* + * The range from CVMX_BOARD_TYPE_MAX to + * CVMX_BOARD_TYPE_CUST_DEFINED_MIN is reserved for future + * SDK use. + */ + + /* + * Set aside a range for customer boards. These numbers are managed + * by Cavium. + */ + CVMX_BOARD_TYPE_CUST_DEFINED_MIN = 10000, + CVMX_BOARD_TYPE_CUST_WSX16 = 10001, + CVMX_BOARD_TYPE_CUST_NS0216 = 10002, + CVMX_BOARD_TYPE_CUST_NB5 = 10003, + CVMX_BOARD_TYPE_CUST_WMR500 = 10004, + CVMX_BOARD_TYPE_CUST_ITB101 = 10005, + CVMX_BOARD_TYPE_CUST_NTE102 = 10006, + CVMX_BOARD_TYPE_CUST_AGS103 = 10007, + CVMX_BOARD_TYPE_CUST_GST104 = 10008, + CVMX_BOARD_TYPE_CUST_GCT105 = 10009, + CVMX_BOARD_TYPE_CUST_AGS106 = 10010, + CVMX_BOARD_TYPE_CUST_SGM107 = 10011, + CVMX_BOARD_TYPE_CUST_GCT108 = 10012, + CVMX_BOARD_TYPE_CUST_AGS109 = 10013, + CVMX_BOARD_TYPE_CUST_GCT110 = 10014, + CVMX_BOARD_TYPE_CUST_L2_AIR_SENDER = 10015, + CVMX_BOARD_TYPE_CUST_L2_AIR_RECEIVER = 10016, + CVMX_BOARD_TYPE_CUST_L2_ACCTON2_TX = 10017, + CVMX_BOARD_TYPE_CUST_L2_ACCTON2_RX = 10018, + CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_TX = 10019, + CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_RX = 10020, + CVMX_BOARD_TYPE_CUST_L2_ZINWELL = 10021, + CVMX_BOARD_TYPE_CUST_DEFINED_MAX = 20000, + + /* + * Set aside a range for customer private use. The SDK won't + * use any numbers in this range. + */ + CVMX_BOARD_TYPE_CUST_PRIVATE_MIN = 20001, + CVMX_BOARD_TYPE_UBNT_E100 = 20002, + CVMX_BOARD_TYPE_CUST_DSR1000N = 20006, + CVMX_BOARD_TYPE_KONTRON_S1901 = 21901, + CVMX_BOARD_TYPE_CUST_PRIVATE_MAX = 30000, + + /* The remaining range is reserved for future use. */ +}; + +enum cvmx_chip_types_enum { + CVMX_CHIP_TYPE_NULL = 0, + CVMX_CHIP_SIM_TYPE_DEPRECATED = 1, + CVMX_CHIP_TYPE_OCTEON_SAMPLE = 2, + CVMX_CHIP_TYPE_MAX, +}; + +/* + * Compatibility alias for NAC38 name change, planned to be removed + * from SDK 1.7 + */ +#define CVMX_BOARD_TYPE_NAO38 CVMX_BOARD_TYPE_NAC38 + +/* Functions to return string based on type */ +#define ENUM_BRD_TYPE_CASE(x) \ + case x: \ + return(#x + 16) /* Skip CVMX_BOARD_TYPE_ */ + +static inline const char *cvmx_board_type_to_string(enum + cvmx_board_types_enum type) +{ + switch (type) { + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NULL); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_SIM); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT3000); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KODAMA); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIAGARA); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NAC38); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_THUNDER); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_TRANTOR); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH3000); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH3100); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_HIKARI); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3010_EVB_HS5); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3005_EVB_HS5); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KBP); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3020_EVB_HS5); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5800); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NICPRO2); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5601); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5200); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_BBGW_REF); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC_XLE_4G); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5201); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5200); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5601); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5200); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_GENERIC); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5610); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_A); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_U); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB5600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6300); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC_XLE_10G); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_G); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5810); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC10E); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EP6300C); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6800); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC4E); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC2E); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_REDWING); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC68_4); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC10E_66); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_MAX); + + /* Customer boards listed here */ + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DEFINED_MIN); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_WSX16); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NS0216); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NB5); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_WMR500); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_ITB101); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NTE102); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS103); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GST104); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT105); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS106); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_SGM107); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT108); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS109); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT110); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_AIR_SENDER); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_AIR_RECEIVER); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ACCTON2_TX); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ACCTON2_RX); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_TX); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_RX); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ZINWELL); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DEFINED_MAX); + + /* Customer private range */ + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MIN); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_UBNT_E100); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DSR1000N); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KONTRON_S1901); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MAX); + } + + return NULL; +} + +#define ENUM_CHIP_TYPE_CASE(x) \ + case x: \ + return(#x + 15) /* Skip CVMX_CHIP_TYPE */ + +static inline const char *cvmx_chip_type_to_string(enum + cvmx_chip_types_enum type) +{ + switch (type) { + ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_NULL); + ENUM_CHIP_TYPE_CASE(CVMX_CHIP_SIM_TYPE_DEPRECATED); + ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_OCTEON_SAMPLE); + ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_MAX); + } + + return "Unsupported Chip"; +} + +#endif /* __CVMX_BOOTINFO_H__ */ diff --git a/arch/mips/mach-octeon/include/mach/cvmx-bootmem.h b/arch/mips/mach-octeon/include/mach/cvmx-bootmem.h new file mode 100644 index 0000000000..d60668c9ad --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-bootmem.h @@ -0,0 +1,533 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +/** + * @file + * Simple allocate only memory allocator. Used to allocate memory at application + * start time. + */ + +#ifndef __CVMX_BOOTMEM_H__ +#define __CVMX_BOOTMEM_H__ + +/* Must be multiple of 8, changing breaks ABI */ +#define CVMX_BOOTMEM_NAME_LEN 128 +/* Can change without breaking ABI */ +#define CVMX_BOOTMEM_NUM_NAMED_BLOCKS 64 +/* minimum alignment of bootmem alloced blocks */ +#define CVMX_BOOTMEM_ALIGNMENT_SIZE (16ull) + +/* Flags for cvmx_bootmem_phy_mem* functions */ +/* Allocate from end of block instead of beginning */ +#define CVMX_BOOTMEM_FLAG_END_ALLOC (1 << 0) +#define CVMX_BOOTMEM_FLAG_NO_LOCKING (1 << 1) /* Don't do any locking. */ + +/* Real physical addresses of memory regions */ +#define OCTEON_DDR0_BASE (0x0ULL) +#define OCTEON_DDR0_SIZE (0x010000000ULL) +#define OCTEON_DDR1_BASE ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \ + ? 0x20000000ULL : 0x410000000ULL) +#define OCTEON_DDR1_SIZE (0x010000000ULL) +#define OCTEON_DDR2_BASE ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \ + ? 0x30000000ULL : 0x20000000ULL) +#define OCTEON_DDR2_SIZE ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \ + ? 0x7d0000000ULL : 0x3e0000000ULL) +#define OCTEON_MAX_PHY_MEM_SIZE ((OCTEON_IS_MODEL(OCTEON_CN68XX)) \ + ? 128 * 1024 * 1024 * 1024ULL \ + : (OCTEON_IS_OCTEON2()) \ + ? 32 * 1024 * 1024 * 1024ull \ + : (OCTEON_IS_OCTEON3()) \ + ? 512 * 1024 * 1024 * 1024ULL \ + : 16 * 1024 * 1024 * 1024ULL) + +/* + * First bytes of each free physical block of memory contain this structure, + * which is used to maintain the free memory list. Since the bootloader is + * only 32 bits, there is a union providing 64 and 32 bit versions. The + * application init code converts addresses to 64 bit addresses before the + * application starts. + */ +struct cvmx_bootmem_block_header { + /* Note: these are referenced from assembly routines in the bootloader, + * so this structure should not be changed without changing those + * routines as well. + */ + u64 next_block_addr; + u64 size; + +}; + +/* + * Structure for named memory blocks + * Number of descriptors + * available can be changed without affecting compatibility, + * but name length changes require a bump in the bootmem + * descriptor version + * Note: This structure must be naturally 64 bit aligned, as a single + * memory image will be used by both 32 and 64 bit programs. + */ +struct cvmx_bootmem_named_block_desc { + u64 base_addr; /* Base address of named block */ + /* + * Size actually allocated for named block (may differ from requested) + */ + u64 size; + char name[CVMX_BOOTMEM_NAME_LEN]; /* name of named block */ +}; + +/* Current descriptor versions */ +/* CVMX bootmem descriptor major version */ +#define CVMX_BOOTMEM_DESC_MAJ_VER 3 +/* CVMX bootmem descriptor minor version */ +#define CVMX_BOOTMEM_DESC_MIN_VER 0 + +/* + * First three members of cvmx_bootmem_desc_t are left in original + * positions for backwards compatibility. + */ +struct cvmx_bootmem_desc { + /* Linux compatible proxy for __BIG_ENDIAN */ + u32 lock; /* spinlock to control access to list */ + u32 flags; /* flags for indicating various conditions */ + u64 head_addr; + + /* incremented changed when incompatible changes made */ + u32 major_version; + /* + * incremented changed when compatible changes made, reset to + * zero when major incremented + */ + u32 minor_version; + u64 app_data_addr; + u64 app_data_size; + + /* number of elements in named blocks array */ + u32 named_block_num_blocks; + /* length of name array in bootmem blocks */ + u32 named_block_name_len; + /* address of named memory block descriptors */ + u64 named_block_array_addr; +}; + +/** + * Initialize the boot alloc memory structures. This is + * normally called inside of cvmx_user_app_init() + * + * @param mem_desc_addr Address of the free memory list + * @return + */ +int cvmx_bootmem_init(u64 mem_desc_addr); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader. + * This is an allocate-only algorithm, so freeing memory is not possible. + * + * @param size Size in bytes of block to allocate + * @param alignment Alignment required - must be power of 2 + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc(u64 size, u64 alignment); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader from a specific node. + * This is an allocate-only algorithm, so freeing memory is not possible. + * + * @param node The node to allocate memory from + * @param size Size in bytes of block to allocate + * @param alignment Alignment required - must be power of 2 + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_node(u64 node, u64 size, u64 alignment); + +/** + * Allocate a block of memory from the free list that was + * passed to the application by the bootloader at a specific + * address. This is an allocate-only algorithm, so + * freeing memory is not possible. Allocation will fail if + * memory cannot be allocated at the specified address. + * + * @param size Size in bytes of block to allocate + * @param address Physical address to allocate memory at. If this + * memory is not available, the allocation fails. + * @param alignment Alignment required - must be power of 2 + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_address(u64 size, u64 address, + u64 alignment); + +/** + * Allocate a block of memory from the free list that was + * passed to the application by the bootloader within a specified + * address range. This is an allocate-only algorithm, so + * freeing memory is not possible. Allocation will fail if + * memory cannot be allocated in the requested range. + * + * @param size Size in bytes of block to allocate + * @param min_addr defines the minimum address of the range + * @param max_addr defines the maximum address of the range + * @param alignment Alignment required - must be power of 2 + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_range(u64 size, u64 alignment, + u64 min_addr, u64 max_addr); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader, and assign it a name in the + * global named block table. (part of the cvmx_bootmem_descriptor_t structure) + * Named blocks can later be freed. + * + * @param size Size in bytes of block to allocate + * @param alignment Alignment required - must be power of 2 + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named(u64 size, u64 alignment, + const char *name); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader, and assign it a name in the + * global named block table. (part of the cvmx_bootmem_descriptor_t structure) + * Named blocks can later be freed. + * + * @param size Size in bytes of block to allocate + * @param alignment Alignment required - must be power of 2 + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * @param flags Flags to control options for the allocation. + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named_flags(u64 size, u64 alignment, + const char *name, u32 flags); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader, and assign it a name in the + * global named block table. (part of the cvmx_bootmem_descriptor_t structure) + * Named blocks can later be freed. + * + * @param size Size in bytes of block to allocate + * @param address Physical address to allocate memory at. If this + * memory is not available, the allocation fails. + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named_address(u64 size, u64 address, + const char *name); + +/** + * Allocate a block of memory from a specific range of the free list + * that was passed to the application by the bootloader, and assign it + * a name in the global named block table. (part of the + * cvmx_bootmem_descriptor_t structure) Named blocks can later be + * freed. If request cannot be satisfied within the address range + * specified, NULL is returned + * + * @param size Size in bytes of block to allocate + * @param min_addr minimum address of range + * @param max_addr maximum address of range + * @param align Alignment of memory to be allocated. (must be a power of 2) + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named_range(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name); + +/** + * Allocate if needed a block of memory from a specific range of the + * free list that was passed to the application by the bootloader, and + * assign it a name in the global named block table. (part of the + * cvmx_bootmem_descriptor_t structure) Named blocks can later be + * freed. If the requested name block is already allocated, return + * the pointer to block of memory. If request cannot be satisfied + * within the address range specified, NULL is returned + * + * @param size Size in bytes of block to allocate + * @param min_addr minimum address of range + * @param max_addr maximum address of range + * @param align Alignment of memory to be allocated. (must be a power of 2) + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * @param init Initialization function + * + * The initialization function is optional, if omitted the named block + * is initialized to all zeros when it is created, i.e. once. + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named_range_once(u64 size, + u64 min_addr, + u64 max_addr, + u64 align, + const char *name, + void (*init)(void *)); + +/** + * Allocate all free memory starting at the start address. This is used to + * prevent any free blocks from later being allocated within the reserved space. + * Note that any memory allocated with this function cannot be later freed. + * + * @param start_addr Starting address to reserve + * @param size Size in bytes to reserve starting at start_addr + * @param name Name to assign to reserved blocks + * @param flags Flags to use when reserving memory + * + * @return 0 on failure, + * !0 on success + */ +int cvmx_bootmem_reserve_memory(u64 start_addr, u64 size, + const char *name, u32 flags); + +/** + * Frees a previously allocated named bootmem block. + * + * @param name name of block to free + * + * @return 0 on failure, + * !0 on success + */ +int cvmx_bootmem_free_named(const char *name); + +/** + * Finds a named bootmem block by name. + * + * @param name name of block to free + * + * @return pointer to named block descriptor on success + * 0 on failure + */ +const struct cvmx_bootmem_named_block_desc * +cvmx_bootmem_find_named_block(const char *name); + +/** + * Returns the size of available memory in bytes, only + * counting blocks that are at least as big as the minimum block + * size. + * + * @param min_block_size + * Minimum block size to count in total. + * + * @return Number of bytes available for allocation that meet the + * block size requirement + */ +u64 cvmx_bootmem_available_mem(u64 min_block_size); + +/** + * Prints out the list of named blocks that have been allocated + * along with their addresses and sizes. + * This is primarily used for debugging purposes + */ +void cvmx_bootmem_print_named(void); + +/** + * Allocates a block of physical memory from the free list, at + * (optional) requested address and alignment. + * + * @param req_size size of region to allocate. All requests are + * rounded up to be a multiple CVMX_BOOTMEM_ALIGNMENT_SIZE bytes size + * + * @param address_min Minimum address that block can occupy. + * + * @param address_max Specifies the maximum address_min (inclusive) + * that the allocation can use. + * + * @param alignment Requested alignment of the block. If this + * alignment cannot be met, the allocation fails. + * This must be a power of 2. (Note: Alignment of + * CVMX_BOOTMEM_ALIGNMENT_SIZE bytes is required, and + * internally enforced. Requested alignments of less + * than CVMX_BOOTMEM_ALIGNMENT_SIZE are set to + * CVMX_BOOTMEM_ALIGNMENT_SIZE.) + * @param flags Flags to control options for the allocation. + * + * @return physical address of block allocated, or -1 on failure + */ +s64 cvmx_bootmem_phy_alloc(u64 req_size, u64 address_min, u64 address_max, + u64 alignment, u32 flags); + +/** + * Allocates a named block of physical memory from the free list, at + * (optional) requested address and alignment. + * + * @param size size of region to allocate. All requests are rounded + * up to be a multiple CVMX_BOOTMEM_ALIGNMENT_SIZE bytes size + * + * @param min_addr Minimum address that block can occupy. + * + * @param max_addr Specifies the maximum address_min (inclusive) that + * the allocation can use. + * + * @param alignment Requested alignment of the block. If this + * alignment cannot be met, the allocation fails. + * This must be a power of 2. (Note: Alignment of + * CVMX_BOOTMEM_ALIGNMENT_SIZE bytes is required, and + * internally enforced. Requested alignments of less + * than CVMX_BOOTMEM_ALIGNMENT_SIZE are set to + * CVMX_BOOTMEM_ALIGNMENT_SIZE.) + * + * @param name name to assign to named block + * + * @param flags Flags to control options for the allocation. + * + * @return physical address of block allocated, or -1 on failure + */ +s64 cvmx_bootmem_phy_named_block_alloc(u64 size, u64 min_addr, u64 max_addr, + u64 alignment, const char *name, + u32 flags); + +/** + * Finds a named memory block by name. + * Also used for finding an unused entry in the named block table. + * + * @param name Name of memory block to find. If NULL pointer given, + * then finds unused descriptor, if available. + * + * @param flags Flags to control options for the allocation. + * + * @return Physical address of the memory block descriptor, zero if not + * found. If zero returned when name parameter is NULL, then no + * memory block descriptors are available. + */ +u64 cvmx_bootmem_phy_named_block_find(const char *name, u32 flags); + +/** + * Returns the size of available memory in bytes, only + * counting blocks that are at least as big as the minimum block + * size. + * + * @param min_block_size + * Minimum block size to count in total. + * + * @return Number of bytes available for allocation that meet the + * block size requirement + */ +u64 cvmx_bootmem_phy_available_mem(u64 min_block_size); + +/** + * Frees a named block. + * + * @param name name of block to free + * @param flags flags for passing options + * + * @return 0 on failure + * 1 on success + */ +int cvmx_bootmem_phy_named_block_free(const char *name, u32 flags); + +/** + * Frees a block to the bootmem allocator list. This must + * be used with care, as the size provided must match the size + * of the block that was allocated, or the list will become + * corrupted. + * + * IMPORTANT: This is only intended to be used as part of named block + * frees and initial population of the free memory list. + * * + * + * @param phy_addr physical address of block + * @param size size of block in bytes. + * @param flags flags for passing options + * + * @return 1 on success, + * 0 on failure + */ +int __cvmx_bootmem_phy_free(u64 phy_addr, u64 size, u32 flags); + +/** + * Prints the list of currently allocated named blocks + * + */ +void cvmx_bootmem_phy_named_block_print(void); + +/** + * Prints the list of available memory. + * + */ +void cvmx_bootmem_phy_list_print(void); + +/** + * This function initializes the free memory list used by cvmx_bootmem. + * This must be called before any allocations can be done. + * + * @param mem_size Total memory available, in bytes + * + * @param low_reserved_bytes Number of bytes to reserve (leave out of + * free list) at address 0x0. + * + * @param desc_buffer Buffer for the bootmem descriptor. This must be + * a 32 bit addressable address. + * + * @return 1 on success + * 0 on failure + */ +s64 cvmx_bootmem_phy_mem_list_init(u64 mem_size, u32 low_reserved_bytes, + struct cvmx_bootmem_desc *desc_buffer); + +/** + * This function initializes the free memory list used by cvmx_bootmem. + * This must be called before any allocations can be done. + * + * @param nodemask Nodemask - one bit per node (bit0->node0, bit1->node1,...) + * + * @param mem_size[] Array of memory sizes in MBytes per node ([0]->node0,...) + * + * @param low_reserved_bytes Number of bytes to reserve (leave out of + * free list) at address 0x0. + * + * @param desc_buffer Buffer for the bootmem descriptor. This must be + * a 32 bit addressable address. + * + * @return 1 on success + * 0 on failure + */ +s64 cvmx_bootmem_phy_mem_list_init_multi(u8 nodemask, u32 mem_size[], + u32 low_reserved_bytes, + struct cvmx_bootmem_desc *desc_buffer); +/** + * Locks the bootmem allocator. This is useful in certain situations + * where multiple allocations must be made without being interrupted. + * This should be used with the CVMX_BOOTMEM_FLAG_NO_LOCKING flag. + * + */ +void cvmx_bootmem_lock(void); + +/** + * Unlocks the bootmem allocator. This is useful in certain situations + * where multiple allocations must be made without being interrupted. + * This should be used with the CVMX_BOOTMEM_FLAG_NO_LOCKING flag. + * + */ +void cvmx_bootmem_unlock(void); + +/** + * Internal use function to get the current descriptor pointer + */ +void *__cvmx_bootmem_internal_get_desc_ptr(void); + +/** + * Internal use. This is userd to get a pointer to a physical + * address. For linux n32 the physical address in mmaped to a virtual + * address and the virtual address is returned. For n64 the address + * is converted to an xkphys address and the xkhpys address is + * returned. + */ +void *__cvmx_phys_addr_to_ptr(u64 phys, int size); +const struct cvmx_bootmem_named_block_desc * +__cvmx_bootmem_find_named_block_flags(const char *name, u32 flags); +void *cvmx_bootmem_alloc_named_range_flags(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name, u32 flags); +u64 cvmx_bootmem_phy_alloc_range(u64 size, u64 alignment, + u64 min_addr, u64 max_addr); + +#endif /* __CVMX_BOOTMEM_H__ */ diff --git a/arch/mips/mach-octeon/include/mach/cvmx-coremask.h b/arch/mips/mach-octeon/include/mach/cvmx-coremask.h new file mode 100644 index 0000000000..c34ff46d3a --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-coremask.h @@ -0,0 +1,752 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +/** + * Module to support operations on bitmap of cores. Coremask can be used to + * select a specific core, a group of cores, or all available cores, for + * initialization and differentiation of roles within a single shared binary + * executable image. + * + * The core numbers used in this file are the same value as what is found in + * the COP0_EBASE register and the rdhwr 0 instruction. + * + * For the CN78XX and other multi-node environments the core numbers are not + * contiguous. The core numbers for the CN78XX are as follows: + * + * Node 0: Cores 0 - 47 + * Node 1: Cores 128 - 175 + * Node 2: Cores 256 - 303 + * Node 3: Cores 384 - 431 + * + * The coremask environment generally tries to be node agnostic in order to + * provide future compatibility if more cores are added to future processors + * or more nodes are supported. + */ + +#ifndef __CVMX_COREMASK_H__ +#define __CVMX_COREMASK_H__ + +#include "cvmx-regs.h" + +/* bits per holder */ +#define CVMX_COREMASK_HLDRSZ ((int)(sizeof(u64) * 8)) + +/** Maximum allowed cores per node */ +#define CVMX_COREMASK_MAX_CORES_PER_NODE (1 << CVMX_NODE_NO_SHIFT) + +/** Maximum number of bits actually used in the coremask */ +#define CVMX_MAX_USED_CORES_BMP (1 << (CVMX_NODE_NO_SHIFT + CVMX_NODE_BITS)) + +/* the number of valid bits in and the mask of the most significant holder */ +#define CVMX_COREMASK_MSHLDR_NBITS \ + (CVMX_MIPS_MAX_CORES % CVMX_COREMASK_HLDRSZ) + +#define CVMX_COREMASK_MSHLDR_MASK \ + ((CVMX_COREMASK_MSHLDR_NBITS) ? \ + (((u64)1 << CVMX_COREMASK_MSHLDR_NBITS) - 1) : \ + ((u64)-1)) + +/* cvmx_coremask size in u64 */ +#define CVMX_COREMASK_BMPSZ \ + ((int)(CVMX_MIPS_MAX_CORES / CVMX_COREMASK_HLDRSZ + \ + (CVMX_COREMASK_MSHLDR_NBITS != 0))) + +#define CVMX_COREMASK_USED_BMPSZ \ + (CVMX_MAX_USED_CORES_BMP / CVMX_COREMASK_HLDRSZ) + +#define CVMX_COREMASK_BMP_NODE_CORE_IDX(node, core) \ + ((((node) << CVMX_NODE_NO_SHIFT) + (core)) / CVMX_COREMASK_HLDRSZ) +/** + * Maximum available coremask. + */ +#define CVMX_COREMASK_MAX \ + { { \ + 0x0000FFFFFFFFFFFF, 0, \ + 0x0000FFFFFFFFFFFF, 0, \ + 0x0000FFFFFFFFFFFF, 0, \ + 0x0000FFFFFFFFFFFF, 0, \ + 0, 0, \ + 0, 0, \ + 0, 0, \ + 0, 0} } + +/** + * Empty coremask + */ +#define CVMX_COREMASK_EMPTY \ + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} } + +struct cvmx_coremask { + u64 coremask_bitmap[CVMX_COREMASK_BMPSZ]; +}; + +/** + * Macro to iterate through all available cores in a coremask + * + * @param core - core variable to use to iterate + * @param pcm - pointer to core mask + * + * Use this like a for statement + */ +#define cvmx_coremask_for_each_core(core, pcm) \ + for ((core) = -1; \ + (core) = cvmx_coremask_next_core((core), pcm), \ + (core) >= 0;) + +/** + * Given a node and node mask, return the next available node. + * + * @param node starting node number + * @param node_mask node mask to use to find the next node + * + * @return next node number or -1 if no more nodes are available + */ +static inline int cvmx_coremask_next_node(int node, u8 node_mask) +{ + int next_offset; + + next_offset = __builtin_ffs(node_mask >> (node + 1)); + if (next_offset == 0) + return -1; + else + return node + next_offset; +} + +/** + * Iterate through all nodes in a node mask + * + * @param node node iterator variable + * @param node_mask mask to use for iterating + * + * Use this like a for statement + */ +#define cvmx_coremask_for_each_node(node, node_mask) \ + for ((node) = __builtin_ffs(node_mask) - 1; \ + (node) >= 0 && (node) < CVMX_MAX_NODES; \ + (node) = cvmx_coremask_next_node(node, node_mask)) + +/** + * Is ``core'' set in the coremask? + * + * @param pcm is the pointer to the coremask. + * @param core + * @return 1 if core is set and 0 if not. + */ +static inline int cvmx_coremask_is_core_set(const struct cvmx_coremask *pcm, + int core) +{ + int n, i; + + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + + return (pcm->coremask_bitmap[i] & ((u64)1 << n)) != 0; +} + +/** + * Is ``current core'' set in the coremask? + * + * @param pcm is the pointer to the coremask. + * @return 1 if core is set and 0 if not. + */ +static inline int cvmx_coremask_is_self_set(const struct cvmx_coremask *pcm) +{ + return cvmx_coremask_is_core_set(pcm, (int)cvmx_get_core_num()); +} + +/** + * Is coremask empty? + * @param pcm is the pointer to the coremask. + * @return 1 if *pcm is empty (all zeros), 0 if not empty. + */ +static inline int cvmx_coremask_is_empty(const struct cvmx_coremask *pcm) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + if (pcm->coremask_bitmap[i] != 0) + return 0; + + return 1; +} + +/** + * Set ``core'' in the coremask. + * + * @param pcm is the pointer to the coremask. + * @param core + * @return 0. + */ +static inline int cvmx_coremask_set_core(struct cvmx_coremask *pcm, int core) +{ + int n, i; + + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + pcm->coremask_bitmap[i] |= ((u64)1 << n); + + return 0; +} + +/** + * Set ``current core'' in the coremask. + * + * @param pcm is the pointer to the coremask. + * @return 0. + */ +static inline int cvmx_coremask_set_self(struct cvmx_coremask *pcm) +{ + return cvmx_coremask_set_core(pcm, (int)cvmx_get_core_num()); +} + +/** + * Clear ``core'' from the coremask. + * + * @param pcm is the pointer to the coremask. + * @param core + * @return 0. + */ +static inline int cvmx_coremask_clear_core(struct cvmx_coremask *pcm, int core) +{ + int n, i; + + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + pcm->coremask_bitmap[i] &= ~((u64)1 << n); + + return 0; +} + +/** + * Clear ``current core'' from the coremask. + * + * @param pcm is the pointer to the coremask. + * @return 0. + */ +static inline int cvmx_coremask_clear_self(struct cvmx_coremask *pcm) +{ + return cvmx_coremask_clear_core(pcm, cvmx_get_core_num()); +} + +/** + * Toggle ``core'' in the coremask. + * + * @param pcm is the pointer to the coremask. + * @param core + * @return 0. + */ +static inline int cvmx_coremask_toggle_core(struct cvmx_coremask *pcm, int core) +{ + int n, i; + + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + pcm->coremask_bitmap[i] ^= ((u64)1 << n); + + return 0; +} + +/** + * Toggle ``current core'' in the coremask. + * + * @param pcm is the pointer to the coremask. + * @return 0. + */ +static inline int cvmx_coremask_toggle_self(struct cvmx_coremask *pcm) +{ + return cvmx_coremask_toggle_core(pcm, cvmx_get_core_num()); +} + +/** + * Set the lower 64-bit of the coremask. + * @param pcm pointer to coremask + * @param coremask_64 64-bit coremask to apply to the first node (0) + */ +static inline void cvmx_coremask_set64(struct cvmx_coremask *pcm, + u64 coremask_64) +{ + pcm->coremask_bitmap[0] = coremask_64; +} + +/** + * Set the 64-bit of the coremask for a particular node. + * @param pcm pointer to coremask + * @param node node to set + * @param coremask_64 64-bit coremask to apply to the specified node + */ +static inline void cvmx_coremask_set64_node(struct cvmx_coremask *pcm, + u8 node, + u64 coremask_64) +{ + pcm->coremask_bitmap[CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0)] = + coremask_64; +} + +/** + * Gets the lower 64-bits of the coremask + * + * @param[in] pcm - pointer to coremask + * @return 64-bit coremask for the first node + */ +static inline u64 cvmx_coremask_get64(const struct cvmx_coremask *pcm) +{ + return pcm->coremask_bitmap[0]; +} + +/** + * Gets the lower 64-bits of the coremask for the specified node + * + * @param[in] pcm - pointer to coremask + * @param node - node to get coremask for + * @return 64-bit coremask for the first node + */ +static inline u64 cvmx_coremask_get64_node(const struct cvmx_coremask *pcm, + u8 node) +{ + return pcm->coremask_bitmap[CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0)]; +} + +/** + * Gets the lower 32-bits of the coremask for compatibility + * + * @param[in] pcm - pointer to coremask + * @return 32-bit coremask for the first node + * @deprecated This function is to maintain compatibility with older + * SDK applications and may disappear at some point. + * This function is not compatible with the CN78XX or any other + * Octeon device with more than 32 cores. + */ +static inline u32 cvmx_coremask_get32(const struct cvmx_coremask *pcm) +{ + return pcm->coremask_bitmap[0] & 0xffffffff; +} + +/* + * cvmx_coremask_cmp() returns an integer less than, equal to, or + * greater than zero if *pcm1 is found, respectively, to be less than, + * to match, or be greater than *pcm2. + */ +static inline int cvmx_coremask_cmp(const struct cvmx_coremask *pcm1, + const struct cvmx_coremask *pcm2) +{ + int i; + + /* Start from highest node for arithemtically correct result */ + for (i = CVMX_COREMASK_USED_BMPSZ - 1; i >= 0; i--) + if (pcm1->coremask_bitmap[i] != pcm2->coremask_bitmap[i]) { + return (pcm1->coremask_bitmap[i] > + pcm2->coremask_bitmap[i]) ? 1 : -1; + } + + return 0; +} + +/* + * cvmx_coremask_OPx(pcm1, pcm2[, pcm3]), where OPx can be + * - and + * - or + * - xor + * - not + * ... + * For binary operators, pcm3 <-- pcm1 OPX pcm2. + * For unaries, pcm2 <-- OPx pcm1. + */ +#define CVMX_COREMASK_BINARY_DEFUN(binary_op, op) \ + static inline int cvmx_coremask_##binary_op( \ + struct cvmx_coremask *pcm1, \ + const struct cvmx_coremask *pcm2, \ + const struct cvmx_coremask *pcm3) \ + { \ + int i; \ + \ + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) \ + pcm1->coremask_bitmap[i] = \ + pcm2->coremask_bitmap[i] \ + op \ + pcm3->coremask_bitmap[i]; \ + \ + return 0; \ + } + +#define CVMX_COREMASK_UNARY_DEFUN(unary_op, op) \ + static inline int cvmx_coremask_##unary_op( \ + struct cvmx_coremask *pcm1, \ + const struct cvmx_coremask *pcm2) \ + { \ + int i; \ + \ + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) \ + pcm1->coremask_bitmap[i] = \ + op \ + pcm2->coremask_bitmap[i]; \ + \ + return 0; \ + } + +/* cvmx_coremask_and(pcm1, pcm2, pcm3): pcm1 = pmc2 & pmc3 */ +CVMX_COREMASK_BINARY_DEFUN(and, &) +/* cvmx_coremask_or(pcm1, pcm2, pcm3): pcm1 = pmc2 | pmc3 */ +CVMX_COREMASK_BINARY_DEFUN(or, |) +/* cvmx_coremask_xor(pcm1, pcm2, pcm3): pcm1 = pmc2 ^ pmc3 */ +CVMX_COREMASK_BINARY_DEFUN(xor, ^) +/* cvmx_coremask_maskoff(pcm1, pcm2, pcm3): pcm1 = pmc2 & ~pmc3 */ +CVMX_COREMASK_BINARY_DEFUN(maskoff, & ~) +/* cvmx_coremask_not(pcm1, pcm2): pcm1 = ~pcm2 */ +CVMX_COREMASK_UNARY_DEFUN(not, ~) +/* cvmx_coremask_fill(pcm1, pcm2): pcm1 = -1 */ +CVMX_COREMASK_UNARY_DEFUN(fill, -1 |) +/* cvmx_coremask_clear(pcm1, pcm2): pcm1 = 0 */ +CVMX_COREMASK_UNARY_DEFUN(clear, 0 &) +/* cvmx_coremask_dup(pcm1, pcm2): pcm1 = pcm2 */ +CVMX_COREMASK_UNARY_DEFUN(dup, +) + +/* + * Macros using the unary functions defined w/ + * CVMX_COREMASK_UNARY_DEFUN + * - set *pcm to its complement + * - set all bits in *pcm to 0 + * - set all (valid) bits in *pcm to 1 + */ +#define cvmx_coremask_complement(pcm) cvmx_coremask_not(pcm, pcm) +/* On clear, even clear the unused bits */ +#define cvmx_coremask_clear_all(pcm) \ + *(pcm) = (struct cvmx_coremask)CVMX_COREMASK_EMPTY +#define cvmx_coremask_set_all(pcm) cvmx_coremask_fill(pcm, NULL) + +/* + * convert a string of hex digits to struct cvmx_coremask + * + * @param pcm + * @param hexstr can be + * - "[1-9A-Fa-f][0-9A-Fa-f]*", or + * - "-1" to set the bits for all the cores. + * return + * 0 for success, + * -1 for string too long (i.e., hexstr takes more bits than + * CVMX_MIPS_MAX_CORES), + * -2 for conversion problems from hex string to an unsigned + * long long, e.g., non-hex char in hexstr, and + * -3 for hexstr starting with '0'. + * NOTE: + * This function clears the bitmask in *pcm before the conversion. + */ +int cvmx_coremask_str2bmp(struct cvmx_coremask *pcm, char *hexstr); + +/* + * convert a struct cvmx_coremask to a string of hex digits + * + * @param pcm + * @param hexstr is "[1-9A-Fa-f][0-9A-Fa-f]*" + * + * return 0. + */ +int cvmx_coremask_bmp2str(const struct cvmx_coremask *pcm, char *hexstr); + +/* + * Returns the index of the lowest bit in a coremask holder. + */ +static inline int cvmx_coremask_lowest_bit(u64 h) +{ + return __builtin_ctzll(h); +} + +/* + * Returns the 0-based index of the highest bit in a coremask holder. + */ +static inline int cvmx_coremask_highest_bit(u64 h) +{ + return (64 - __builtin_clzll(h) - 1); +} + +/** + * Returns the last core within the coremask and -1 when the coremask + * is empty. + * + * @param[in] pcm - pointer to coremask + * @returns last core set in the coremask or -1 if all clear + * + */ +static inline int cvmx_coremask_get_last_core(const struct cvmx_coremask *pcm) +{ + int i; + int found = -1; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) { + if (pcm->coremask_bitmap[i]) + found = i; + } + + if (found == -1) + return -1; + + return found * CVMX_COREMASK_HLDRSZ + + cvmx_coremask_highest_bit(pcm->coremask_bitmap[found]); +} + +/** + * Returns the first core within the coremask and -1 when the coremask + * is empty. + * + * @param[in] pcm - pointer to coremask + * @returns first core set in the coremask or -1 if all clear + * + */ +static inline int cvmx_coremask_get_first_core(const struct cvmx_coremask *pcm) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + if (pcm->coremask_bitmap[i]) + break; + + if (i == CVMX_COREMASK_USED_BMPSZ) + return -1; + + return i * CVMX_COREMASK_HLDRSZ + + cvmx_coremask_lowest_bit(pcm->coremask_bitmap[i]); +} + +/** + * Given a core and coremask, return the next available core in the coremask + * or -1 if none are available. + * + * @param core - starting core to check (can be -1 for core 0) + * @param pcm - pointer to coremask to check for the next core. + * + * @return next core following the core parameter or -1 if no more cores. + */ +static inline int cvmx_coremask_next_core(int core, + const struct cvmx_coremask *pcm) +{ + int n, i; + + core++; + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + + if (pcm->coremask_bitmap[i] != 0) { + for (; n < CVMX_COREMASK_HLDRSZ; n++) + if (pcm->coremask_bitmap[i] & (1ULL << n)) + return ((i * CVMX_COREMASK_HLDRSZ) + n); + } + + for (i = i + 1; i < CVMX_COREMASK_USED_BMPSZ; i++) { + if (pcm->coremask_bitmap[i] != 0) + return (i * CVMX_COREMASK_HLDRSZ) + + cvmx_coremask_lowest_bit(pcm->coremask_bitmap[i]); + } + return -1; +} + +/** + * Compute coremask for count cores starting with start_core. + * Note that the coremask for multi-node processors may have + * gaps. + * + * @param[out] pcm pointer to core mask data structure + * @param start_core starting code number + * @param count number of cores + * + */ +static inline void cvmx_coremask_set_cores(struct cvmx_coremask *pcm, + unsigned int start_core, + unsigned int count) +{ + int node; + int core; /** Current core in node */ + int cores_in_node; + int i; + + assert(CVMX_MAX_CORES < CVMX_COREMASK_HLDRSZ); + node = start_core >> CVMX_NODE_NO_SHIFT; + core = start_core & ((1 << CVMX_NODE_NO_SHIFT) - 1); + assert(core < CVMX_MAX_CORES); + + cvmx_coremask_clear_all(pcm); + while (count > 0) { + if (count + core > CVMX_MAX_CORES) + cores_in_node = CVMX_MAX_CORES - core; + else + cores_in_node = count; + + i = CVMX_COREMASK_BMP_NODE_CORE_IDX(node, core); + pcm->coremask_bitmap[i] = ((1ULL << cores_in_node) - 1) << core; + count -= cores_in_node; + core = 0; + node++; + } +} + +/** + * Makes a copy of a coremask + * + * @param[out] dest - pointer to destination coremask + * @param[in] src - pointer to source coremask + */ +static inline void cvmx_coremask_copy(struct cvmx_coremask *dest, + const struct cvmx_coremask *src) +{ + memcpy(dest, src, sizeof(*dest)); +} + +/** + * Test to see if the specified core is first core in coremask. + * + * @param[in] pcm pointer to the coremask to test against + * @param[in] core core to check + * + * @return 1 if the core is first core in the coremask, 0 otherwise + * + */ +static inline int cvmx_coremask_is_core_first_core(const struct cvmx_coremask *pcm, + unsigned int core) +{ + int n, i; + + n = core / CVMX_COREMASK_HLDRSZ; + + for (i = 0; i < n; i++) + if (pcm->coremask_bitmap[i] != 0) + return 0; + + /* From now on we only care about the core number within an entry */ + core &= (CVMX_COREMASK_HLDRSZ - 1); + if (__builtin_ffsll(pcm->coremask_bitmap[n]) < (core + 1)) + return 0; + + return (__builtin_ffsll(pcm->coremask_bitmap[n]) == core + 1); +} + +/* + * NOTE: + * cvmx_coremask_is_first_core() was retired due to improper usage. + * For inquiring about the current core being the initializing + * core for an application, use cvmx_is_init_core(). + * For simply inquring if the current core is numerically + * lowest in a given mask, use : + * cvmx_coremask_is_core_first_core( pcm, dvmx_get_core_num()) + */ + +/** + * Returns the number of 1 bits set in a coremask + * + * @param[in] pcm - pointer to core mask + * + * @return number of bits set in the coremask + */ +static inline int cvmx_coremask_get_core_count(const struct cvmx_coremask *pcm) +{ + int i; + int count = 0; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + count += __builtin_popcountll(pcm->coremask_bitmap[i]); + + return count; +} + +/** + * For multi-node systems, return the node a core belongs to. + * + * @param core - core number (0-1023) + * + * @return node number core belongs to + */ +static inline int cvmx_coremask_core_to_node(int core) +{ + return (core >> CVMX_NODE_NO_SHIFT) & CVMX_NODE_MASK; +} + +/** + * Given a core number on a multi-node system, return the core number for a + * particular node. + * + * @param core - global core number + * + * @returns core number local to the node. + */ +static inline int cvmx_coremask_core_on_node(int core) +{ + return (core & ((1 << CVMX_NODE_NO_SHIFT) - 1)); +} + +/** + * Returns if one coremask is a subset of another coremask + * + * @param main - main coremask to test + * @param subset - subset coremask to test + * + * @return 0 if the subset contains cores not in the main coremask or 1 if + * the subset is fully contained in the main coremask. + */ +static inline int cvmx_coremask_is_subset(const struct cvmx_coremask *main, + const struct cvmx_coremask *subset) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + if ((main->coremask_bitmap[i] & subset->coremask_bitmap[i]) != + subset->coremask_bitmap[i]) + return 0; + return 1; +} + +/** + * Returns if one coremask intersects another coremask + * + * @param c1 - main coremask to test + * @param c2 - subset coremask to test + * + * @return 1 if coremask c1 intersects coremask c2, 0 if they are exclusive + */ +static inline int cvmx_coremask_intersects(const struct cvmx_coremask *c1, + const struct cvmx_coremask *c2) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + if ((c1->coremask_bitmap[i] & c2->coremask_bitmap[i]) != 0) + return 1; + return 0; +} + +/** + * Masks a single node of a coremask + * + * @param pcm - coremask to mask [inout] + * @param node - node number to mask against + */ +static inline void cvmx_coremask_mask_node(struct cvmx_coremask *pcm, int node) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0); i++) + pcm->coremask_bitmap[i] = 0; + + for (i = CVMX_COREMASK_BMP_NODE_CORE_IDX(node + 1, 0); + i < CVMX_COREMASK_USED_BMPSZ; i++) + pcm->coremask_bitmap[i] = 0; +} + +/** + * Prints out a coremask in the form of node X: 0x... 0x... + * + * @param[in] pcm - pointer to core mask + * + * @return nothing + */ +void cvmx_coremask_print(const struct cvmx_coremask *pcm); + +static inline void cvmx_coremask_dprint(const struct cvmx_coremask *pcm) +{ + if (IS_ENABLED(DEBUG)) + cvmx_coremask_print(pcm); +} + +struct cvmx_coremask *octeon_get_available_coremask(struct cvmx_coremask *pcm); + +int validate_coremask(struct cvmx_coremask *pcm); + +#endif /* __CVMX_COREMASK_H__ */ diff --git a/arch/mips/mach-octeon/include/mach/cvmx-fuse.h b/arch/mips/mach-octeon/include/mach/cvmx-fuse.h new file mode 100644 index 0000000000..a06a1326cb --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-fuse.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __CVMX_FUSE_H__ +#define __CVMX_FUSE_H__ + +/** + * Read a byte of fuse data + * @param node node to read from + * @param byte_addr address to read + * + * @return fuse value: 0 or 1 + */ +static inline u8 cvmx_fuse_read_byte_node(u8 node, int byte_addr) +{ + u64 val; + + val = FIELD_PREP(MIO_FUS_RCMD_ADDR, byte_addr) | MIO_FUS_RCMD_PEND; + csr_wr_node(node, CVMX_MIO_FUS_RCMD, val); + + do { + val = csr_rd_node(node, CVMX_MIO_FUS_RCMD); + } while (val & MIO_FUS_RCMD_PEND); + + return FIELD_GET(MIO_FUS_RCMD_DAT, val); +} + +/** + * Read a byte of fuse data + * @param byte_addr address to read + * + * @return fuse value: 0 or 1 + */ +static inline u8 cvmx_fuse_read_byte(int byte_addr) +{ + return cvmx_fuse_read_byte_node(0, byte_addr); +} + +/** + * Read a single fuse bit + * + * @param node Node number + * @param fuse Fuse number (0-1024) + * + * @return fuse value: 0 or 1 + */ +static inline int cvmx_fuse_read_node(u8 node, int fuse) +{ + return (cvmx_fuse_read_byte_node(node, fuse >> 3) >> (fuse & 0x7)) & 1; +} + +/** + * Read a single fuse bit + * + * @param fuse Fuse number (0-1024) + * + * @return fuse value: 0 or 1 + */ +static inline int cvmx_fuse_read(int fuse) +{ + return cvmx_fuse_read_node(0, fuse); +} + +static inline int cvmx_octeon_fuse_locked(void) +{ + return cvmx_fuse_read(123); +} + +#endif /* __CVMX_FUSE_H__ */ diff --git a/arch/mips/mach-octeon/include/mach/cvmx-regs.h b/arch/mips/mach-octeon/include/mach/cvmx-regs.h new file mode 100644 index 0000000000..b84fc9fd57 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-regs.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2020 Stefan Roese + */ + +#ifndef __CVMX_REGS_H__ +#define __CVMX_REGS_H__ + +#include +#include +#include + +/* General defines */ +#define CVMX_MAX_CORES 48 +/* Maximum # of bits to define core in node */ +#define CVMX_NODE_NO_SHIFT 7 +#define CVMX_NODE_BITS 2 /* Number of bits to define a node */ +#define CVMX_MAX_NODES (1 << CVMX_NODE_BITS) +#define CVMX_NODE_MASK (CVMX_MAX_NODES - 1) +#define CVMX_NODE_IO_SHIFT 36 +#define CVMX_NODE_MEM_SHIFT 40 +#define CVMX_NODE_IO_MASK ((u64)CVMX_NODE_MASK << CVMX_NODE_IO_SHIFT) + +#define CVMX_MIPS_MAX_CORE_BITS 10 /* Maximum # of bits to define cores */ +#define CVMX_MIPS_MAX_CORES (1 << CVMX_MIPS_MAX_CORE_BITS) + +#define MAX_CORE_TADS 8 + +#define CAST_ULL(v) ((unsigned long long)(v)) +#define CASTPTR(type, v) ((type *)(long)(v)) + +/* Regs */ +#define CVMX_CIU_PP_RST 0x0001010000000100ULL +#define CVMX_CIU3_NMI 0x0001010000000160ULL +#define CVMX_CIU_FUSE 0x00010100000001a0ULL +#define CVMX_CIU_NMI 0x0001070000000718ULL + +#define CVMX_MIO_BOOT_LOC_CFGX(x) (0x0001180000000080ULL + ((x) & 1) * 8) +#define MIO_BOOT_LOC_CFG_BASE GENMASK_ULL(27, 3) +#define MIO_BOOT_LOC_CFG_EN BIT_ULL(31) + +#define CVMX_MIO_BOOT_LOC_ADR 0x0001180000000090ULL +#define MIO_BOOT_LOC_ADR_ADR GENMASK_ULL(7, 3) + +#define CVMX_MIO_BOOT_LOC_DAT 0x0001180000000098ULL + +#define CVMX_MIO_FUS_DAT2 0x0001180000001410ULL +#define MIO_FUS_DAT2_NOCRYPTO BIT_ULL(26) +#define MIO_FUS_DAT2_NOMUL BIT_ULL(27) +#define MIO_FUS_DAT2_DORM_CRYPTO BIT_ULL(34) + +#define CVMX_MIO_FUS_RCMD 0x0001180000001500ULL +#define MIO_FUS_RCMD_ADDR GENMASK_ULL(7, 0) +#define MIO_FUS_RCMD_PEND BIT_ULL(12) +#define MIO_FUS_RCMD_DAT GENMASK_ULL(23, 16) + +#define CVMX_RNM_CTL_STATUS 0x0001180040000000ULL +#define RNM_CTL_STATUS_EER_VAL BIT_ULL(9) + +/* turn the variable name into a string */ +#define CVMX_TMP_STR(x) CVMX_TMP_STR2(x) +#define CVMX_TMP_STR2(x) #x + +#define CVMX_RDHWRNV(result, regstr) \ + asm volatile ("rdhwr %[rt],$" CVMX_TMP_STR(regstr) : [rt] "=d" (result)) + +#define CVMX_SYNCW \ + asm volatile ("syncw\nsyncw\n" : : : "memory") + +/* ToDo: Currently only node = 0 supported */ +static inline u64 csr_rd_node(int node, u64 addr) +{ + void __iomem *base; + + base = ioremap_nocache(addr, 0x100); + return ioread64(base); +} + +static inline u64 csr_rd(u64 addr) +{ + return csr_rd_node(0, addr); +} + +static inline void csr_wr_node(int node, u64 addr, u64 val) +{ + void __iomem *base; + + base = ioremap_nocache(addr, 0x100); + iowrite64(val, base); +} + +static inline void csr_wr(u64 addr, u64 val) +{ + csr_wr_node(0, addr, val); +} + +/* + * We need to use the volatile access here, otherwise the IO accessor + * functions might swap the bytes + */ +static inline u64 cvmx_read64_uint64(u64 addr) +{ + return *(volatile u64 *)addr; +} + +static inline void cvmx_write64_uint64(u64 addr, u64 val) +{ + *(volatile u64 *)addr = val; +} + +static inline u32 cvmx_read64_uint32(u64 addr) +{ + return *(volatile u32 *)addr; +} + +static inline void cvmx_write64_uint32(u64 addr, u32 val) +{ + *(volatile u32 *)addr = val; +} + +static inline void *cvmx_phys_to_ptr(u64 addr) +{ + return (void *)CKSEG0ADDR(addr); +} + +static inline u64 cvmx_ptr_to_phys(void *ptr) +{ + return virt_to_phys(ptr); +} + +/** + * Number of the Core on which the program is currently running. + * + * @return core number + */ +static inline unsigned int cvmx_get_core_num(void) +{ + unsigned int core_num; + + CVMX_RDHWRNV(core_num, 0); + return core_num; +} + +#endif /* __CVMX_REGS_H__ */ diff --git a/arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h b/arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h new file mode 100644 index 0000000000..3b4cba9241 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h @@ -0,0 +1,4574 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __CVMX_LMCX_DEFS_H__ +#define __CVMX_LMCX_DEFS_H__ + +#define CVMX_LMCX_BANK_CONFLICT1(offs) \ + ((0x000360ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_BANK_CONFLICT2(offs) \ + ((0x000368ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_BIST_RESULT(offs) \ + ((0x0000F8ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_CHAR_CTL(offs) \ + ((0x000220ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_DQ_ERR_COUNT(offs) \ + ((0x000040ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK0(offs) \ + ((0x000228ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK1(offs) \ + ((0x000230ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK2(offs) \ + ((0x000238ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK3(offs) \ + ((0x000240ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK4(offs) \ + ((0x000318ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_COMP_CTL(offs) \ + ((0x000028ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_COMP_CTL2(offs) \ + ((0x0001B8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CONFIG(offs) \ + ((0x000188ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CONTROL(offs) \ + ((0x000190ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CTL(offs) \ + ((0x000010ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_CTL1(offs) \ + ((0x000090ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DBTRAIN_CTL(offs) \ + ((0x0003F8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DCLK_CNT(offs) \ + ((0x0001E0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DCLK_CNT_HI(offs) \ + ((0x000070ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DCLK_CNT_LO(offs) \ + ((0x000068ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DCLK_CTL(offs) \ + ((0x0000B8ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DDR2_CTL(offs) \ + ((0x000018ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DDR4_DIMM_CTL(offs) \ + ((0x0003F0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DDR_PLL_CTL(offs) \ + ((0x000258ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DELAY_CFG(offs) \ + ((0x000088ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DIMMX_DDR4_PARAMS0(offs, id) \ + ((0x0000D0ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_DIMMX_DDR4_PARAMS1(offs, id) \ + ((0x000140ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_DIMMX_PARAMS(offs, id) \ + ((0x000270ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_DIMM_CTL(offs) \ + ((0x000310ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DLL_CTL(offs) \ + ((0x0000C0ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DLL_CTL2(offs) \ + ((0x0001C8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DLL_CTL3(offs) \ + ((0x000218ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_ECC_PARITY_TEST(offs) \ + ((0x000108ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_EXT_CONFIG(offs) \ + ((0x000030ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_EXT_CONFIG2(offs) \ + ((0x000090ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_GENERAL_PURPOSE0(offs) \ + ((0x000340ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_GENERAL_PURPOSE1(offs) \ + ((0x000348ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_GENERAL_PURPOSE2(offs) \ + ((0x000350ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_IFB_CNT(offs) \ + ((0x0001D0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_IFB_CNT_HI(offs) \ + ((0x000050ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_IFB_CNT_LO(offs) \ + ((0x000048ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_INT(offs) \ + ((0x0001F0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_INT_EN(offs) \ + ((0x0001E8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_LANEX_CRC_SWIZ(x, id) \ + ((0x000380ull) + (((offs) & 15) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_MEM_CFG0(offs) \ + ((0x000000ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_MEM_CFG1(offs) \ + ((0x000008ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_MODEREG_PARAMS0(offs) \ + ((0x0001A8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MODEREG_PARAMS1(offs) \ + ((0x000260ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MODEREG_PARAMS2(offs) \ + ((0x000050ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MODEREG_PARAMS3(offs) \ + ((0x000058ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MPR_DATA0(offs) \ + ((0x000070ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MPR_DATA1(offs) \ + ((0x000078ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MPR_DATA2(offs) \ + ((0x000080ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MR_MPR_CTL(offs) \ + ((0x000068ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_NS_CTL(offs) \ + ((0x000178ull) + ((offs) & 3) * 0x1000000ull) + +static inline uint64_t CVMX_LMCX_NXM(unsigned long offs) +{ + switch (cvmx_get_octeon_family()) { + case OCTEON_CNF71XX & OCTEON_FAMILY_MASK: + case OCTEON_CN61XX & OCTEON_FAMILY_MASK: + case OCTEON_CN70XX & OCTEON_FAMILY_MASK: + case OCTEON_CN66XX & OCTEON_FAMILY_MASK: + case OCTEON_CN63XX & OCTEON_FAMILY_MASK: + return (0x0000C8ull) + (offs) * 0x60000000ull; + case OCTEON_CNF75XX & OCTEON_FAMILY_MASK: + case OCTEON_CN73XX & OCTEON_FAMILY_MASK: + return (0x0000C8ull) + (offs) * 0x1000000ull; + case OCTEON_CN78XX & OCTEON_FAMILY_MASK: + if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) + return (0x0000C8ull) + (offs) * 0x1000000ull; + if (OCTEON_IS_MODEL(OCTEON_CN78XX)) + return (0x0000C8ull) + (offs) * 0x1000000ull; + case OCTEON_CN68XX & OCTEON_FAMILY_MASK: + return (0x0000C8ull) + (offs) * 0x1000000ull; + } + return (0x0000C8ull) + (offs) * 0x1000000ull; +} + +#define CVMX_LMCX_NXM_FADR(offs) \ + ((0x000028ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_OPS_CNT(offs) \ + ((0x0001D8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_OPS_CNT_HI(offs) \ + ((0x000060ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_OPS_CNT_LO(offs) \ + ((0x000058ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_PHY_CTL(offs) \ + ((0x000210ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_PHY_CTL2(offs) \ + ((0x000250ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_PLL_BWCTL(offs) \ + ((0x000040ull)) +#define CVMX_LMCX_PLL_CTL(offs) \ + ((0x0000A8ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_PLL_STATUS(offs) \ + ((0x0000B0ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_PPR_CTL(offs) \ + ((0x0003E0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_READ_LEVEL_CTL(offs) \ + ((0x000140ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_READ_LEVEL_DBG(offs) \ + ((0x000148ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_READ_LEVEL_RANKX(offs, id) \ + ((0x000100ull) + (((offs) & 3) + ((id) & 1) * 0xC000000ull) * 8) +#define CVMX_LMCX_REF_STATUS(offs) \ + ((0x0000A0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RESET_CTL(offs) \ + ((0x000180ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RETRY_CONFIG(offs) \ + ((0x000110ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RETRY_STATUS(offs) \ + ((0x000118ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RLEVEL_CTL(offs) \ + ((0x0002A0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RLEVEL_DBG(offs) \ + ((0x0002A8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RLEVEL_RANKX(offs, id) \ + ((0x000280ull) + (((offs) & 3) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_RODT_COMP_CTL(offs) \ + ((0x0000A0ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_RODT_CTL(offs) \ + ((0x000078ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_RODT_MASK(offs) \ + ((0x000268ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SCRAMBLED_FADR(offs) \ + ((0x000330ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SCRAMBLE_CFG0(offs) \ + ((0x000320ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SCRAMBLE_CFG1(offs) \ + ((0x000328ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SCRAMBLE_CFG2(offs) \ + ((0x000338ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SEQ_CTL(offs) \ + ((0x000048ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SLOT_CTL0(offs) \ + ((0x0001F8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SLOT_CTL1(offs) \ + ((0x000200ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SLOT_CTL2(offs) \ + ((0x000208ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SLOT_CTL3(offs) \ + ((0x000248ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TIMING_PARAMS0(offs) \ + ((0x000198ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TIMING_PARAMS1(offs) \ + ((0x0001A0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TIMING_PARAMS2(offs) \ + ((0x000060ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TRO_CTL(offs) \ + ((0x000248ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TRO_STAT(offs) \ + ((0x000250ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_WLEVEL_CTL(offs) \ + ((0x000300ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_WLEVEL_DBG(offs) \ + ((0x000308ull) + ((offs) & 3) * 0x1000000ull) + +static inline uint64_t CVMX_LMCX_WLEVEL_RANKX(unsigned long offs, + unsigned long id) +{ + switch (cvmx_get_octeon_family()) { + case OCTEON_CN70XX & OCTEON_FAMILY_MASK: + return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8; + case OCTEON_CNF75XX & OCTEON_FAMILY_MASK: + case OCTEON_CN73XX & OCTEON_FAMILY_MASK: + return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8; + case OCTEON_CN78XX & OCTEON_FAMILY_MASK: + if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) + return (0x0002C0ull) + ((offs) + + (id) * 0x200000ull) * 8; + if (OCTEON_IS_MODEL(OCTEON_CN78XX)) + return (0x0002C0ull) + ((offs) + + (id) * 0x200000ull) * 8; + + case OCTEON_CN66XX & OCTEON_FAMILY_MASK: + case OCTEON_CN63XX & OCTEON_FAMILY_MASK: + return (0x0002B0ull) + ((offs) + (id) * 0x0ull) * 8; + case OCTEON_CNF71XX & OCTEON_FAMILY_MASK: + case OCTEON_CN61XX & OCTEON_FAMILY_MASK: + return (0x0002B0ull) + ((offs) + (id) * 0x200000ull) * 8; + case OCTEON_CN68XX & OCTEON_FAMILY_MASK: + return (0x0002B0ull) + ((offs) + (id) * 0x200000ull) * 8; + } + return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8; +} + +#define CVMX_LMCX_WODT_CTL0(offs) \ + ((0x000030ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_WODT_CTL1(offs) \ + ((0x000080ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_WODT_MASK(offs) \ + ((0x0001B0ull) + ((offs) & 3) * 0x1000000ull) + +/** + * cvmx_lmc#_char_ctl + * + * This register provides an assortment of various control fields needed + * to characterize the DDR3 interface. + */ +union cvmx_lmcx_char_ctl { + u64 u64; + struct cvmx_lmcx_char_ctl_s { + uint64_t reserved_54_63:10; + uint64_t dq_char_byte_check:1; + uint64_t dq_char_check_lock:1; + uint64_t dq_char_check_enable:1; + uint64_t dq_char_bit_sel:3; + uint64_t dq_char_byte_sel:4; + uint64_t dr:1; + uint64_t skew_on:1; + uint64_t en:1; + uint64_t sel:1; + uint64_t prog:8; + uint64_t prbs:32; + } s; + struct cvmx_lmcx_char_ctl_cn61xx { + uint64_t reserved_44_63:20; + uint64_t dr:1; + uint64_t skew_on:1; + uint64_t en:1; + uint64_t sel:1; + uint64_t prog:8; + uint64_t prbs:32; + } cn61xx; + struct cvmx_lmcx_char_ctl_cn63xx { + uint64_t reserved_42_63:22; + uint64_t en:1; + uint64_t sel:1; + uint64_t prog:8; + uint64_t prbs:32; + } cn63xx; + struct cvmx_lmcx_char_ctl_cn63xx cn63xxp1; + struct cvmx_lmcx_char_ctl_cn61xx cn66xx; + struct cvmx_lmcx_char_ctl_cn61xx cn68xx; + struct cvmx_lmcx_char_ctl_cn63xx cn68xxp1; + struct cvmx_lmcx_char_ctl_cn70xx { + uint64_t reserved_53_63:11; + uint64_t dq_char_check_lock:1; + uint64_t dq_char_check_enable:1; + uint64_t dq_char_bit_sel:3; + uint64_t dq_char_byte_sel:4; + uint64_t dr:1; + uint64_t skew_on:1; + uint64_t en:1; + uint64_t sel:1; + uint64_t prog:8; + uint64_t prbs:32; + } cn70xx; + struct cvmx_lmcx_char_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_char_ctl_s cn73xx; + struct cvmx_lmcx_char_ctl_s cn78xx; + struct cvmx_lmcx_char_ctl_s cn78xxp1; + struct cvmx_lmcx_char_ctl_cn61xx cnf71xx; + struct cvmx_lmcx_char_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_comp_ctl2 + * + * LMC_COMP_CTL2 = LMC Compensation control + * + */ +union cvmx_lmcx_comp_ctl2 { + u64 u64; + struct cvmx_lmcx_comp_ctl2_s { + uint64_t reserved_51_63:13; + uint64_t rclk_char_mode:1; + uint64_t reserved_40_49:10; + uint64_t ptune_offset:4; + uint64_t reserved_12_35:24; + uint64_t cmd_ctl:4; + uint64_t ck_ctl:4; + uint64_t dqx_ctl:4; + } s; + struct cvmx_lmcx_comp_ctl2_cn61xx { + uint64_t reserved_34_63:30; + uint64_t ddr__ptune:4; + uint64_t ddr__ntune:4; + uint64_t m180:1; + uint64_t byp:1; + uint64_t ptune:4; + uint64_t ntune:4; + uint64_t rodt_ctl:4; + uint64_t cmd_ctl:4; + uint64_t ck_ctl:4; + uint64_t dqx_ctl:4; + } cn61xx; + struct cvmx_lmcx_comp_ctl2_cn61xx cn63xx; + struct cvmx_lmcx_comp_ctl2_cn61xx cn63xxp1; + struct cvmx_lmcx_comp_ctl2_cn61xx cn66xx; + struct cvmx_lmcx_comp_ctl2_cn61xx cn68xx; + struct cvmx_lmcx_comp_ctl2_cn61xx cn68xxp1; + struct cvmx_lmcx_comp_ctl2_cn70xx { + uint64_t reserved_51_63:13; + uint64_t rclk_char_mode:1; + uint64_t ddr__ptune:5; + uint64_t ddr__ntune:5; + uint64_t ptune_offset:4; + uint64_t ntune_offset:4; + uint64_t m180:1; + uint64_t byp:1; + uint64_t ptune:5; + uint64_t ntune:5; + uint64_t rodt_ctl:4; + uint64_t control_ctl:4; + uint64_t cmd_ctl:4; + uint64_t ck_ctl:4; + uint64_t dqx_ctl:4; + } cn70xx; + struct cvmx_lmcx_comp_ctl2_cn70xx cn70xxp1; + struct cvmx_lmcx_comp_ctl2_cn70xx cn73xx; + struct cvmx_lmcx_comp_ctl2_cn70xx cn78xx; + struct cvmx_lmcx_comp_ctl2_cn70xx cn78xxp1; + struct cvmx_lmcx_comp_ctl2_cn61xx cnf71xx; + struct cvmx_lmcx_comp_ctl2_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_config + * + * This register controls certain parameters required for memory configuration. + * Note the following: + * * Priority order for hardware write operations to + * LMC()_CONFIG/LMC()_FADR/LMC()_ECC_SYND: DED error > SEC error. + * * The self-refresh entry sequence(s) power the DLL up/down (depending on + * LMC()_MODEREG_PARAMS0[DLL]) when LMC()_CONFIG[SREF_WITH_DLL] is set. + * * Prior to the self-refresh exit sequence, LMC()_MODEREG_PARAMS0 should + * be reprogrammed + * (if needed) to the appropriate values. + * + * See LMC initialization sequence for the LMC bringup sequence. + */ +union cvmx_lmcx_config { + u64 u64; + struct cvmx_lmcx_config_s { + uint64_t lrdimm_ena:1; + uint64_t bg2_enable:1; + uint64_t mode_x4dev:1; + uint64_t mode32b:1; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t reserved_18_39:22; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } s; + struct cvmx_lmcx_config_cn61xx { + uint64_t reserved_61_63:3; + uint64_t mode32b:1; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t sequence:3; + uint64_t ref_zqcs_int:19; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } cn61xx; + struct cvmx_lmcx_config_cn63xx { + uint64_t reserved_59_63:5; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t sequence:3; + uint64_t ref_zqcs_int:19; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } cn63xx; + struct cvmx_lmcx_config_cn63xxp1 { + uint64_t reserved_55_63:9; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t sequence:3; + uint64_t ref_zqcs_int:19; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } cn63xxp1; + struct cvmx_lmcx_config_cn66xx { + uint64_t reserved_60_63:4; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t sequence:3; + uint64_t ref_zqcs_int:19; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } cn66xx; + struct cvmx_lmcx_config_cn63xx cn68xx; + struct cvmx_lmcx_config_cn63xx cn68xxp1; + struct cvmx_lmcx_config_cn70xx { + uint64_t reserved_63_63:1; + uint64_t bg2_enable:1; + uint64_t mode_x4dev:1; + uint64_t mode32b:1; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t ref_zqcs_int:22; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t reserved_0_0:1; + } cn70xx; + struct cvmx_lmcx_config_cn70xx cn70xxp1; + struct cvmx_lmcx_config_cn73xx { + uint64_t lrdimm_ena:1; + uint64_t bg2_enable:1; + uint64_t mode_x4dev:1; + uint64_t mode32b:1; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t ref_zqcs_int:22; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t reserved_0_0:1; + } cn73xx; + struct cvmx_lmcx_config_cn73xx cn78xx; + struct cvmx_lmcx_config_cn73xx cn78xxp1; + struct cvmx_lmcx_config_cn61xx cnf71xx; + struct cvmx_lmcx_config_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_control + * + * LMC_CONTROL = LMC Control + * This register is an assortment of various control fields needed by the + * memory controller + */ +union cvmx_lmcx_control { + u64 u64; + struct cvmx_lmcx_control_s { + uint64_t scramble_ena:1; + uint64_t thrcnt:12; + uint64_t persub:8; + uint64_t thrmax:4; + uint64_t crm_cnt:5; + uint64_t crm_thr:5; + uint64_t crm_max:5; + uint64_t rodt_bprch:1; + uint64_t wodt_bprch:1; + uint64_t bprch:2; + uint64_t ext_zqcs_dis:1; + uint64_t int_zqcs_dis:1; + uint64_t auto_dclkdis:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t nxm_write_en:1; + uint64_t elev_prio_dis:1; + uint64_t inorder_wr:1; + uint64_t inorder_rd:1; + uint64_t throttle_wr:1; + uint64_t throttle_rd:1; + uint64_t fprch2:2; + uint64_t pocas:1; + uint64_t ddr2t:1; + uint64_t bwcnt:1; + uint64_t rdimm_ena:1; + } s; + struct cvmx_lmcx_control_s cn61xx; + struct cvmx_lmcx_control_cn63xx { + uint64_t reserved_24_63:40; + uint64_t rodt_bprch:1; + uint64_t wodt_bprch:1; + uint64_t bprch:2; + uint64_t ext_zqcs_dis:1; + uint64_t int_zqcs_dis:1; + uint64_t auto_dclkdis:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t nxm_write_en:1; + uint64_t elev_prio_dis:1; + uint64_t inorder_wr:1; + uint64_t inorder_rd:1; + uint64_t throttle_wr:1; + uint64_t throttle_rd:1; + uint64_t fprch2:2; + uint64_t pocas:1; + uint64_t ddr2t:1; + uint64_t bwcnt:1; + uint64_t rdimm_ena:1; + } cn63xx; + struct cvmx_lmcx_control_cn63xx cn63xxp1; + struct cvmx_lmcx_control_cn66xx { + uint64_t scramble_ena:1; + uint64_t reserved_24_62:39; + uint64_t rodt_bprch:1; + uint64_t wodt_bprch:1; + uint64_t bprch:2; + uint64_t ext_zqcs_dis:1; + uint64_t int_zqcs_dis:1; + uint64_t auto_dclkdis:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t nxm_write_en:1; + uint64_t elev_prio_dis:1; + uint64_t inorder_wr:1; + uint64_t inorder_rd:1; + uint64_t throttle_wr:1; + uint64_t throttle_rd:1; + uint64_t fprch2:2; + uint64_t pocas:1; + uint64_t ddr2t:1; + uint64_t bwcnt:1; + uint64_t rdimm_ena:1; + } cn66xx; + struct cvmx_lmcx_control_cn68xx { + uint64_t reserved_63_63:1; + uint64_t thrcnt:12; + uint64_t persub:8; + uint64_t thrmax:4; + uint64_t crm_cnt:5; + uint64_t crm_thr:5; + uint64_t crm_max:5; + uint64_t rodt_bprch:1; + uint64_t wodt_bprch:1; + uint64_t bprch:2; + uint64_t ext_zqcs_dis:1; + uint64_t int_zqcs_dis:1; + uint64_t auto_dclkdis:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t nxm_write_en:1; + uint64_t elev_prio_dis:1; + uint64_t inorder_wr:1; + uint64_t inorder_rd:1; + uint64_t throttle_wr:1; + uint64_t throttle_rd:1; + uint64_t fprch2:2; + uint64_t pocas:1; + uint64_t ddr2t:1; + uint64_t bwcnt:1; + uint64_t rdimm_ena:1; + } cn68xx; + struct cvmx_lmcx_control_cn68xx cn68xxp1; + struct cvmx_lmcx_control_s cn70xx; + struct cvmx_lmcx_control_s cn70xxp1; + struct cvmx_lmcx_control_s cn73xx; + struct cvmx_lmcx_control_s cn78xx; + struct cvmx_lmcx_control_s cn78xxp1; + struct cvmx_lmcx_control_cn66xx cnf71xx; + struct cvmx_lmcx_control_s cnf75xx; +}; + +/** + * cvmx_lmc#_ctl + * + * LMC_CTL = LMC Control + * This register is an assortment of various control fields needed by the + * memory controller + */ +union cvmx_lmcx_ctl { + u64 u64; + struct cvmx_lmcx_ctl_s { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t pll_div2:1; + uint64_t pll_bypass:1; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t reserved_10_11:2; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } s; + struct cvmx_lmcx_ctl_cn30xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t pll_div2:1; + uint64_t pll_bypass:1; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t dreset:1; + uint64_t mode32b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn30xx; + struct cvmx_lmcx_ctl_cn30xx cn31xx; + struct cvmx_lmcx_ctl_cn38xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t reserved_16_17:2; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t set_zero:1; + uint64_t mode128b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn38xx; + struct cvmx_lmcx_ctl_cn38xx cn38xxp2; + struct cvmx_lmcx_ctl_cn50xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t reserved_17_17:1; + uint64_t pll_bypass:1; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t dreset:1; + uint64_t mode32b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn50xx; + struct cvmx_lmcx_ctl_cn52xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t reserved_16_17:2; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t dreset:1; + uint64_t mode32b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn52xx; + struct cvmx_lmcx_ctl_cn52xx cn52xxp1; + struct cvmx_lmcx_ctl_cn52xx cn56xx; + struct cvmx_lmcx_ctl_cn52xx cn56xxp1; + struct cvmx_lmcx_ctl_cn58xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t reserved_16_17:2; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t dreset:1; + uint64_t mode128b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn58xx; + struct cvmx_lmcx_ctl_cn58xx cn58xxp1; +}; + +/** + * cvmx_lmc#_ctl1 + * + * LMC_CTL1 = LMC Control1 + * This register is an assortment of various control fields needed by the + * memory controller + */ +union cvmx_lmcx_ctl1 { + u64 u64; + struct cvmx_lmcx_ctl1_s { + uint64_t reserved_21_63:43; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t sequence:3; + uint64_t sil_mode:1; + uint64_t dcc_enable:1; + uint64_t reserved_2_7:6; + uint64_t data_layout:2; + } s; + struct cvmx_lmcx_ctl1_cn30xx { + uint64_t reserved_2_63:62; + uint64_t data_layout:2; + } cn30xx; + struct cvmx_lmcx_ctl1_cn50xx { + uint64_t reserved_10_63:54; + uint64_t sil_mode:1; + uint64_t dcc_enable:1; + uint64_t reserved_2_7:6; + uint64_t data_layout:2; + } cn50xx; + struct cvmx_lmcx_ctl1_cn52xx { + uint64_t reserved_21_63:43; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t sequence:3; + uint64_t sil_mode:1; + uint64_t dcc_enable:1; + uint64_t reserved_0_7:8; + } cn52xx; + struct cvmx_lmcx_ctl1_cn52xx cn52xxp1; + struct cvmx_lmcx_ctl1_cn52xx cn56xx; + struct cvmx_lmcx_ctl1_cn52xx cn56xxp1; + struct cvmx_lmcx_ctl1_cn58xx { + uint64_t reserved_10_63:54; + uint64_t sil_mode:1; + uint64_t dcc_enable:1; + uint64_t reserved_0_7:8; + } cn58xx; + struct cvmx_lmcx_ctl1_cn58xx cn58xxp1; +}; + +/** + * cvmx_lmc#_dbtrain_ctl + * + * Reserved. + * + */ +union cvmx_lmcx_dbtrain_ctl { + u64 u64; + struct cvmx_lmcx_dbtrain_ctl_s { + uint64_t reserved_63_63:1; + uint64_t lfsr_pattern_sel:1; + uint64_t cmd_count_ext:2; + uint64_t db_output_impedance:3; + uint64_t db_sel:1; + uint64_t tccd_sel:1; + uint64_t rw_train:1; + uint64_t read_dq_count:7; + uint64_t read_cmd_count:5; + uint64_t write_ena:1; + uint64_t activate:1; + uint64_t prank:2; + uint64_t lrank:3; + uint64_t row_a:18; + uint64_t bg:2; + uint64_t ba:2; + uint64_t column_a:13; + } s; + struct cvmx_lmcx_dbtrain_ctl_cn73xx { + uint64_t reserved_60_63:4; + uint64_t db_output_impedance:3; + uint64_t db_sel:1; + uint64_t tccd_sel:1; + uint64_t rw_train:1; + uint64_t read_dq_count:7; + uint64_t read_cmd_count:5; + uint64_t write_ena:1; + uint64_t activate:1; + uint64_t prank:2; + uint64_t lrank:3; + uint64_t row_a:18; + uint64_t bg:2; + uint64_t ba:2; + uint64_t column_a:13; + } cn73xx; + struct cvmx_lmcx_dbtrain_ctl_s cn78xx; + struct cvmx_lmcx_dbtrain_ctl_cnf75xx { + uint64_t reserved_62_63:2; + uint64_t cmd_count_ext:2; + uint64_t db_output_impedance:3; + uint64_t db_sel:1; + uint64_t tccd_sel:1; + uint64_t rw_train:1; + uint64_t read_dq_count:7; + uint64_t read_cmd_count:5; + uint64_t write_ena:1; + uint64_t activate:1; + uint64_t prank:2; + uint64_t lrank:3; + uint64_t row_a:18; + uint64_t bg:2; + uint64_t ba:2; + uint64_t column_a:13; + } cnf75xx; +}; + +/** + * cvmx_lmc#_dclk_cnt + * + * LMC_DCLK_CNT = Performance Counters + * + */ +union cvmx_lmcx_dclk_cnt { + u64 u64; + struct cvmx_lmcx_dclk_cnt_s { + uint64_t dclkcnt:64; + } s; + struct cvmx_lmcx_dclk_cnt_s cn61xx; + struct cvmx_lmcx_dclk_cnt_s cn63xx; + struct cvmx_lmcx_dclk_cnt_s cn63xxp1; + struct cvmx_lmcx_dclk_cnt_s cn66xx; + struct cvmx_lmcx_dclk_cnt_s cn68xx; + struct cvmx_lmcx_dclk_cnt_s cn68xxp1; + struct cvmx_lmcx_dclk_cnt_s cn70xx; + struct cvmx_lmcx_dclk_cnt_s cn70xxp1; + struct cvmx_lmcx_dclk_cnt_s cn73xx; + struct cvmx_lmcx_dclk_cnt_s cn78xx; + struct cvmx_lmcx_dclk_cnt_s cn78xxp1; + struct cvmx_lmcx_dclk_cnt_s cnf71xx; + struct cvmx_lmcx_dclk_cnt_s cnf75xx; +}; + +/** + * cvmx_lmc#_dclk_cnt_hi + * + * LMC_DCLK_CNT_HI = Performance Counters + * + */ +union cvmx_lmcx_dclk_cnt_hi { + u64 u64; + struct cvmx_lmcx_dclk_cnt_hi_s { + uint64_t reserved_32_63:32; + uint64_t dclkcnt_hi:32; + } s; + struct cvmx_lmcx_dclk_cnt_hi_s cn30xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn31xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn38xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn38xxp2; + struct cvmx_lmcx_dclk_cnt_hi_s cn50xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn52xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn52xxp1; + struct cvmx_lmcx_dclk_cnt_hi_s cn56xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn56xxp1; + struct cvmx_lmcx_dclk_cnt_hi_s cn58xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn58xxp1; +}; + +/** + * cvmx_lmc#_dclk_cnt_lo + * + * LMC_DCLK_CNT_LO = Performance Counters + * + */ +union cvmx_lmcx_dclk_cnt_lo { + u64 u64; + struct cvmx_lmcx_dclk_cnt_lo_s { + uint64_t reserved_32_63:32; + uint64_t dclkcnt_lo:32; + } s; + struct cvmx_lmcx_dclk_cnt_lo_s cn30xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn31xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn38xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn38xxp2; + struct cvmx_lmcx_dclk_cnt_lo_s cn50xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn52xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn52xxp1; + struct cvmx_lmcx_dclk_cnt_lo_s cn56xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn56xxp1; + struct cvmx_lmcx_dclk_cnt_lo_s cn58xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn58xxp1; +}; + +/** + * cvmx_lmc#_dclk_ctl + * + * LMC_DCLK_CTL = LMC DCLK generation control + * + * + * Notes: + * This CSR is only relevant for LMC1. LMC0_DCLK_CTL is not used. + * + */ +union cvmx_lmcx_dclk_ctl { + u64 u64; + struct cvmx_lmcx_dclk_ctl_s { + uint64_t reserved_8_63:56; + uint64_t off90_ena:1; + uint64_t dclk90_byp:1; + uint64_t dclk90_ld:1; + uint64_t dclk90_vlu:5; + } s; + struct cvmx_lmcx_dclk_ctl_s cn56xx; + struct cvmx_lmcx_dclk_ctl_s cn56xxp1; +}; + +/** + * cvmx_lmc#_ddr2_ctl + * + * LMC_DDR2_CTL = LMC DDR2 & DLL Control Register + * + */ +union cvmx_lmcx_ddr2_ctl { + u64 u64; + struct cvmx_lmcx_ddr2_ctl_s { + uint64_t reserved_32_63:32; + uint64_t bank8:1; + uint64_t burst8:1; + uint64_t addlat:3; + uint64_t pocas:1; + uint64_t bwcnt:1; + uint64_t twr:3; + uint64_t silo_hc:1; + uint64_t ddr_eof:4; + uint64_t tfaw:5; + uint64_t crip_mode:1; + uint64_t ddr2t:1; + uint64_t odt_ena:1; + uint64_t qdll_ena:1; + uint64_t dll90_vlu:5; + uint64_t dll90_byp:1; + uint64_t rdqs:1; + uint64_t ddr2:1; + } s; + struct cvmx_lmcx_ddr2_ctl_cn30xx { + uint64_t reserved_32_63:32; + uint64_t bank8:1; + uint64_t burst8:1; + uint64_t addlat:3; + uint64_t pocas:1; + uint64_t bwcnt:1; + uint64_t twr:3; + uint64_t silo_hc:1; + uint64_t ddr_eof:4; + uint64_t tfaw:5; + uint64_t crip_mode:1; + uint64_t ddr2t:1; + uint64_t odt_ena:1; + uint64_t qdll_ena:1; + uint64_t dll90_vlu:5; + uint64_t dll90_byp:1; + uint64_t reserved_1_1:1; + uint64_t ddr2:1; + } cn30xx; + struct cvmx_lmcx_ddr2_ctl_cn30xx cn31xx; + struct cvmx_lmcx_ddr2_ctl_s cn38xx; + struct cvmx_lmcx_ddr2_ctl_s cn38xxp2; + struct cvmx_lmcx_ddr2_ctl_s cn50xx; + struct cvmx_lmcx_ddr2_ctl_s cn52xx; + struct cvmx_lmcx_ddr2_ctl_s cn52xxp1; + struct cvmx_lmcx_ddr2_ctl_s cn56xx; + struct cvmx_lmcx_ddr2_ctl_s cn56xxp1; + struct cvmx_lmcx_ddr2_ctl_s cn58xx; + struct cvmx_lmcx_ddr2_ctl_s cn58xxp1; +}; + +/** + * cvmx_lmc#_ddr4_dimm_ctl + * + * Bits 0-21 of this register are used only when LMC()_CONTROL[RDIMM_ENA] = 1. + * + * During an RCW initialization sequence, bits 0-21 control LMC's write + * operations to the extended DDR4 control words in the JEDEC standard + * registering clock driver on an RDIMM. + */ +union cvmx_lmcx_ddr4_dimm_ctl { + u64 u64; + struct cvmx_lmcx_ddr4_dimm_ctl_s { + uint64_t reserved_28_63:36; + uint64_t rank_timing_enable:1; + uint64_t bodt_trans_mode:1; + uint64_t trans_mode_ena:1; + uint64_t read_preamble_mode:1; + uint64_t buff_config_da3:1; + uint64_t mpr_over_ena:1; + uint64_t ddr4_dimm1_wmask:11; + uint64_t ddr4_dimm0_wmask:11; + } s; + struct cvmx_lmcx_ddr4_dimm_ctl_cn70xx { + uint64_t reserved_22_63:42; + uint64_t ddr4_dimm1_wmask:11; + uint64_t ddr4_dimm0_wmask:11; + } cn70xx; + struct cvmx_lmcx_ddr4_dimm_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_ddr4_dimm_ctl_s cn73xx; + struct cvmx_lmcx_ddr4_dimm_ctl_s cn78xx; + struct cvmx_lmcx_ddr4_dimm_ctl_s cn78xxp1; + struct cvmx_lmcx_ddr4_dimm_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_ddr_pll_ctl + * + * This register controls the DDR_CK frequency. For details, refer to CK + * speed programming. See LMC initialization sequence for the initialization + * sequence. + * DDR PLL bringup sequence: + * + * 1. Write [CLKF], [CLKR], [DDR_PS_EN]. + * + * 2. Wait 128 ref clock cycles (7680 core-clock cycles). + * + * 3. Write 1 to [RESET_N]. + * + * 4. Wait 1152 ref clocks (1152*16 core-clock cycles). + * + * 5. Write 0 to [DDR_DIV_RESET]. + * + * 6. Wait 10 ref clock cycles (160 core-clock cycles) before bringing up + * the DDR interface. + */ +union cvmx_lmcx_ddr_pll_ctl { + u64 u64; + struct cvmx_lmcx_ddr_pll_ctl_s { + uint64_t reserved_45_63:19; + uint64_t dclk_alt_refclk_sel:1; + uint64_t bwadj:12; + uint64_t dclk_invert:1; + uint64_t phy_dcok:1; + uint64_t ddr4_mode:1; + uint64_t pll_fbslip:1; + uint64_t pll_lock:1; + uint64_t reserved_18_26:9; + uint64_t diffamp:4; + uint64_t cps:3; + uint64_t reserved_8_10:3; + uint64_t reset_n:1; + uint64_t clkf:7; + } s; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx { + uint64_t reserved_27_63:37; + uint64_t jtg_test_mode:1; + uint64_t dfm_div_reset:1; + uint64_t dfm_ps_en:3; + uint64_t ddr_div_reset:1; + uint64_t ddr_ps_en:3; + uint64_t diffamp:4; + uint64_t cps:3; + uint64_t cpb:3; + uint64_t reset_n:1; + uint64_t clkf:7; + } cn61xx; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn63xx; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn63xxp1; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn66xx; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn68xx; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn68xxp1; + struct cvmx_lmcx_ddr_pll_ctl_cn70xx { + uint64_t reserved_31_63:33; + uint64_t phy_dcok:1; + uint64_t ddr4_mode:1; + uint64_t pll_fbslip:1; + uint64_t pll_lock:1; + uint64_t pll_rfslip:1; + uint64_t clkr:2; + uint64_t jtg_test_mode:1; + uint64_t ddr_div_reset:1; + uint64_t ddr_ps_en:4; + uint64_t reserved_8_17:10; + uint64_t reset_n:1; + uint64_t clkf:7; + } cn70xx; + struct cvmx_lmcx_ddr_pll_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_ddr_pll_ctl_cn73xx { + uint64_t reserved_45_63:19; + uint64_t dclk_alt_refclk_sel:1; + uint64_t bwadj:12; + uint64_t dclk_invert:1; + uint64_t phy_dcok:1; + uint64_t ddr4_mode:1; + uint64_t pll_fbslip:1; + uint64_t pll_lock:1; + uint64_t pll_rfslip:1; + uint64_t clkr:2; + uint64_t jtg_test_mode:1; + uint64_t ddr_div_reset:1; + uint64_t ddr_ps_en:4; + uint64_t reserved_9_17:9; + uint64_t clkf_ext:1; + uint64_t reset_n:1; + uint64_t clkf:7; + } cn73xx; + struct cvmx_lmcx_ddr_pll_ctl_cn73xx cn78xx; + struct cvmx_lmcx_ddr_pll_ctl_cn73xx cn78xxp1; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cnf71xx; + struct cvmx_lmcx_ddr_pll_ctl_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_delay_cfg + * + * LMC_DELAY_CFG = Open-loop delay line settings + * + * + * Notes: + * The DQ bits add OUTGOING delay only to dq, dqs_[p,n], cb, cbs_[p,n], dqm. + * Delay is approximately 50-80ps per setting depending on process/voltage. + * There is no need to add incoming delay since by default all strobe bits + * are delayed internally by 90 degrees (as was always the case in previous + * passes and past chips. + * + * The CMD add delay to all command bits DDR_RAS, DDR_CAS, DDR_A<15:0>, + * DDR_BA<2:0>, DDR_n_CS<1:0>_L, DDR_WE, DDR_CKE and DDR_ODT_<7:0>. + * Again, delay is 50-80ps per tap. + * + * The CLK bits add delay to all clock signals DDR_CK_<5:0>_P and + * DDR_CK_<5:0>_N. Again, delay is 50-80ps per tap. + * + * The usage scenario is the following: There is too much delay on command + * signals and setup on command is not met. The user can then delay the + * clock until setup is met. + * + * At the same time though, dq/dqs should be delayed because there is also + * a DDR spec tying dqs with clock. If clock is too much delayed with + * respect to dqs, writes will start to fail. + * + * This scheme should eliminate the board need of adding routing delay to + * clock signals to make high frequencies work. + */ +union cvmx_lmcx_delay_cfg { + u64 u64; + struct cvmx_lmcx_delay_cfg_s { + uint64_t reserved_15_63:49; + uint64_t dq:5; + uint64_t cmd:5; + uint64_t clk:5; + } s; + struct cvmx_lmcx_delay_cfg_s cn30xx; + struct cvmx_lmcx_delay_cfg_cn38xx { + uint64_t reserved_14_63:50; + uint64_t dq:4; + uint64_t reserved_9_9:1; + uint64_t cmd:4; + uint64_t reserved_4_4:1; + uint64_t clk:4; + } cn38xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn50xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn52xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn52xxp1; + struct cvmx_lmcx_delay_cfg_cn38xx cn56xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn56xxp1; + struct cvmx_lmcx_delay_cfg_cn38xx cn58xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn58xxp1; +}; + +/** + * cvmx_lmc#_dimm#_ddr4_params0 + * + * This register contains values to be programmed into the extra DDR4 control + * words in the corresponding (registered) DIMM. These are control words + * RC1x through RC8x. + */ +union cvmx_lmcx_dimmx_ddr4_params0 { + u64 u64; + struct cvmx_lmcx_dimmx_ddr4_params0_s { + uint64_t rc8x:8; + uint64_t rc7x:8; + uint64_t rc6x:8; + uint64_t rc5x:8; + uint64_t rc4x:8; + uint64_t rc3x:8; + uint64_t rc2x:8; + uint64_t rc1x:8; + } s; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn70xx; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn70xxp1; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn73xx; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn78xx; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn78xxp1; + struct cvmx_lmcx_dimmx_ddr4_params0_s cnf75xx; +}; + +/** + * cvmx_lmc#_dimm#_ddr4_params1 + * + * This register contains values to be programmed into the extra DDR4 control + * words in the corresponding (registered) DIMM. These are control words + * RC9x through RCBx. + */ +union cvmx_lmcx_dimmx_ddr4_params1 { + u64 u64; + struct cvmx_lmcx_dimmx_ddr4_params1_s { + uint64_t reserved_24_63:40; + uint64_t rcbx:8; + uint64_t rcax:8; + uint64_t rc9x:8; + } s; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn70xx; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn70xxp1; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn73xx; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn78xx; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn78xxp1; + struct cvmx_lmcx_dimmx_ddr4_params1_s cnf75xx; +}; + +/** + * cvmx_lmc#_dimm#_params + * + * This register contains values to be programmed into each control word in + * the corresponding (registered) DIMM. The control words allow optimization + * of the device properties for different raw card designs. Note that LMC + * only uses this CSR when LMC()_CONTROL[RDIMM_ENA]=1. During a power-up/init + * sequence, LMC writes these fields into the control words in the JEDEC + * standard DDR3 SSTE32882 registering clock driver or DDR4 Register + * DDR4RCD01 on an RDIMM when corresponding LMC()_DIMM_CTL[DIMM*_WMASK] + * bits are set. + */ +union cvmx_lmcx_dimmx_params { + u64 u64; + struct cvmx_lmcx_dimmx_params_s { + uint64_t rc15:4; + uint64_t rc14:4; + uint64_t rc13:4; + uint64_t rc12:4; + uint64_t rc11:4; + uint64_t rc10:4; + uint64_t rc9:4; + uint64_t rc8:4; + uint64_t rc7:4; + uint64_t rc6:4; + uint64_t rc5:4; + uint64_t rc4:4; + uint64_t rc3:4; + uint64_t rc2:4; + uint64_t rc1:4; + uint64_t rc0:4; + } s; + struct cvmx_lmcx_dimmx_params_s cn61xx; + struct cvmx_lmcx_dimmx_params_s cn63xx; + struct cvmx_lmcx_dimmx_params_s cn63xxp1; + struct cvmx_lmcx_dimmx_params_s cn66xx; + struct cvmx_lmcx_dimmx_params_s cn68xx; + struct cvmx_lmcx_dimmx_params_s cn68xxp1; + struct cvmx_lmcx_dimmx_params_s cn70xx; + struct cvmx_lmcx_dimmx_params_s cn70xxp1; + struct cvmx_lmcx_dimmx_params_s cn73xx; + struct cvmx_lmcx_dimmx_params_s cn78xx; + struct cvmx_lmcx_dimmx_params_s cn78xxp1; + struct cvmx_lmcx_dimmx_params_s cnf71xx; + struct cvmx_lmcx_dimmx_params_s cnf75xx; +}; + +/** + * cvmx_lmc#_dimm_ctl + * + * Note that this CSR is only used when LMC()_CONTROL[RDIMM_ENA] = 1. During + * a power-up/init sequence, this CSR controls LMC's write operations to the + * control words in the JEDEC standard DDR3 SSTE32882 registering clock + * driver or DDR4 Register DDR4RCD01 on an RDIMM. + */ +union cvmx_lmcx_dimm_ctl { + u64 u64; + struct cvmx_lmcx_dimm_ctl_s { + uint64_t reserved_46_63:18; + uint64_t parity:1; + uint64_t tcws:13; + uint64_t dimm1_wmask:16; + uint64_t dimm0_wmask:16; + } s; + struct cvmx_lmcx_dimm_ctl_s cn61xx; + struct cvmx_lmcx_dimm_ctl_s cn63xx; + struct cvmx_lmcx_dimm_ctl_s cn63xxp1; + struct cvmx_lmcx_dimm_ctl_s cn66xx; + struct cvmx_lmcx_dimm_ctl_s cn68xx; + struct cvmx_lmcx_dimm_ctl_s cn68xxp1; + struct cvmx_lmcx_dimm_ctl_s cn70xx; + struct cvmx_lmcx_dimm_ctl_s cn70xxp1; + struct cvmx_lmcx_dimm_ctl_s cn73xx; + struct cvmx_lmcx_dimm_ctl_s cn78xx; + struct cvmx_lmcx_dimm_ctl_s cn78xxp1; + struct cvmx_lmcx_dimm_ctl_s cnf71xx; + struct cvmx_lmcx_dimm_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_dll_ctl + * + * LMC_DLL_CTL = LMC DLL control and DCLK reset + * + */ +union cvmx_lmcx_dll_ctl { + u64 u64; + struct cvmx_lmcx_dll_ctl_s { + uint64_t reserved_8_63:56; + uint64_t dreset:1; + uint64_t dll90_byp:1; + uint64_t dll90_ena:1; + uint64_t dll90_vlu:5; + } s; + struct cvmx_lmcx_dll_ctl_s cn52xx; + struct cvmx_lmcx_dll_ctl_s cn52xxp1; + struct cvmx_lmcx_dll_ctl_s cn56xx; + struct cvmx_lmcx_dll_ctl_s cn56xxp1; +}; + +/** + * cvmx_lmc#_dll_ctl2 + * + * See LMC initialization sequence for the initialization sequence. + * + */ +union cvmx_lmcx_dll_ctl2 { + u64 u64; + struct cvmx_lmcx_dll_ctl2_s { + uint64_t reserved_0_63:64; + } s; + struct cvmx_lmcx_dll_ctl2_cn61xx { + uint64_t reserved_16_63:48; + uint64_t intf_en:1; + uint64_t dll_bringup:1; + uint64_t dreset:1; + uint64_t quad_dll_ena:1; + uint64_t byp_sel:4; + uint64_t byp_setting:8; + } cn61xx; + struct cvmx_lmcx_dll_ctl2_cn63xx { + uint64_t reserved_15_63:49; + uint64_t dll_bringup:1; + uint64_t dreset:1; + uint64_t quad_dll_ena:1; + uint64_t byp_sel:4; + uint64_t byp_setting:8; + } cn63xx; + struct cvmx_lmcx_dll_ctl2_cn63xx cn63xxp1; + struct cvmx_lmcx_dll_ctl2_cn63xx cn66xx; + struct cvmx_lmcx_dll_ctl2_cn61xx cn68xx; + struct cvmx_lmcx_dll_ctl2_cn61xx cn68xxp1; + struct cvmx_lmcx_dll_ctl2_cn70xx { + uint64_t reserved_17_63:47; + uint64_t intf_en:1; + uint64_t dll_bringup:1; + uint64_t dreset:1; + uint64_t quad_dll_ena:1; + uint64_t byp_sel:4; + uint64_t byp_setting:9; + } cn70xx; + struct cvmx_lmcx_dll_ctl2_cn70xx cn70xxp1; + struct cvmx_lmcx_dll_ctl2_cn70xx cn73xx; + struct cvmx_lmcx_dll_ctl2_cn70xx cn78xx; + struct cvmx_lmcx_dll_ctl2_cn70xx cn78xxp1; + struct cvmx_lmcx_dll_ctl2_cn61xx cnf71xx; + struct cvmx_lmcx_dll_ctl2_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_dll_ctl3 + * + * LMC_DLL_CTL3 = LMC DLL control and DCLK reset + * + */ +union cvmx_lmcx_dll_ctl3 { + u64 u64; + struct cvmx_lmcx_dll_ctl3_s { + uint64_t reserved_50_63:14; + uint64_t wr_deskew_ena:1; + uint64_t wr_deskew_ld:1; + uint64_t bit_select:4; + uint64_t reserved_0_43:44; + } s; + struct cvmx_lmcx_dll_ctl3_cn61xx { + uint64_t reserved_41_63:23; + uint64_t dclk90_fwd:1; + uint64_t ddr_90_dly_byp:1; + uint64_t dclk90_recal_dis:1; + uint64_t dclk90_byp_sel:1; + uint64_t dclk90_byp_setting:8; + uint64_t dll_fast:1; + uint64_t dll90_setting:8; + uint64_t fine_tune_mode:1; + uint64_t dll_mode:1; + uint64_t dll90_byte_sel:4; + uint64_t offset_ena:1; + uint64_t load_offset:1; + uint64_t mode_sel:2; + uint64_t byte_sel:4; + uint64_t offset:6; + } cn61xx; + struct cvmx_lmcx_dll_ctl3_cn63xx { + uint64_t reserved_29_63:35; + uint64_t dll_fast:1; + uint64_t dll90_setting:8; + uint64_t fine_tune_mode:1; + uint64_t dll_mode:1; + uint64_t dll90_byte_sel:4; + uint64_t offset_ena:1; + uint64_t load_offset:1; + uint64_t mode_sel:2; + uint64_t byte_sel:4; + uint64_t offset:6; + } cn63xx; + struct cvmx_lmcx_dll_ctl3_cn63xx cn63xxp1; + struct cvmx_lmcx_dll_ctl3_cn63xx cn66xx; + struct cvmx_lmcx_dll_ctl3_cn61xx cn68xx; + struct cvmx_lmcx_dll_ctl3_cn61xx cn68xxp1; + struct cvmx_lmcx_dll_ctl3_cn70xx { + uint64_t reserved_44_63:20; + uint64_t dclk90_fwd:1; + uint64_t ddr_90_dly_byp:1; + uint64_t dclk90_recal_dis:1; + uint64_t dclk90_byp_sel:1; + uint64_t dclk90_byp_setting:9; + uint64_t dll_fast:1; + uint64_t dll90_setting:9; + uint64_t fine_tune_mode:1; + uint64_t dll_mode:1; + uint64_t dll90_byte_sel:4; + uint64_t offset_ena:1; + uint64_t load_offset:1; + uint64_t mode_sel:2; + uint64_t byte_sel:4; + uint64_t offset:7; + } cn70xx; + struct cvmx_lmcx_dll_ctl3_cn70xx cn70xxp1; + struct cvmx_lmcx_dll_ctl3_cn73xx { + uint64_t reserved_50_63:14; + uint64_t wr_deskew_ena:1; + uint64_t wr_deskew_ld:1; + uint64_t bit_select:4; + uint64_t dclk90_fwd:1; + uint64_t ddr_90_dly_byp:1; + uint64_t dclk90_recal_dis:1; + uint64_t dclk90_byp_sel:1; + uint64_t dclk90_byp_setting:9; + uint64_t dll_fast:1; + uint64_t dll90_setting:9; + uint64_t fine_tune_mode:1; + uint64_t dll_mode:1; + uint64_t dll90_byte_sel:4; + uint64_t offset_ena:1; + uint64_t load_offset:1; + uint64_t mode_sel:2; + uint64_t byte_sel:4; + uint64_t offset:7; + } cn73xx; + struct cvmx_lmcx_dll_ctl3_cn73xx cn78xx; + struct cvmx_lmcx_dll_ctl3_cn73xx cn78xxp1; + struct cvmx_lmcx_dll_ctl3_cn61xx cnf71xx; + struct cvmx_lmcx_dll_ctl3_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_dual_memcfg + * + * This register controls certain parameters of dual-memory configuration. + * + * This register enables the design to have two separate memory + * configurations, selected dynamically by the reference address. Note + * however, that both configurations share LMC()_CONTROL[XOR_BANK], + * LMC()_CONFIG [PBANK_LSB], LMC()_CONFIG[RANK_ENA], and all timing parameters. + * + * In this description: + * * config0 refers to the normal memory configuration that is defined by the + * LMC()_CONFIG[ROW_LSB] parameter + * * config1 refers to the dual (or second) memory configuration that is + * defined by this register. + */ +union cvmx_lmcx_dual_memcfg { + u64 u64; + struct cvmx_lmcx_dual_memcfg_s { + uint64_t reserved_20_63:44; + uint64_t bank8:1; + uint64_t row_lsb:3; + uint64_t reserved_8_15:8; + uint64_t cs_mask:8; + } s; + struct cvmx_lmcx_dual_memcfg_s cn50xx; + struct cvmx_lmcx_dual_memcfg_s cn52xx; + struct cvmx_lmcx_dual_memcfg_s cn52xxp1; + struct cvmx_lmcx_dual_memcfg_s cn56xx; + struct cvmx_lmcx_dual_memcfg_s cn56xxp1; + struct cvmx_lmcx_dual_memcfg_s cn58xx; + struct cvmx_lmcx_dual_memcfg_s cn58xxp1; + struct cvmx_lmcx_dual_memcfg_cn61xx { + uint64_t reserved_19_63:45; + uint64_t row_lsb:3; + uint64_t reserved_8_15:8; + uint64_t cs_mask:8; + } cn61xx; + struct cvmx_lmcx_dual_memcfg_cn61xx cn63xx; + struct cvmx_lmcx_dual_memcfg_cn61xx cn63xxp1; + struct cvmx_lmcx_dual_memcfg_cn61xx cn66xx; + struct cvmx_lmcx_dual_memcfg_cn61xx cn68xx; + struct cvmx_lmcx_dual_memcfg_cn61xx cn68xxp1; + struct cvmx_lmcx_dual_memcfg_cn70xx { + uint64_t reserved_19_63:45; + uint64_t row_lsb:3; + uint64_t reserved_4_15:12; + uint64_t cs_mask:4; + } cn70xx; + struct cvmx_lmcx_dual_memcfg_cn70xx cn70xxp1; + struct cvmx_lmcx_dual_memcfg_cn70xx cn73xx; + struct cvmx_lmcx_dual_memcfg_cn70xx cn78xx; + struct cvmx_lmcx_dual_memcfg_cn70xx cn78xxp1; + struct cvmx_lmcx_dual_memcfg_cn61xx cnf71xx; + struct cvmx_lmcx_dual_memcfg_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_ecc_parity_test + * + * This register has bits to control the generation of ECC and command + * address parity errors. ECC error is generated by enabling + * [CA_PARITY_CORRUPT_ENA] and selecting any of the [ECC_CORRUPT_IDX] + * index of the dataword from the cacheline to be corrupted. + * User needs to select which bit of the 128-bit dataword to corrupt by + * asserting any of the CHAR_MASK0 and CHAR_MASK2 bits. (CHAR_MASK0 and + * CHAR_MASK2 corresponds to the lower and upper 64-bit signal that can + * corrupt any individual bit of the data). + * + * Command address parity error is generated by enabling + * [CA_PARITY_CORRUPT_ENA] and selecting the DDR command that the parity + * is to be corrupted with through [CA_PARITY_SEL]. + */ +union cvmx_lmcx_ecc_parity_test { + u64 u64; + struct cvmx_lmcx_ecc_parity_test_s { + uint64_t reserved_12_63:52; + uint64_t ecc_corrupt_ena:1; + uint64_t ecc_corrupt_idx:3; + uint64_t reserved_6_7:2; + uint64_t ca_parity_corrupt_ena:1; + uint64_t ca_parity_sel:5; + } s; + struct cvmx_lmcx_ecc_parity_test_s cn73xx; + struct cvmx_lmcx_ecc_parity_test_s cn78xx; + struct cvmx_lmcx_ecc_parity_test_s cn78xxp1; + struct cvmx_lmcx_ecc_parity_test_s cnf75xx; +}; + +/** + * cvmx_lmc#_ecc_synd + * + * LMC_ECC_SYND = MRD ECC Syndromes + * + */ +union cvmx_lmcx_ecc_synd { + u64 u64; + struct cvmx_lmcx_ecc_synd_s { + uint64_t reserved_32_63:32; + uint64_t mrdsyn3:8; + uint64_t mrdsyn2:8; + uint64_t mrdsyn1:8; + uint64_t mrdsyn0:8; + } s; + struct cvmx_lmcx_ecc_synd_s cn30xx; + struct cvmx_lmcx_ecc_synd_s cn31xx; + struct cvmx_lmcx_ecc_synd_s cn38xx; + struct cvmx_lmcx_ecc_synd_s cn38xxp2; + struct cvmx_lmcx_ecc_synd_s cn50xx; + struct cvmx_lmcx_ecc_synd_s cn52xx; + struct cvmx_lmcx_ecc_synd_s cn52xxp1; + struct cvmx_lmcx_ecc_synd_s cn56xx; + struct cvmx_lmcx_ecc_synd_s cn56xxp1; + struct cvmx_lmcx_ecc_synd_s cn58xx; + struct cvmx_lmcx_ecc_synd_s cn58xxp1; + struct cvmx_lmcx_ecc_synd_s cn61xx; + struct cvmx_lmcx_ecc_synd_s cn63xx; + struct cvmx_lmcx_ecc_synd_s cn63xxp1; + struct cvmx_lmcx_ecc_synd_s cn66xx; + struct cvmx_lmcx_ecc_synd_s cn68xx; + struct cvmx_lmcx_ecc_synd_s cn68xxp1; + struct cvmx_lmcx_ecc_synd_s cn70xx; + struct cvmx_lmcx_ecc_synd_s cn70xxp1; + struct cvmx_lmcx_ecc_synd_s cn73xx; + struct cvmx_lmcx_ecc_synd_s cn78xx; + struct cvmx_lmcx_ecc_synd_s cn78xxp1; + struct cvmx_lmcx_ecc_synd_s cnf71xx; + struct cvmx_lmcx_ecc_synd_s cnf75xx; +}; + +/** + * cvmx_lmc#_ext_config + * + * This register has additional configuration and control bits for the LMC. + * + */ +union cvmx_lmcx_ext_config { + u64 u64; + struct cvmx_lmcx_ext_config_s { + uint64_t reserved_61_63:3; + uint64_t bc4_dqs_ena:1; + uint64_t ref_block:1; + uint64_t mrs_side:1; + uint64_t mrs_one_side:1; + uint64_t mrs_bside_invert_disable:1; + uint64_t dimm_sel_invert_off:1; + uint64_t dimm_sel_force_invert:1; + uint64_t coalesce_address_mode:1; + uint64_t dimm1_cid:2; + uint64_t dimm0_cid:2; + uint64_t rcd_parity_check:1; + uint64_t reserved_46_47:2; + uint64_t error_alert_n_sample:1; + uint64_t ea_int_polarity:1; + uint64_t reserved_43_43:1; + uint64_t par_addr_mask:3; + uint64_t reserved_38_39:2; + uint64_t mrs_cmd_override:1; + uint64_t mrs_cmd_select:1; + uint64_t reserved_33_35:3; + uint64_t invert_data:1; + uint64_t reserved_30_31:2; + uint64_t cmd_rti:1; + uint64_t cal_ena:1; + uint64_t reserved_27_27:1; + uint64_t par_include_a17:1; + uint64_t par_include_bg1:1; + uint64_t gen_par:1; + uint64_t reserved_21_23:3; + uint64_t vrefint_seq_deskew:1; + uint64_t read_ena_bprch:1; + uint64_t read_ena_fprch:1; + uint64_t slot_ctl_reset_force:1; + uint64_t ref_int_lsbs:9; + uint64_t drive_ena_bprch:1; + uint64_t drive_ena_fprch:1; + uint64_t dlcram_flip_synd:2; + uint64_t dlcram_cor_dis:1; + uint64_t dlc_nxm_rd:1; + uint64_t l2c_nxm_rd:1; + uint64_t l2c_nxm_wr:1; + } s; + struct cvmx_lmcx_ext_config_cn70xx { + uint64_t reserved_21_63:43; + uint64_t vrefint_seq_deskew:1; + uint64_t read_ena_bprch:1; + uint64_t read_ena_fprch:1; + uint64_t slot_ctl_reset_force:1; + uint64_t ref_int_lsbs:9; + uint64_t drive_ena_bprch:1; + uint64_t drive_ena_fprch:1; + uint64_t dlcram_flip_synd:2; + uint64_t dlcram_cor_dis:1; + uint64_t dlc_nxm_rd:1; + uint64_t l2c_nxm_rd:1; + uint64_t l2c_nxm_wr:1; + } cn70xx; + struct cvmx_lmcx_ext_config_cn70xx cn70xxp1; + struct cvmx_lmcx_ext_config_cn73xx { + uint64_t reserved_60_63:4; + uint64_t ref_block:1; + uint64_t mrs_side:1; + uint64_t mrs_one_side:1; + uint64_t mrs_bside_invert_disable:1; + uint64_t dimm_sel_invert_off:1; + uint64_t dimm_sel_force_invert:1; + uint64_t coalesce_address_mode:1; + uint64_t dimm1_cid:2; + uint64_t dimm0_cid:2; + uint64_t rcd_parity_check:1; + uint64_t reserved_46_47:2; + uint64_t error_alert_n_sample:1; + uint64_t ea_int_polarity:1; + uint64_t reserved_43_43:1; + uint64_t par_addr_mask:3; + uint64_t reserved_38_39:2; + uint64_t mrs_cmd_override:1; + uint64_t mrs_cmd_select:1; + uint64_t reserved_33_35:3; + uint64_t invert_data:1; + uint64_t reserved_30_31:2; + uint64_t cmd_rti:1; + uint64_t cal_ena:1; + uint64_t reserved_27_27:1; + uint64_t par_include_a17:1; + uint64_t par_include_bg1:1; + uint64_t gen_par:1; + uint64_t reserved_21_23:3; + uint64_t vrefint_seq_deskew:1; + uint64_t read_ena_bprch:1; + uint64_t read_ena_fprch:1; + uint64_t slot_ctl_reset_force:1; + uint64_t ref_int_lsbs:9; + uint64_t drive_ena_bprch:1; + uint64_t drive_ena_fprch:1; + uint64_t dlcram_flip_synd:2; + uint64_t dlcram_cor_dis:1; + uint64_t dlc_nxm_rd:1; + uint64_t l2c_nxm_rd:1; + uint64_t l2c_nxm_wr:1; + } cn73xx; + struct cvmx_lmcx_ext_config_s cn78xx; + struct cvmx_lmcx_ext_config_s cn78xxp1; + struct cvmx_lmcx_ext_config_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_ext_config2 + * + * This register has additional configuration and control bits for the LMC. + * + */ +union cvmx_lmcx_ext_config2 { + u64 u64; + struct cvmx_lmcx_ext_config2_s { + uint64_t reserved_27_63:37; + uint64_t sref_auto_idle_thres:5; + uint64_t sref_auto_enable:1; + uint64_t delay_unload_r3:1; + uint64_t delay_unload_r2:1; + uint64_t delay_unload_r1:1; + uint64_t delay_unload_r0:1; + uint64_t early_dqx2:1; + uint64_t xor_bank_sel:4; + uint64_t reserved_10_11:2; + uint64_t row_col_switch:1; + uint64_t trr_on:1; + uint64_t mac:3; + uint64_t macram_scrub_done:1; + uint64_t macram_scrub:1; + uint64_t macram_flip_synd:2; + uint64_t macram_cor_dis:1; + } s; + struct cvmx_lmcx_ext_config2_cn73xx { + uint64_t reserved_10_63:54; + uint64_t row_col_switch:1; + uint64_t trr_on:1; + uint64_t mac:3; + uint64_t macram_scrub_done:1; + uint64_t macram_scrub:1; + uint64_t macram_flip_synd:2; + uint64_t macram_cor_dis:1; + } cn73xx; + struct cvmx_lmcx_ext_config2_s cn78xx; + struct cvmx_lmcx_ext_config2_cnf75xx { + uint64_t reserved_21_63:43; + uint64_t delay_unload_r3:1; + uint64_t delay_unload_r2:1; + uint64_t delay_unload_r1:1; + uint64_t delay_unload_r0:1; + uint64_t early_dqx2:1; + uint64_t xor_bank_sel:4; + uint64_t reserved_10_11:2; + uint64_t row_col_switch:1; + uint64_t trr_on:1; + uint64_t mac:3; + uint64_t macram_scrub_done:1; + uint64_t macram_scrub:1; + uint64_t macram_flip_synd:2; + uint64_t macram_cor_dis:1; + } cnf75xx; +}; + +/** + * cvmx_lmc#_fadr + * + * This register only captures the first transaction with ECC errors. A DED + * error can over-write this register with its failing addresses if the + * first error was a SEC. If you write LMC()_INT -> SEC_ERR/DED_ERR, it + * clears the error bits and captures the next failing address. If FDIMM + * is 1, that means the error is in the high DIMM. LMC()_FADR captures the + * failing pre-scrambled address location (split into DIMM, bunk, bank, etc). + * If scrambling is off, then LMC()_FADR will also capture the failing + * physical location in the DRAM parts. LMC()_SCRAMBLED_FADR captures the + * actual failing address location in the physical DRAM parts, i.e., + * If scrambling is on, LMC()_SCRAMBLED_FADR contains the failing physical + * location in the DRAM parts (split into DIMM, bunk, bank, etc.) + * If scrambling is off, the pre-scramble and post-scramble addresses are + * the same; and so the contents of LMC()_SCRAMBLED_FADR match the contents + * of LMC()_FADR. + */ +union cvmx_lmcx_fadr { + u64 u64; + struct cvmx_lmcx_fadr_s { + uint64_t reserved_43_63:21; + uint64_t fcid:3; + uint64_t fill_order:2; + uint64_t reserved_0_37:38; + } s; + struct cvmx_lmcx_fadr_cn30xx { + uint64_t reserved_32_63:32; + uint64_t fdimm:2; + uint64_t fbunk:1; + uint64_t fbank:3; + uint64_t frow:14; + uint64_t fcol:12; + } cn30xx; + struct cvmx_lmcx_fadr_cn30xx cn31xx; + struct cvmx_lmcx_fadr_cn30xx cn38xx; + struct cvmx_lmcx_fadr_cn30xx cn38xxp2; + struct cvmx_lmcx_fadr_cn30xx cn50xx; + struct cvmx_lmcx_fadr_cn30xx cn52xx; + struct cvmx_lmcx_fadr_cn30xx cn52xxp1; + struct cvmx_lmcx_fadr_cn30xx cn56xx; + struct cvmx_lmcx_fadr_cn30xx cn56xxp1; + struct cvmx_lmcx_fadr_cn30xx cn58xx; + struct cvmx_lmcx_fadr_cn30xx cn58xxp1; + struct cvmx_lmcx_fadr_cn61xx { + uint64_t reserved_36_63:28; + uint64_t fdimm:2; + uint64_t fbunk:1; + uint64_t fbank:3; + uint64_t frow:16; + uint64_t fcol:14; + } cn61xx; + struct cvmx_lmcx_fadr_cn61xx cn63xx; + struct cvmx_lmcx_fadr_cn61xx cn63xxp1; + struct cvmx_lmcx_fadr_cn61xx cn66xx; + struct cvmx_lmcx_fadr_cn61xx cn68xx; + struct cvmx_lmcx_fadr_cn61xx cn68xxp1; + struct cvmx_lmcx_fadr_cn70xx { + uint64_t reserved_40_63:24; + uint64_t fill_order:2; + uint64_t fdimm:1; + uint64_t fbunk:1; + uint64_t fbank:4; + uint64_t frow:18; + uint64_t fcol:14; + } cn70xx; + struct cvmx_lmcx_fadr_cn70xx cn70xxp1; + struct cvmx_lmcx_fadr_cn73xx { + uint64_t reserved_43_63:21; + uint64_t fcid:3; + uint64_t fill_order:2; + uint64_t fdimm:1; + uint64_t fbunk:1; + uint64_t fbank:4; + uint64_t frow:18; + uint64_t fcol:14; + } cn73xx; + struct cvmx_lmcx_fadr_cn73xx cn78xx; + struct cvmx_lmcx_fadr_cn73xx cn78xxp1; + struct cvmx_lmcx_fadr_cn61xx cnf71xx; + struct cvmx_lmcx_fadr_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_general_purpose0 + */ +union cvmx_lmcx_general_purpose0 { + u64 u64; + struct cvmx_lmcx_general_purpose0_s { + uint64_t data:64; + } s; + struct cvmx_lmcx_general_purpose0_s cn73xx; + struct cvmx_lmcx_general_purpose0_s cn78xx; + struct cvmx_lmcx_general_purpose0_s cnf75xx; +}; + +/** + * cvmx_lmc#_general_purpose1 + */ +union cvmx_lmcx_general_purpose1 { + u64 u64; + struct cvmx_lmcx_general_purpose1_s { + uint64_t data:64; + } s; + struct cvmx_lmcx_general_purpose1_s cn73xx; + struct cvmx_lmcx_general_purpose1_s cn78xx; + struct cvmx_lmcx_general_purpose1_s cnf75xx; +}; + +/** + * cvmx_lmc#_general_purpose2 + */ +union cvmx_lmcx_general_purpose2 { + u64 u64; + struct cvmx_lmcx_general_purpose2_s { + uint64_t reserved_16_63:48; + uint64_t data:16; + } s; + struct cvmx_lmcx_general_purpose2_s cn73xx; + struct cvmx_lmcx_general_purpose2_s cn78xx; + struct cvmx_lmcx_general_purpose2_s cnf75xx; +}; + +/** + * cvmx_lmc#_ifb_cnt + * + * LMC_IFB_CNT = Performance Counters + * + */ +union cvmx_lmcx_ifb_cnt { + u64 u64; + struct cvmx_lmcx_ifb_cnt_s { + uint64_t ifbcnt:64; + } s; + struct cvmx_lmcx_ifb_cnt_s cn61xx; + struct cvmx_lmcx_ifb_cnt_s cn63xx; + struct cvmx_lmcx_ifb_cnt_s cn63xxp1; + struct cvmx_lmcx_ifb_cnt_s cn66xx; + struct cvmx_lmcx_ifb_cnt_s cn68xx; + struct cvmx_lmcx_ifb_cnt_s cn68xxp1; + struct cvmx_lmcx_ifb_cnt_s cn70xx; + struct cvmx_lmcx_ifb_cnt_s cn70xxp1; + struct cvmx_lmcx_ifb_cnt_s cn73xx; + struct cvmx_lmcx_ifb_cnt_s cn78xx; + struct cvmx_lmcx_ifb_cnt_s cn78xxp1; + struct cvmx_lmcx_ifb_cnt_s cnf71xx; + struct cvmx_lmcx_ifb_cnt_s cnf75xx; +}; + +/** + * cvmx_lmc#_ifb_cnt_hi + * + * LMC_IFB_CNT_HI = Performance Counters + * + */ +union cvmx_lmcx_ifb_cnt_hi { + u64 u64; + struct cvmx_lmcx_ifb_cnt_hi_s { + uint64_t reserved_32_63:32; + uint64_t ifbcnt_hi:32; + } s; + struct cvmx_lmcx_ifb_cnt_hi_s cn30xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn31xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn38xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn38xxp2; + struct cvmx_lmcx_ifb_cnt_hi_s cn50xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn52xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn52xxp1; + struct cvmx_lmcx_ifb_cnt_hi_s cn56xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn56xxp1; + struct cvmx_lmcx_ifb_cnt_hi_s cn58xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn58xxp1; +}; + +/** + * cvmx_lmc#_ifb_cnt_lo + * + * LMC_IFB_CNT_LO = Performance Counters + * + */ +union cvmx_lmcx_ifb_cnt_lo { + u64 u64; + struct cvmx_lmcx_ifb_cnt_lo_s { + uint64_t reserved_32_63:32; + uint64_t ifbcnt_lo:32; + } s; + struct cvmx_lmcx_ifb_cnt_lo_s cn30xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn31xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn38xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn38xxp2; + struct cvmx_lmcx_ifb_cnt_lo_s cn50xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn52xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn52xxp1; + struct cvmx_lmcx_ifb_cnt_lo_s cn56xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn56xxp1; + struct cvmx_lmcx_ifb_cnt_lo_s cn58xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn58xxp1; +}; + +/** + * cvmx_lmc#_int + * + * This register contains the different interrupt-summary bits of the LMC. + * + */ +union cvmx_lmcx_int { + u64 u64; + struct cvmx_lmcx_int_s { + uint64_t reserved_14_63:50; + uint64_t macram_ded_err:1; + uint64_t macram_sec_err:1; + uint64_t ddr_err:1; + uint64_t dlcram_ded_err:1; + uint64_t dlcram_sec_err:1; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t nxm_wr_err:1; + } s; + struct cvmx_lmcx_int_cn61xx { + uint64_t reserved_9_63:55; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t nxm_wr_err:1; + } cn61xx; + struct cvmx_lmcx_int_cn61xx cn63xx; + struct cvmx_lmcx_int_cn61xx cn63xxp1; + struct cvmx_lmcx_int_cn61xx cn66xx; + struct cvmx_lmcx_int_cn61xx cn68xx; + struct cvmx_lmcx_int_cn61xx cn68xxp1; + struct cvmx_lmcx_int_cn70xx { + uint64_t reserved_12_63:52; + uint64_t ddr_err:1; + uint64_t dlcram_ded_err:1; + uint64_t dlcram_sec_err:1; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t nxm_wr_err:1; + } cn70xx; + struct cvmx_lmcx_int_cn70xx cn70xxp1; + struct cvmx_lmcx_int_s cn73xx; + struct cvmx_lmcx_int_s cn78xx; + struct cvmx_lmcx_int_s cn78xxp1; + struct cvmx_lmcx_int_cn61xx cnf71xx; + struct cvmx_lmcx_int_s cnf75xx; +}; + +/** + * cvmx_lmc#_int_en + * + * Unused CSR in O75. + * + */ +union cvmx_lmcx_int_en { + u64 u64; + struct cvmx_lmcx_int_en_s { + uint64_t reserved_6_63:58; + uint64_t ddr_error_alert_ena:1; + uint64_t dlcram_ded_ena:1; + uint64_t dlcram_sec_ena:1; + uint64_t intr_ded_ena:1; + uint64_t intr_sec_ena:1; + uint64_t intr_nxm_wr_ena:1; + } s; + struct cvmx_lmcx_int_en_cn61xx { + uint64_t reserved_3_63:61; + uint64_t intr_ded_ena:1; + uint64_t intr_sec_ena:1; + uint64_t intr_nxm_wr_ena:1; + } cn61xx; + struct cvmx_lmcx_int_en_cn61xx cn63xx; + struct cvmx_lmcx_int_en_cn61xx cn63xxp1; + struct cvmx_lmcx_int_en_cn61xx cn66xx; + struct cvmx_lmcx_int_en_cn61xx cn68xx; + struct cvmx_lmcx_int_en_cn61xx cn68xxp1; + struct cvmx_lmcx_int_en_s cn70xx; + struct cvmx_lmcx_int_en_s cn70xxp1; + struct cvmx_lmcx_int_en_s cn73xx; + struct cvmx_lmcx_int_en_s cn78xx; + struct cvmx_lmcx_int_en_s cn78xxp1; + struct cvmx_lmcx_int_en_cn61xx cnf71xx; + struct cvmx_lmcx_int_en_s cnf75xx; +}; + +/** + * cvmx_lmc#_lane#_crc_swiz + * + * This register contains the CRC bit swizzle for even and odd ranks. + * + */ +union cvmx_lmcx_lanex_crc_swiz { + u64 u64; + struct cvmx_lmcx_lanex_crc_swiz_s { + uint64_t reserved_56_63:8; + uint64_t r1_swiz7:3; + uint64_t r1_swiz6:3; + uint64_t r1_swiz5:3; + uint64_t r1_swiz4:3; + uint64_t r1_swiz3:3; + uint64_t r1_swiz2:3; + uint64_t r1_swiz1:3; + uint64_t r1_swiz0:3; + uint64_t reserved_24_31:8; + uint64_t r0_swiz7:3; + uint64_t r0_swiz6:3; + uint64_t r0_swiz5:3; + uint64_t r0_swiz4:3; + uint64_t r0_swiz3:3; + uint64_t r0_swiz2:3; + uint64_t r0_swiz1:3; + uint64_t r0_swiz0:3; + } s; + struct cvmx_lmcx_lanex_crc_swiz_s cn73xx; + struct cvmx_lmcx_lanex_crc_swiz_s cn78xx; + struct cvmx_lmcx_lanex_crc_swiz_s cn78xxp1; + struct cvmx_lmcx_lanex_crc_swiz_s cnf75xx; +}; + +/** + * cvmx_lmc#_mem_cfg0 + * + * Specify the RSL base addresses for the block + * + * LMC_MEM_CFG0 = LMC Memory Configuration Register0 + * + * This register controls certain parameters of Memory Configuration + */ +union cvmx_lmcx_mem_cfg0 { + u64 u64; + struct cvmx_lmcx_mem_cfg0_s { + uint64_t reserved_32_63:32; + uint64_t reset:1; + uint64_t silo_qc:1; + uint64_t bunk_ena:1; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t intr_ded_ena:1; + uint64_t intr_sec_ena:1; + uint64_t tcl:4; + uint64_t ref_int:6; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } s; + struct cvmx_lmcx_mem_cfg0_s cn30xx; + struct cvmx_lmcx_mem_cfg0_s cn31xx; + struct cvmx_lmcx_mem_cfg0_s cn38xx; + struct cvmx_lmcx_mem_cfg0_s cn38xxp2; + struct cvmx_lmcx_mem_cfg0_s cn50xx; + struct cvmx_lmcx_mem_cfg0_s cn52xx; + struct cvmx_lmcx_mem_cfg0_s cn52xxp1; + struct cvmx_lmcx_mem_cfg0_s cn56xx; + struct cvmx_lmcx_mem_cfg0_s cn56xxp1; + struct cvmx_lmcx_mem_cfg0_s cn58xx; + struct cvmx_lmcx_mem_cfg0_s cn58xxp1; +}; + +/** + * cvmx_lmc#_mem_cfg1 + * + * LMC_MEM_CFG1 = LMC Memory Configuration Register1 + * + * This register controls the External Memory Configuration Timing Parameters. + * Please refer to the appropriate DDR part spec from your memory vendor for + * the various values in this CSR. The details of each of these timing + * parameters can be found in the JEDEC spec or the vendor spec of the + * memory parts. + */ +union cvmx_lmcx_mem_cfg1 { + u64 u64; + struct cvmx_lmcx_mem_cfg1_s { + uint64_t reserved_32_63:32; + uint64_t comp_bypass:1; + uint64_t trrd:3; + uint64_t caslat:3; + uint64_t tmrd:3; + uint64_t trfc:5; + uint64_t trp:4; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:5; + } s; + struct cvmx_lmcx_mem_cfg1_s cn30xx; + struct cvmx_lmcx_mem_cfg1_s cn31xx; + struct cvmx_lmcx_mem_cfg1_cn38xx { + uint64_t reserved_31_63:33; + uint64_t trrd:3; + uint64_t caslat:3; + uint64_t tmrd:3; + uint64_t trfc:5; + uint64_t trp:4; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:5; + } cn38xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn38xxp2; + struct cvmx_lmcx_mem_cfg1_s cn50xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn52xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn52xxp1; + struct cvmx_lmcx_mem_cfg1_cn38xx cn56xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn56xxp1; + struct cvmx_lmcx_mem_cfg1_cn38xx cn58xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn58xxp1; +}; + +/** + * cvmx_lmc#_modereg_params0 + * + * These parameters are written into the DDR3/DDR4 MR0, MR1, MR2 and MR3 + * registers. + * + */ +union cvmx_lmcx_modereg_params0 { + u64 u64; + struct cvmx_lmcx_modereg_params0_s { + uint64_t reserved_28_63:36; + uint64_t wrp_ext:1; + uint64_t cl_ext:1; + uint64_t al_ext:1; + uint64_t ppd:1; + uint64_t wrp:3; + uint64_t dllr:1; + uint64_t tm:1; + uint64_t rbt:1; + uint64_t cl:4; + uint64_t bl:2; + uint64_t qoff:1; + uint64_t tdqs:1; + uint64_t wlev:1; + uint64_t al:2; + uint64_t dll:1; + uint64_t mpr:1; + uint64_t mprloc:2; + uint64_t cwl:3; + } s; + struct cvmx_lmcx_modereg_params0_cn61xx { + uint64_t reserved_25_63:39; + uint64_t ppd:1; + uint64_t wrp:3; + uint64_t dllr:1; + uint64_t tm:1; + uint64_t rbt:1; + uint64_t cl:4; + uint64_t bl:2; + uint64_t qoff:1; + uint64_t tdqs:1; + uint64_t wlev:1; + uint64_t al:2; + uint64_t dll:1; + uint64_t mpr:1; + uint64_t mprloc:2; + uint64_t cwl:3; + } cn61xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn63xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn63xxp1; + struct cvmx_lmcx_modereg_params0_cn61xx cn66xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn68xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn68xxp1; + struct cvmx_lmcx_modereg_params0_cn61xx cn70xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn70xxp1; + struct cvmx_lmcx_modereg_params0_s cn73xx; + struct cvmx_lmcx_modereg_params0_s cn78xx; + struct cvmx_lmcx_modereg_params0_s cn78xxp1; + struct cvmx_lmcx_modereg_params0_cn61xx cnf71xx; + struct cvmx_lmcx_modereg_params0_s cnf75xx; +}; + +/** + * cvmx_lmc#_modereg_params1 + * + * These parameters are written into the DDR3 MR0, MR1, MR2 and MR3 registers. + * + */ +union cvmx_lmcx_modereg_params1 { + u64 u64; + struct cvmx_lmcx_modereg_params1_s { + uint64_t reserved_55_63:9; + uint64_t rtt_wr_11_ext:1; + uint64_t rtt_wr_10_ext:1; + uint64_t rtt_wr_01_ext:1; + uint64_t rtt_wr_00_ext:1; + uint64_t db_output_impedance:3; + uint64_t rtt_nom_11:3; + uint64_t dic_11:2; + uint64_t rtt_wr_11:2; + uint64_t srt_11:1; + uint64_t asr_11:1; + uint64_t pasr_11:3; + uint64_t rtt_nom_10:3; + uint64_t dic_10:2; + uint64_t rtt_wr_10:2; + uint64_t srt_10:1; + uint64_t asr_10:1; + uint64_t pasr_10:3; + uint64_t rtt_nom_01:3; + uint64_t dic_01:2; + uint64_t rtt_wr_01:2; + uint64_t srt_01:1; + uint64_t asr_01:1; + uint64_t pasr_01:3; + uint64_t rtt_nom_00:3; + uint64_t dic_00:2; + uint64_t rtt_wr_00:2; + uint64_t srt_00:1; + uint64_t asr_00:1; + uint64_t pasr_00:3; + } s; + struct cvmx_lmcx_modereg_params1_cn61xx { + uint64_t reserved_48_63:16; + uint64_t rtt_nom_11:3; + uint64_t dic_11:2; + uint64_t rtt_wr_11:2; + uint64_t srt_11:1; + uint64_t asr_11:1; + uint64_t pasr_11:3; + uint64_t rtt_nom_10:3; + uint64_t dic_10:2; + uint64_t rtt_wr_10:2; + uint64_t srt_10:1; + uint64_t asr_10:1; + uint64_t pasr_10:3; + uint64_t rtt_nom_01:3; + uint64_t dic_01:2; + uint64_t rtt_wr_01:2; + uint64_t srt_01:1; + uint64_t asr_01:1; + uint64_t pasr_01:3; + uint64_t rtt_nom_00:3; + uint64_t dic_00:2; + uint64_t rtt_wr_00:2; + uint64_t srt_00:1; + uint64_t asr_00:1; + uint64_t pasr_00:3; + } cn61xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn63xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn63xxp1; + struct cvmx_lmcx_modereg_params1_cn61xx cn66xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn68xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn68xxp1; + struct cvmx_lmcx_modereg_params1_cn61xx cn70xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn70xxp1; + struct cvmx_lmcx_modereg_params1_s cn73xx; + struct cvmx_lmcx_modereg_params1_s cn78xx; + struct cvmx_lmcx_modereg_params1_s cn78xxp1; + struct cvmx_lmcx_modereg_params1_cn61xx cnf71xx; + struct cvmx_lmcx_modereg_params1_s cnf75xx; +}; + +/** + * cvmx_lmc#_modereg_params2 + * + * These parameters are written into the DDR4 mode registers. + * + */ +union cvmx_lmcx_modereg_params2 { + u64 u64; + struct cvmx_lmcx_modereg_params2_s { + uint64_t reserved_41_63:23; + uint64_t vrefdq_train_en:1; + uint64_t vref_range_11:1; + uint64_t vref_value_11:6; + uint64_t rtt_park_11:3; + uint64_t vref_range_10:1; + uint64_t vref_value_10:6; + uint64_t rtt_park_10:3; + uint64_t vref_range_01:1; + uint64_t vref_value_01:6; + uint64_t rtt_park_01:3; + uint64_t vref_range_00:1; + uint64_t vref_value_00:6; + uint64_t rtt_park_00:3; + } s; + struct cvmx_lmcx_modereg_params2_s cn70xx; + struct cvmx_lmcx_modereg_params2_cn70xxp1 { + uint64_t reserved_40_63:24; + uint64_t vref_range_11:1; + uint64_t vref_value_11:6; + uint64_t rtt_park_11:3; + uint64_t vref_range_10:1; + uint64_t vref_value_10:6; + uint64_t rtt_park_10:3; + uint64_t vref_range_01:1; + uint64_t vref_value_01:6; + uint64_t rtt_park_01:3; + uint64_t vref_range_00:1; + uint64_t vref_value_00:6; + uint64_t rtt_park_00:3; + } cn70xxp1; + struct cvmx_lmcx_modereg_params2_s cn73xx; + struct cvmx_lmcx_modereg_params2_s cn78xx; + struct cvmx_lmcx_modereg_params2_s cn78xxp1; + struct cvmx_lmcx_modereg_params2_s cnf75xx; +}; + +/** + * cvmx_lmc#_modereg_params3 + * + * These parameters are written into the DDR4 mode registers. + * + */ +union cvmx_lmcx_modereg_params3 { + u64 u64; + struct cvmx_lmcx_modereg_params3_s { + uint64_t reserved_39_63:25; + uint64_t xrank_add_tccd_l:3; + uint64_t xrank_add_tccd_s:3; + uint64_t mpr_fmt:2; + uint64_t wr_cmd_lat:2; + uint64_t fgrm:3; + uint64_t temp_sense:1; + uint64_t pda:1; + uint64_t gd:1; + uint64_t crc:1; + uint64_t lpasr:2; + uint64_t tccd_l:3; + uint64_t rd_dbi:1; + uint64_t wr_dbi:1; + uint64_t dm:1; + uint64_t ca_par_pers:1; + uint64_t odt_pd:1; + uint64_t par_lat_mode:3; + uint64_t wr_preamble:1; + uint64_t rd_preamble:1; + uint64_t sre_abort:1; + uint64_t cal:3; + uint64_t vref_mon:1; + uint64_t tc_ref:1; + uint64_t max_pd:1; + } s; + struct cvmx_lmcx_modereg_params3_cn70xx { + uint64_t reserved_33_63:31; + uint64_t mpr_fmt:2; + uint64_t wr_cmd_lat:2; + uint64_t fgrm:3; + uint64_t temp_sense:1; + uint64_t pda:1; + uint64_t gd:1; + uint64_t crc:1; + uint64_t lpasr:2; + uint64_t tccd_l:3; + uint64_t rd_dbi:1; + uint64_t wr_dbi:1; + uint64_t dm:1; + uint64_t ca_par_pers:1; + uint64_t odt_pd:1; + uint64_t par_lat_mode:3; + uint64_t wr_preamble:1; + uint64_t rd_preamble:1; + uint64_t sre_abort:1; + uint64_t cal:3; + uint64_t vref_mon:1; + uint64_t tc_ref:1; + uint64_t max_pd:1; + } cn70xx; + struct cvmx_lmcx_modereg_params3_cn70xx cn70xxp1; + struct cvmx_lmcx_modereg_params3_s cn73xx; + struct cvmx_lmcx_modereg_params3_s cn78xx; + struct cvmx_lmcx_modereg_params3_s cn78xxp1; + struct cvmx_lmcx_modereg_params3_s cnf75xx; +}; + +/** + * cvmx_lmc#_mpr_data0 + * + * This register provides bits <63:0> of MPR data register. + * + */ +union cvmx_lmcx_mpr_data0 { + u64 u64; + struct cvmx_lmcx_mpr_data0_s { + uint64_t mpr_data:64; + } s; + struct cvmx_lmcx_mpr_data0_s cn70xx; + struct cvmx_lmcx_mpr_data0_s cn70xxp1; + struct cvmx_lmcx_mpr_data0_s cn73xx; + struct cvmx_lmcx_mpr_data0_s cn78xx; + struct cvmx_lmcx_mpr_data0_s cn78xxp1; + struct cvmx_lmcx_mpr_data0_s cnf75xx; +}; + +/** + * cvmx_lmc#_mpr_data1 + * + * This register provides bits <127:64> of MPR data register. + * + */ +union cvmx_lmcx_mpr_data1 { + u64 u64; + struct cvmx_lmcx_mpr_data1_s { + uint64_t mpr_data:64; + } s; + struct cvmx_lmcx_mpr_data1_s cn70xx; + struct cvmx_lmcx_mpr_data1_s cn70xxp1; + struct cvmx_lmcx_mpr_data1_s cn73xx; + struct cvmx_lmcx_mpr_data1_s cn78xx; + struct cvmx_lmcx_mpr_data1_s cn78xxp1; + struct cvmx_lmcx_mpr_data1_s cnf75xx; +}; + +/** + * cvmx_lmc#_mpr_data2 + * + * This register provides bits <143:128> of MPR data register. + * + */ +union cvmx_lmcx_mpr_data2 { + u64 u64; + struct cvmx_lmcx_mpr_data2_s { + uint64_t reserved_16_63:48; + uint64_t mpr_data:16; + } s; + struct cvmx_lmcx_mpr_data2_s cn70xx; + struct cvmx_lmcx_mpr_data2_s cn70xxp1; + struct cvmx_lmcx_mpr_data2_s cn73xx; + struct cvmx_lmcx_mpr_data2_s cn78xx; + struct cvmx_lmcx_mpr_data2_s cn78xxp1; + struct cvmx_lmcx_mpr_data2_s cnf75xx; +}; + +/** + * cvmx_lmc#_mr_mpr_ctl + * + * This register provides the control functions when programming the MPR + * of DDR4 DRAMs. + * + */ +union cvmx_lmcx_mr_mpr_ctl { + u64 u64; + struct cvmx_lmcx_mr_mpr_ctl_s { + uint64_t reserved_61_63:3; + uint64_t mr_wr_secure_key_ena:1; + uint64_t pba_func_space:3; + uint64_t mr_wr_bg1:1; + uint64_t mpr_sample_dq_enable:1; + uint64_t pda_early_dqx:1; + uint64_t mr_wr_pba_enable:1; + uint64_t mr_wr_use_default_value:1; + uint64_t mpr_whole_byte_enable:1; + uint64_t mpr_byte_select:4; + uint64_t mpr_bit_select:2; + uint64_t mpr_wr:1; + uint64_t mpr_loc:2; + uint64_t mr_wr_pda_enable:1; + uint64_t mr_wr_pda_mask:18; + uint64_t mr_wr_rank:2; + uint64_t mr_wr_sel:3; + uint64_t mr_wr_addr:18; + } s; + struct cvmx_lmcx_mr_mpr_ctl_cn70xx { + uint64_t reserved_52_63:12; + uint64_t mpr_whole_byte_enable:1; + uint64_t mpr_byte_select:4; + uint64_t mpr_bit_select:2; + uint64_t mpr_wr:1; + uint64_t mpr_loc:2; + uint64_t mr_wr_pda_enable:1; + uint64_t mr_wr_pda_mask:18; + uint64_t mr_wr_rank:2; + uint64_t mr_wr_sel:3; + uint64_t mr_wr_addr:18; + } cn70xx; + struct cvmx_lmcx_mr_mpr_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_mr_mpr_ctl_s cn73xx; + struct cvmx_lmcx_mr_mpr_ctl_s cn78xx; + struct cvmx_lmcx_mr_mpr_ctl_s cn78xxp1; + struct cvmx_lmcx_mr_mpr_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_ns_ctl + * + * This register contains control parameters for handling nonsecure accesses. + * + */ +union cvmx_lmcx_ns_ctl { + u64 u64; + struct cvmx_lmcx_ns_ctl_s { + uint64_t reserved_26_63:38; + uint64_t ns_scramble_dis:1; + uint64_t reserved_18_24:7; + uint64_t adr_offset:18; + } s; + struct cvmx_lmcx_ns_ctl_s cn73xx; + struct cvmx_lmcx_ns_ctl_s cn78xx; + struct cvmx_lmcx_ns_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_nxm + * + * Following is the decoding for mem_msb/rank: + * 0x0: mem_msb = mem_adr[25]. + * 0x1: mem_msb = mem_adr[26]. + * 0x2: mem_msb = mem_adr[27]. + * 0x3: mem_msb = mem_adr[28]. + * 0x4: mem_msb = mem_adr[29]. + * 0x5: mem_msb = mem_adr[30]. + * 0x6: mem_msb = mem_adr[31]. + * 0x7: mem_msb = mem_adr[32]. + * 0x8: mem_msb = mem_adr[33]. + * 0x9: mem_msb = mem_adr[34]. + * 0xA: mem_msb = mem_adr[35]. + * 0xB: mem_msb = mem_adr[36]. + * 0xC-0xF = Reserved. + * + * For example, for a DIMM made of Samsung's K4B1G0846C-ZCF7 1Gb + * (16M * 8 bit * 8 bank) parts, the column address width = 10; so with + * 10b of col, 3b of bus, 3b of bank, row_lsb = 16. + * Therefore, row = mem_adr[29:16] and mem_msb = 4. + * + * Note also that addresses greater than the max defined space (pbank_msb) + * are also treated as NXM accesses. + */ +union cvmx_lmcx_nxm { + u64 u64; + struct cvmx_lmcx_nxm_s { + uint64_t reserved_40_63:24; + uint64_t mem_msb_d3_r1:4; + uint64_t mem_msb_d3_r0:4; + uint64_t mem_msb_d2_r1:4; + uint64_t mem_msb_d2_r0:4; + uint64_t mem_msb_d1_r1:4; + uint64_t mem_msb_d1_r0:4; + uint64_t mem_msb_d0_r1:4; + uint64_t mem_msb_d0_r0:4; + uint64_t cs_mask:8; + } s; + struct cvmx_lmcx_nxm_cn52xx { + uint64_t reserved_8_63:56; + uint64_t cs_mask:8; + } cn52xx; + struct cvmx_lmcx_nxm_cn52xx cn56xx; + struct cvmx_lmcx_nxm_cn52xx cn58xx; + struct cvmx_lmcx_nxm_s cn61xx; + struct cvmx_lmcx_nxm_s cn63xx; + struct cvmx_lmcx_nxm_s cn63xxp1; + struct cvmx_lmcx_nxm_s cn66xx; + struct cvmx_lmcx_nxm_s cn68xx; + struct cvmx_lmcx_nxm_s cn68xxp1; + struct cvmx_lmcx_nxm_cn70xx { + uint64_t reserved_24_63:40; + uint64_t mem_msb_d1_r1:4; + uint64_t mem_msb_d1_r0:4; + uint64_t mem_msb_d0_r1:4; + uint64_t mem_msb_d0_r0:4; + uint64_t reserved_4_7:4; + uint64_t cs_mask:4; + } cn70xx; + struct cvmx_lmcx_nxm_cn70xx cn70xxp1; + struct cvmx_lmcx_nxm_cn70xx cn73xx; + struct cvmx_lmcx_nxm_cn70xx cn78xx; + struct cvmx_lmcx_nxm_cn70xx cn78xxp1; + struct cvmx_lmcx_nxm_s cnf71xx; + struct cvmx_lmcx_nxm_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_nxm_fadr + * + * This register captures only the first transaction with a NXM error while + * an interrupt is pending, and only captures a subsequent event once the + * interrupt is cleared by writing a one to LMC()_INT[NXM_ERR]. It captures + * the actual L2C-LMC address provided to the LMC that caused the NXM error. + * A read or write NXM error is captured only if enabled using the NXM + * event enables. + */ +union cvmx_lmcx_nxm_fadr { + u64 u64; + struct cvmx_lmcx_nxm_fadr_s { + uint64_t reserved_40_63:24; + uint64_t nxm_faddr_ext:1; + uint64_t nxm_src:1; + uint64_t nxm_type:1; + uint64_t nxm_faddr:37; + } s; + struct cvmx_lmcx_nxm_fadr_cn70xx { + uint64_t reserved_39_63:25; + uint64_t nxm_src:1; + uint64_t nxm_type:1; + uint64_t nxm_faddr:37; + } cn70xx; + struct cvmx_lmcx_nxm_fadr_cn70xx cn70xxp1; + struct cvmx_lmcx_nxm_fadr_s cn73xx; + struct cvmx_lmcx_nxm_fadr_s cn78xx; + struct cvmx_lmcx_nxm_fadr_s cn78xxp1; + struct cvmx_lmcx_nxm_fadr_s cnf75xx; +}; + +/** + * cvmx_lmc#_ops_cnt + * + * LMC_OPS_CNT = Performance Counters + * + */ +union cvmx_lmcx_ops_cnt { + u64 u64; + struct cvmx_lmcx_ops_cnt_s { + uint64_t opscnt:64; + } s; + struct cvmx_lmcx_ops_cnt_s cn61xx; + struct cvmx_lmcx_ops_cnt_s cn63xx; + struct cvmx_lmcx_ops_cnt_s cn63xxp1; + struct cvmx_lmcx_ops_cnt_s cn66xx; + struct cvmx_lmcx_ops_cnt_s cn68xx; + struct cvmx_lmcx_ops_cnt_s cn68xxp1; + struct cvmx_lmcx_ops_cnt_s cn70xx; + struct cvmx_lmcx_ops_cnt_s cn70xxp1; + struct cvmx_lmcx_ops_cnt_s cn73xx; + struct cvmx_lmcx_ops_cnt_s cn78xx; + struct cvmx_lmcx_ops_cnt_s cn78xxp1; + struct cvmx_lmcx_ops_cnt_s cnf71xx; + struct cvmx_lmcx_ops_cnt_s cnf75xx; +}; + +/** + * cvmx_lmc#_ops_cnt_hi + * + * LMC_OPS_CNT_HI = Performance Counters + * + */ +union cvmx_lmcx_ops_cnt_hi { + u64 u64; + struct cvmx_lmcx_ops_cnt_hi_s { + uint64_t reserved_32_63:32; + uint64_t opscnt_hi:32; + } s; + struct cvmx_lmcx_ops_cnt_hi_s cn30xx; + struct cvmx_lmcx_ops_cnt_hi_s cn31xx; + struct cvmx_lmcx_ops_cnt_hi_s cn38xx; + struct cvmx_lmcx_ops_cnt_hi_s cn38xxp2; + struct cvmx_lmcx_ops_cnt_hi_s cn50xx; + struct cvmx_lmcx_ops_cnt_hi_s cn52xx; + struct cvmx_lmcx_ops_cnt_hi_s cn52xxp1; + struct cvmx_lmcx_ops_cnt_hi_s cn56xx; + struct cvmx_lmcx_ops_cnt_hi_s cn56xxp1; + struct cvmx_lmcx_ops_cnt_hi_s cn58xx; + struct cvmx_lmcx_ops_cnt_hi_s cn58xxp1; +}; + +/** + * cvmx_lmc#_ops_cnt_lo + * + * LMC_OPS_CNT_LO = Performance Counters + * + */ +union cvmx_lmcx_ops_cnt_lo { + u64 u64; + struct cvmx_lmcx_ops_cnt_lo_s { + uint64_t reserved_32_63:32; + uint64_t opscnt_lo:32; + } s; + struct cvmx_lmcx_ops_cnt_lo_s cn30xx; + struct cvmx_lmcx_ops_cnt_lo_s cn31xx; + struct cvmx_lmcx_ops_cnt_lo_s cn38xx; + struct cvmx_lmcx_ops_cnt_lo_s cn38xxp2; + struct cvmx_lmcx_ops_cnt_lo_s cn50xx; + struct cvmx_lmcx_ops_cnt_lo_s cn52xx; + struct cvmx_lmcx_ops_cnt_lo_s cn52xxp1; + struct cvmx_lmcx_ops_cnt_lo_s cn56xx; + struct cvmx_lmcx_ops_cnt_lo_s cn56xxp1; + struct cvmx_lmcx_ops_cnt_lo_s cn58xx; + struct cvmx_lmcx_ops_cnt_lo_s cn58xxp1; +}; + +/** + * cvmx_lmc#_phy_ctl + * + * LMC_PHY_CTL = LMC PHY Control + * + */ +union cvmx_lmcx_phy_ctl { + u64 u64; + struct cvmx_lmcx_phy_ctl_s { + uint64_t reserved_61_63:3; + uint64_t dsk_dbg_load_dis:1; + uint64_t dsk_dbg_overwrt_ena:1; + uint64_t dsk_dbg_wr_mode:1; + uint64_t data_rate_loopback:1; + uint64_t dq_shallow_loopback:1; + uint64_t dm_disable:1; + uint64_t c1_sel:2; + uint64_t c0_sel:2; + uint64_t phy_reset:1; + uint64_t dsk_dbg_rd_complete:1; + uint64_t dsk_dbg_rd_data:10; + uint64_t dsk_dbg_rd_start:1; + uint64_t dsk_dbg_clk_scaler:2; + uint64_t dsk_dbg_offset:2; + uint64_t dsk_dbg_num_bits_sel:1; + uint64_t dsk_dbg_byte_sel:4; + uint64_t dsk_dbg_bit_sel:4; + uint64_t dbi_mode_ena:1; + uint64_t ddr_error_n_ena:1; + uint64_t ref_pin_on:1; + uint64_t dac_on:1; + uint64_t int_pad_loopback_ena:1; + uint64_t int_phy_loopback_ena:1; + uint64_t phy_dsk_reset:1; + uint64_t phy_dsk_byp:1; + uint64_t phy_pwr_save_disable:1; + uint64_t ten:1; + uint64_t rx_always_on:1; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } s; + struct cvmx_lmcx_phy_ctl_cn61xx { + uint64_t reserved_15_63:49; + uint64_t rx_always_on:1; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } cn61xx; + struct cvmx_lmcx_phy_ctl_cn61xx cn63xx; + struct cvmx_lmcx_phy_ctl_cn63xxp1 { + uint64_t reserved_14_63:50; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } cn63xxp1; + struct cvmx_lmcx_phy_ctl_cn61xx cn66xx; + struct cvmx_lmcx_phy_ctl_cn61xx cn68xx; + struct cvmx_lmcx_phy_ctl_cn61xx cn68xxp1; + struct cvmx_lmcx_phy_ctl_cn70xx { + uint64_t reserved_51_63:13; + uint64_t phy_reset:1; + uint64_t dsk_dbg_rd_complete:1; + uint64_t dsk_dbg_rd_data:10; + uint64_t dsk_dbg_rd_start:1; + uint64_t dsk_dbg_clk_scaler:2; + uint64_t dsk_dbg_offset:2; + uint64_t dsk_dbg_num_bits_sel:1; + uint64_t dsk_dbg_byte_sel:4; + uint64_t dsk_dbg_bit_sel:4; + uint64_t dbi_mode_ena:1; + uint64_t ddr_error_n_ena:1; + uint64_t ref_pin_on:1; + uint64_t dac_on:1; + uint64_t int_pad_loopback_ena:1; + uint64_t int_phy_loopback_ena:1; + uint64_t phy_dsk_reset:1; + uint64_t phy_dsk_byp:1; + uint64_t phy_pwr_save_disable:1; + uint64_t ten:1; + uint64_t rx_always_on:1; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } cn70xx; + struct cvmx_lmcx_phy_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_phy_ctl_cn73xx { + uint64_t reserved_58_63:6; + uint64_t data_rate_loopback:1; + uint64_t dq_shallow_loopback:1; + uint64_t dm_disable:1; + uint64_t c1_sel:2; + uint64_t c0_sel:2; + uint64_t phy_reset:1; + uint64_t dsk_dbg_rd_complete:1; + uint64_t dsk_dbg_rd_data:10; + uint64_t dsk_dbg_rd_start:1; + uint64_t dsk_dbg_clk_scaler:2; + uint64_t dsk_dbg_offset:2; + uint64_t dsk_dbg_num_bits_sel:1; + uint64_t dsk_dbg_byte_sel:4; + uint64_t dsk_dbg_bit_sel:4; + uint64_t dbi_mode_ena:1; + uint64_t ddr_error_n_ena:1; + uint64_t ref_pin_on:1; + uint64_t dac_on:1; + uint64_t int_pad_loopback_ena:1; + uint64_t int_phy_loopback_ena:1; + uint64_t phy_dsk_reset:1; + uint64_t phy_dsk_byp:1; + uint64_t phy_pwr_save_disable:1; + uint64_t ten:1; + uint64_t rx_always_on:1; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } cn73xx; + struct cvmx_lmcx_phy_ctl_s cn78xx; + struct cvmx_lmcx_phy_ctl_s cn78xxp1; + struct cvmx_lmcx_phy_ctl_cn61xx cnf71xx; + struct cvmx_lmcx_phy_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_phy_ctl2 + */ +union cvmx_lmcx_phy_ctl2 { + u64 u64; + struct cvmx_lmcx_phy_ctl2_s { + uint64_t reserved_27_63:37; + uint64_t dqs8_dsk_adj:3; + uint64_t dqs7_dsk_adj:3; + uint64_t dqs6_dsk_adj:3; + uint64_t dqs5_dsk_adj:3; + uint64_t dqs4_dsk_adj:3; + uint64_t dqs3_dsk_adj:3; + uint64_t dqs2_dsk_adj:3; + uint64_t dqs1_dsk_adj:3; + uint64_t dqs0_dsk_adj:3; + } s; + struct cvmx_lmcx_phy_ctl2_s cn78xx; + struct cvmx_lmcx_phy_ctl2_s cnf75xx; +}; + +/** + * cvmx_lmc#_pll_bwctl + * + * LMC_PLL_BWCTL = DDR PLL Bandwidth Control Register + * + */ +union cvmx_lmcx_pll_bwctl { + u64 u64; + struct cvmx_lmcx_pll_bwctl_s { + uint64_t reserved_5_63:59; + uint64_t bwupd:1; + uint64_t bwctl:4; + } s; + struct cvmx_lmcx_pll_bwctl_s cn30xx; + struct cvmx_lmcx_pll_bwctl_s cn31xx; + struct cvmx_lmcx_pll_bwctl_s cn38xx; + struct cvmx_lmcx_pll_bwctl_s cn38xxp2; +}; + +/** + * cvmx_lmc#_pll_ctl + * + * LMC_PLL_CTL = LMC pll control + * + * + * Notes: + * This CSR is only relevant for LMC0. LMC1_PLL_CTL is not used. + * + * Exactly one of EN2, EN4, EN6, EN8, EN12, EN16 must be set. + * + * The resultant DDR_CK frequency is the DDR2_REF_CLK + * frequency multiplied by: + * + * (CLKF + 1) / ((CLKR + 1) * EN(2,4,6,8,12,16)) + * + * The PLL frequency, which is: + * + * (DDR2_REF_CLK freq) * ((CLKF + 1) / (CLKR + 1)) + * + * must reside between 1.2 and 2.5 GHz. A faster PLL frequency is + * desirable if there is a choice. + */ +union cvmx_lmcx_pll_ctl { + u64 u64; + struct cvmx_lmcx_pll_ctl_s { + uint64_t reserved_30_63:34; + uint64_t bypass:1; + uint64_t fasten_n:1; + uint64_t div_reset:1; + uint64_t reset_n:1; + uint64_t clkf:12; + uint64_t clkr:6; + uint64_t reserved_6_7:2; + uint64_t en16:1; + uint64_t en12:1; + uint64_t en8:1; + uint64_t en6:1; + uint64_t en4:1; + uint64_t en2:1; + } s; + struct cvmx_lmcx_pll_ctl_cn50xx { + uint64_t reserved_29_63:35; + uint64_t fasten_n:1; + uint64_t div_reset:1; + uint64_t reset_n:1; + uint64_t clkf:12; + uint64_t clkr:6; + uint64_t reserved_6_7:2; + uint64_t en16:1; + uint64_t en12:1; + uint64_t en8:1; + uint64_t en6:1; + uint64_t en4:1; + uint64_t en2:1; + } cn50xx; + struct cvmx_lmcx_pll_ctl_s cn52xx; + struct cvmx_lmcx_pll_ctl_s cn52xxp1; + struct cvmx_lmcx_pll_ctl_cn50xx cn56xx; + struct cvmx_lmcx_pll_ctl_cn56xxp1 { + uint64_t reserved_28_63:36; + uint64_t div_reset:1; + uint64_t reset_n:1; + uint64_t clkf:12; + uint64_t clkr:6; + uint64_t reserved_6_7:2; + uint64_t en16:1; + uint64_t en12:1; + uint64_t en8:1; + uint64_t en6:1; + uint64_t en4:1; + uint64_t en2:1; + } cn56xxp1; + struct cvmx_lmcx_pll_ctl_cn56xxp1 cn58xx; + struct cvmx_lmcx_pll_ctl_cn56xxp1 cn58xxp1; +}; + +/** + * cvmx_lmc#_pll_status + * + * LMC_PLL_STATUS = LMC pll status + * + */ +union cvmx_lmcx_pll_status { + u64 u64; + struct cvmx_lmcx_pll_status_s { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:5; + uint64_t ddr__pctl:5; + uint64_t reserved_2_21:20; + uint64_t rfslip:1; + uint64_t fbslip:1; + } s; + struct cvmx_lmcx_pll_status_s cn50xx; + struct cvmx_lmcx_pll_status_s cn52xx; + struct cvmx_lmcx_pll_status_s cn52xxp1; + struct cvmx_lmcx_pll_status_s cn56xx; + struct cvmx_lmcx_pll_status_s cn56xxp1; + struct cvmx_lmcx_pll_status_s cn58xx; + struct cvmx_lmcx_pll_status_cn58xxp1 { + uint64_t reserved_2_63:62; + uint64_t rfslip:1; + uint64_t fbslip:1; + } cn58xxp1; +}; + +/** + * cvmx_lmc#_ppr_ctl + * + * This register contains programmable timing and control parameters used + * when running the post package repair sequence. The timing fields + * PPR_CTL[TPGMPST], PPR_CTL[TPGM_EXIT] and PPR_CTL[TPGM] need to be set as + * to satisfy the minimum values mentioned in the JEDEC DDR4 spec before + * running the PPR sequence. See LMC()_SEQ_CTL[SEQ_SEL,INIT_START] to run + * the PPR sequence. + * + * Running hard PPR may require LMC to issue security key as four consecutive + * MR0 commands, each with a unique address field A[17:0]. Set the security + * key in the general purpose CSRs as follows: + * + * _ Security key 0 = LMC()_GENERAL_PURPOSE0[DATA]<17:0>. + * _ Security key 1 = LMC()_GENERAL_PURPOSE0[DATA]<35:18>. + * _ Security key 2 = LMC()_GENERAL_PURPOSE1[DATA]<17:0>. + * _ Security key 3 = LMC()_GENERAL_PURPOSE1[DATA]<35:18>. + */ +union cvmx_lmcx_ppr_ctl { + u64 u64; + struct cvmx_lmcx_ppr_ctl_s { + uint64_t reserved_27_63:37; + uint64_t lrank_sel:3; + uint64_t skip_issue_security:1; + uint64_t sppr:1; + uint64_t tpgm:10; + uint64_t tpgm_exit:5; + uint64_t tpgmpst:7; + } s; + struct cvmx_lmcx_ppr_ctl_cn73xx { + uint64_t reserved_24_63:40; + uint64_t skip_issue_security:1; + uint64_t sppr:1; + uint64_t tpgm:10; + uint64_t tpgm_exit:5; + uint64_t tpgmpst:7; + } cn73xx; + struct cvmx_lmcx_ppr_ctl_s cn78xx; + struct cvmx_lmcx_ppr_ctl_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_read_level_ctl + * + * Notes: + * The HW writes and reads the cache block selected by ROW, COL, BNK and + * the rank as part of a read-leveling sequence for a rank. + * A cache block write is 16 72-bit words. PATTERN selects the write value. + * For the first 8 words, the write value is the bit PATTERN duplicated + * into a 72-bit vector. The write value of the last 8 words is the inverse + * of the write value of the first 8 words. See LMC*_READ_LEVEL_RANK*. + */ +union cvmx_lmcx_read_level_ctl { + u64 u64; + struct cvmx_lmcx_read_level_ctl_s { + uint64_t reserved_44_63:20; + uint64_t rankmask:4; + uint64_t pattern:8; + uint64_t row:16; + uint64_t col:12; + uint64_t reserved_3_3:1; + uint64_t bnk:3; + } s; + struct cvmx_lmcx_read_level_ctl_s cn52xx; + struct cvmx_lmcx_read_level_ctl_s cn52xxp1; + struct cvmx_lmcx_read_level_ctl_s cn56xx; + struct cvmx_lmcx_read_level_ctl_s cn56xxp1; +}; + +/** + * cvmx_lmc#_read_level_dbg + * + * Notes: + * A given read of LMC*_READ_LEVEL_DBG returns the read-leveling pass/fail + * results for all possible delay settings (i.e. the BITMASK) for only one + * byte in the last rank that the HW read-leveled. + * LMC*_READ_LEVEL_DBG[BYTE] selects the particular byte. + * To get these pass/fail results for another different rank, you must run + * the hardware read-leveling again. For example, it is possible to get the + * BITMASK results for every byte of every rank if you run read-leveling + * separately for each rank, probing LMC*_READ_LEVEL_DBG between each + * read-leveling. + */ +union cvmx_lmcx_read_level_dbg { + u64 u64; + struct cvmx_lmcx_read_level_dbg_s { + uint64_t reserved_32_63:32; + uint64_t bitmask:16; + uint64_t reserved_4_15:12; + uint64_t byte:4; + } s; + struct cvmx_lmcx_read_level_dbg_s cn52xx; + struct cvmx_lmcx_read_level_dbg_s cn52xxp1; + struct cvmx_lmcx_read_level_dbg_s cn56xx; + struct cvmx_lmcx_read_level_dbg_s cn56xxp1; +}; + +/** + * cvmx_lmc#_read_level_rank# + * + * Notes: + * This is four CSRs per LMC, one per each rank. + * Each CSR is written by HW during a read-leveling sequence for the rank. + * (HW sets STATUS==3 after HW read-leveling completes for the rank.) + * Each CSR may also be written by SW, but not while a read-leveling sequence + * is in progress. (HW sets STATUS==1 after a CSR write.) + * Deskew setting is measured in units of 1/4 DCLK, so the above BYTE* + * values can range over 4 DCLKs. + * SW initiates a HW read-leveling sequence by programming + * LMC*_READ_LEVEL_CTL and writing INIT_START=1 with SEQUENCE=1. + * See LMC*_READ_LEVEL_CTL. + */ +union cvmx_lmcx_read_level_rankx { + u64 u64; + struct cvmx_lmcx_read_level_rankx_s { + uint64_t reserved_38_63:26; + uint64_t status:2; + uint64_t byte8:4; + uint64_t byte7:4; + uint64_t byte6:4; + uint64_t byte5:4; + uint64_t byte4:4; + uint64_t byte3:4; + uint64_t byte2:4; + uint64_t byte1:4; + uint64_t byte0:4; + } s; + struct cvmx_lmcx_read_level_rankx_s cn52xx; + struct cvmx_lmcx_read_level_rankx_s cn52xxp1; + struct cvmx_lmcx_read_level_rankx_s cn56xx; + struct cvmx_lmcx_read_level_rankx_s cn56xxp1; +}; + +/** + * cvmx_lmc#_ref_status + * + * This register contains the status of the refresh pending counter. + * + */ +union cvmx_lmcx_ref_status { + u64 u64; + struct cvmx_lmcx_ref_status_s { + uint64_t reserved_4_63:60; + uint64_t ref_pend_max_clr:1; + uint64_t ref_count:3; + } s; + struct cvmx_lmcx_ref_status_s cn73xx; + struct cvmx_lmcx_ref_status_s cn78xx; + struct cvmx_lmcx_ref_status_s cnf75xx; +}; + +/** + * cvmx_lmc#_reset_ctl + * + * Specify the RSL base addresses for the block. + * + */ +union cvmx_lmcx_reset_ctl { + u64 u64; + struct cvmx_lmcx_reset_ctl_s { + uint64_t reserved_4_63:60; + uint64_t ddr3psv:1; + uint64_t ddr3psoft:1; + uint64_t ddr3pwarm:1; + uint64_t ddr3rst:1; + } s; + struct cvmx_lmcx_reset_ctl_s cn61xx; + struct cvmx_lmcx_reset_ctl_s cn63xx; + struct cvmx_lmcx_reset_ctl_s cn63xxp1; + struct cvmx_lmcx_reset_ctl_s cn66xx; + struct cvmx_lmcx_reset_ctl_s cn68xx; + struct cvmx_lmcx_reset_ctl_s cn68xxp1; + struct cvmx_lmcx_reset_ctl_s cn70xx; + struct cvmx_lmcx_reset_ctl_s cn70xxp1; + struct cvmx_lmcx_reset_ctl_s cn73xx; + struct cvmx_lmcx_reset_ctl_s cn78xx; + struct cvmx_lmcx_reset_ctl_s cn78xxp1; + struct cvmx_lmcx_reset_ctl_s cnf71xx; + struct cvmx_lmcx_reset_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_retry_config + * + * This register configures automatic retry operation. + * + */ +union cvmx_lmcx_retry_config { + u64 u64; + struct cvmx_lmcx_retry_config_s { + uint64_t reserved_56_63:8; + uint64_t max_errors:24; + uint64_t reserved_13_31:19; + uint64_t error_continue:1; + uint64_t reserved_9_11:3; + uint64_t auto_error_continue:1; + uint64_t reserved_5_7:3; + uint64_t pulse_count_auto_clr:1; + uint64_t reserved_1_3:3; + uint64_t retry_enable:1; + } s; + struct cvmx_lmcx_retry_config_s cn73xx; + struct cvmx_lmcx_retry_config_s cn78xx; + struct cvmx_lmcx_retry_config_s cnf75xx; +}; + +/** + * cvmx_lmc#_retry_status + * + * This register provides status on automatic retry operation. + * + */ +union cvmx_lmcx_retry_status { + u64 u64; + struct cvmx_lmcx_retry_status_s { + uint64_t clear_error_count:1; + uint64_t clear_error_pulse_count:1; + uint64_t reserved_57_61:5; + uint64_t error_pulse_count_valid:1; + uint64_t error_pulse_count_sat:1; + uint64_t reserved_52_54:3; + uint64_t error_pulse_count:4; + uint64_t reserved_45_47:3; + uint64_t error_sequence:5; + uint64_t reserved_33_39:7; + uint64_t error_type:1; + uint64_t reserved_24_31:8; + uint64_t error_count:24; + } s; + struct cvmx_lmcx_retry_status_s cn73xx; + struct cvmx_lmcx_retry_status_s cn78xx; + struct cvmx_lmcx_retry_status_s cnf75xx; +}; + +/** + * cvmx_lmc#_rlevel_ctl + */ +union cvmx_lmcx_rlevel_ctl { + u64 u64; + struct cvmx_lmcx_rlevel_ctl_s { + uint64_t reserved_33_63:31; + uint64_t tccd_sel:1; + uint64_t pattern:8; + uint64_t reserved_22_23:2; + uint64_t delay_unload_3:1; + uint64_t delay_unload_2:1; + uint64_t delay_unload_1:1; + uint64_t delay_unload_0:1; + uint64_t bitmask:8; + uint64_t or_dis:1; + uint64_t offset_en:1; + uint64_t offset:4; + uint64_t byte:4; + } s; + struct cvmx_lmcx_rlevel_ctl_cn61xx { + uint64_t reserved_22_63:42; + uint64_t delay_unload_3:1; + uint64_t delay_unload_2:1; + uint64_t delay_unload_1:1; + uint64_t delay_unload_0:1; + uint64_t bitmask:8; + uint64_t or_dis:1; + uint64_t offset_en:1; + uint64_t offset:4; + uint64_t byte:4; + } cn61xx; + struct cvmx_lmcx_rlevel_ctl_cn61xx cn63xx; + struct cvmx_lmcx_rlevel_ctl_cn63xxp1 { + uint64_t reserved_9_63:55; + uint64_t offset_en:1; + uint64_t offset:4; + uint64_t byte:4; + } cn63xxp1; + struct cvmx_lmcx_rlevel_ctl_cn61xx cn66xx; + struct cvmx_lmcx_rlevel_ctl_cn61xx cn68xx; + struct cvmx_lmcx_rlevel_ctl_cn61xx cn68xxp1; + struct cvmx_lmcx_rlevel_ctl_cn70xx { + uint64_t reserved_32_63:32; + uint64_t pattern:8; + uint64_t reserved_22_23:2; + uint64_t delay_unload_3:1; + uint64_t delay_unload_2:1; + uint64_t delay_unload_1:1; + uint64_t delay_unload_0:1; + uint64_t bitmask:8; + uint64_t or_dis:1; + uint64_t offset_en:1; + uint64_t offset:4; + uint64_t byte:4; + } cn70xx; + struct cvmx_lmcx_rlevel_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_rlevel_ctl_cn70xx cn73xx; + struct cvmx_lmcx_rlevel_ctl_s cn78xx; + struct cvmx_lmcx_rlevel_ctl_s cn78xxp1; + struct cvmx_lmcx_rlevel_ctl_cn61xx cnf71xx; + struct cvmx_lmcx_rlevel_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_rlevel_dbg + * + * A given read of LMC()_RLEVEL_DBG returns the read leveling pass/fail + * results for all possible delay settings (i.e. the BITMASK) for only + * one byte in the last rank that the hardware ran read leveling on. + * LMC()_RLEVEL_CTL[BYTE] selects the particular byte. To get these + * pass/fail results for a different rank, you must run the hardware + * read leveling again. For example, it is possible to get the [BITMASK] + * results for every byte of every rank if you run read leveling separately + * for each rank, probing LMC()_RLEVEL_DBG between each read- leveling. + */ +union cvmx_lmcx_rlevel_dbg { + u64 u64; + struct cvmx_lmcx_rlevel_dbg_s { + uint64_t bitmask:64; + } s; + struct cvmx_lmcx_rlevel_dbg_s cn61xx; + struct cvmx_lmcx_rlevel_dbg_s cn63xx; + struct cvmx_lmcx_rlevel_dbg_s cn63xxp1; + struct cvmx_lmcx_rlevel_dbg_s cn66xx; + struct cvmx_lmcx_rlevel_dbg_s cn68xx; + struct cvmx_lmcx_rlevel_dbg_s cn68xxp1; + struct cvmx_lmcx_rlevel_dbg_s cn70xx; + struct cvmx_lmcx_rlevel_dbg_s cn70xxp1; + struct cvmx_lmcx_rlevel_dbg_s cn73xx; + struct cvmx_lmcx_rlevel_dbg_s cn78xx; + struct cvmx_lmcx_rlevel_dbg_s cn78xxp1; + struct cvmx_lmcx_rlevel_dbg_s cnf71xx; + struct cvmx_lmcx_rlevel_dbg_s cnf75xx; +}; + +/** + * cvmx_lmc#_rlevel_rank# + * + * Four of these CSRs exist per LMC, one for each rank. Read level setting + * is measured in units of 1/4 CK, so the BYTEn values can range over 16 CK + * cycles. Each CSR is written by hardware during a read leveling sequence + * for the rank. (Hardware sets [STATUS] to 3 after hardware read leveling + * completes for the rank.) + * + * If hardware is unable to find a match per LMC()_RLEVEL_CTL[OFFSET_EN] and + * LMC()_RLEVEL_CTL[OFFSET], then hardware sets + * LMC()_RLEVEL_RANK()[BYTEn<5:0>] to 0x0. + * + * Each CSR may also be written by software, but not while a read leveling + * sequence is in progress. (Hardware sets [STATUS] to 1 after a CSR write.) + * Software initiates a hardware read leveling sequence by programming + * LMC()_RLEVEL_CTL and writing [INIT_START] = 1 with [SEQ_SEL]=1. + * See LMC()_RLEVEL_CTL. + * + * LMC()_RLEVEL_RANKi values for ranks i without attached DRAM should be set + * such that they do not increase the range of possible BYTE values for any + * byte lane. The easiest way to do this is to set LMC()_RLEVEL_RANKi = + * LMC()_RLEVEL_RANKj, where j is some rank with attached DRAM whose + * LMC()_RLEVEL_RANKj is already fully initialized. + */ +union cvmx_lmcx_rlevel_rankx { + u64 u64; + struct cvmx_lmcx_rlevel_rankx_s { + uint64_t reserved_56_63:8; + uint64_t status:2; + uint64_t byte8:6; + uint64_t byte7:6; + uint64_t byte6:6; + uint64_t byte5:6; + uint64_t byte4:6; + uint64_t byte3:6; + uint64_t byte2:6; + uint64_t byte1:6; + uint64_t byte0:6; + } s; + struct cvmx_lmcx_rlevel_rankx_s cn61xx; + struct cvmx_lmcx_rlevel_rankx_s cn63xx; + struct cvmx_lmcx_rlevel_rankx_s cn63xxp1; + struct cvmx_lmcx_rlevel_rankx_s cn66xx; + struct cvmx_lmcx_rlevel_rankx_s cn68xx; + struct cvmx_lmcx_rlevel_rankx_s cn68xxp1; + struct cvmx_lmcx_rlevel_rankx_s cn70xx; + struct cvmx_lmcx_rlevel_rankx_s cn70xxp1; + struct cvmx_lmcx_rlevel_rankx_s cn73xx; + struct cvmx_lmcx_rlevel_rankx_s cn78xx; + struct cvmx_lmcx_rlevel_rankx_s cn78xxp1; + struct cvmx_lmcx_rlevel_rankx_s cnf71xx; + struct cvmx_lmcx_rlevel_rankx_s cnf75xx; +}; + +/** + * cvmx_lmc#_rodt_comp_ctl + * + * LMC_RODT_COMP_CTL = LMC Compensation control + * + */ +union cvmx_lmcx_rodt_comp_ctl { + u64 u64; + struct cvmx_lmcx_rodt_comp_ctl_s { + uint64_t reserved_17_63:47; + uint64_t enable:1; + uint64_t reserved_12_15:4; + uint64_t nctl:4; + uint64_t reserved_5_7:3; + uint64_t pctl:5; + } s; + struct cvmx_lmcx_rodt_comp_ctl_s cn50xx; + struct cvmx_lmcx_rodt_comp_ctl_s cn52xx; + struct cvmx_lmcx_rodt_comp_ctl_s cn52xxp1; + struct cvmx_lmcx_rodt_comp_ctl_s cn56xx; + struct cvmx_lmcx_rodt_comp_ctl_s cn56xxp1; + struct cvmx_lmcx_rodt_comp_ctl_s cn58xx; + struct cvmx_lmcx_rodt_comp_ctl_s cn58xxp1; +}; + +/** + * cvmx_lmc#_rodt_ctl + * + * LMC_RODT_CTL = Obsolete LMC Read OnDieTermination control + * See the description in LMC_WODT_CTL1. On Reads, Octeon only supports + * turning on ODT's in the lower 2 DIMM's with the masks as below. + * + * Notes: + * When a given RANK in position N is selected, the RODT _HI and _LO masks + * for that position are used. + * Mask[3:0] is used for RODT control of the RANKs in positions 3, 2, 1, + * and 0, respectively. + * In 64b mode, DIMMs are assumed to be ordered in the following order: + * position 3: [unused , DIMM1_RANK1_LO] + * position 2: [unused , DIMM1_RANK0_LO] + * position 1: [unused , DIMM0_RANK1_LO] + * position 0: [unused , DIMM0_RANK0_LO] + * In 128b mode, DIMMs are assumed to be ordered in the following order: + * position 3: [DIMM3_RANK1_HI, DIMM1_RANK1_LO] + * position 2: [DIMM3_RANK0_HI, DIMM1_RANK0_LO] + * position 1: [DIMM2_RANK1_HI, DIMM0_RANK1_LO] + * position 0: [DIMM2_RANK0_HI, DIMM0_RANK0_LO] + */ +union cvmx_lmcx_rodt_ctl { + u64 u64; + struct cvmx_lmcx_rodt_ctl_s { + uint64_t reserved_32_63:32; + uint64_t rodt_hi3:4; + uint64_t rodt_hi2:4; + uint64_t rodt_hi1:4; + uint64_t rodt_hi0:4; + uint64_t rodt_lo3:4; + uint64_t rodt_lo2:4; + uint64_t rodt_lo1:4; + uint64_t rodt_lo0:4; + } s; + struct cvmx_lmcx_rodt_ctl_s cn30xx; + struct cvmx_lmcx_rodt_ctl_s cn31xx; + struct cvmx_lmcx_rodt_ctl_s cn38xx; + struct cvmx_lmcx_rodt_ctl_s cn38xxp2; + struct cvmx_lmcx_rodt_ctl_s cn50xx; + struct cvmx_lmcx_rodt_ctl_s cn52xx; + struct cvmx_lmcx_rodt_ctl_s cn52xxp1; + struct cvmx_lmcx_rodt_ctl_s cn56xx; + struct cvmx_lmcx_rodt_ctl_s cn56xxp1; + struct cvmx_lmcx_rodt_ctl_s cn58xx; + struct cvmx_lmcx_rodt_ctl_s cn58xxp1; +}; + +/** + * cvmx_lmc#_rodt_mask + * + * System designers may desire to terminate DQ/DQS lines for higher frequency + * DDR operations, especially on a multirank system. DDR3 DQ/DQS I/Os have + * built-in termination resistors that can be turned on or off by the + * controller, after meeting TAOND and TAOF timing requirements. + * + * Each rank has its own ODT pin that fans out to all the memory parts in + * that DIMM. System designers may prefer different combinations of ODT ONs + * for read operations into different ranks. CNXXXX supports full + * programmability by way of the mask register below. Each rank position has + * its own 4-bit programmable field. When the controller does a read to that + * rank, it sets the 4 ODT pins to the MASK pins below. For example, when + * doing a read from Rank0, a system designer may desire to terminate the + * lines with the resistor on DIMM0/Rank1. The mask [RODT_D0_R0] would then + * be [0010]. + * + * CNXXXX drives the appropriate mask values on the ODT pins by default. + * If this feature is not required, write 0x0 in this register. Note that, + * as per the JEDEC DDR3 specifications, the ODT pin for the rank that is + * being read should always be 0x0. When a given RANK is selected, the RODT + * mask for that rank is used. The resulting RODT mask is driven to the + * DIMMs in the following manner: + */ +union cvmx_lmcx_rodt_mask { + u64 u64; + struct cvmx_lmcx_rodt_mask_s { + uint64_t rodt_d3_r1:8; + uint64_t rodt_d3_r0:8; + uint64_t rodt_d2_r1:8; + uint64_t rodt_d2_r0:8; + uint64_t rodt_d1_r1:8; + uint64_t rodt_d1_r0:8; + uint64_t rodt_d0_r1:8; + uint64_t rodt_d0_r0:8; + } s; + struct cvmx_lmcx_rodt_mask_s cn61xx; + struct cvmx_lmcx_rodt_mask_s cn63xx; + struct cvmx_lmcx_rodt_mask_s cn63xxp1; + struct cvmx_lmcx_rodt_mask_s cn66xx; + struct cvmx_lmcx_rodt_mask_s cn68xx; + struct cvmx_lmcx_rodt_mask_s cn68xxp1; + struct cvmx_lmcx_rodt_mask_cn70xx { + uint64_t reserved_28_63:36; + uint64_t rodt_d1_r1:4; + uint64_t reserved_20_23:4; + uint64_t rodt_d1_r0:4; + uint64_t reserved_12_15:4; + uint64_t rodt_d0_r1:4; + uint64_t reserved_4_7:4; + uint64_t rodt_d0_r0:4; + } cn70xx; + struct cvmx_lmcx_rodt_mask_cn70xx cn70xxp1; + struct cvmx_lmcx_rodt_mask_cn70xx cn73xx; + struct cvmx_lmcx_rodt_mask_cn70xx cn78xx; + struct cvmx_lmcx_rodt_mask_cn70xx cn78xxp1; + struct cvmx_lmcx_rodt_mask_s cnf71xx; + struct cvmx_lmcx_rodt_mask_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_scramble_cfg0 + * + * LMC_SCRAMBLE_CFG0 = LMC Scramble Config0 + * + */ +union cvmx_lmcx_scramble_cfg0 { + u64 u64; + struct cvmx_lmcx_scramble_cfg0_s { + uint64_t key:64; + } s; + struct cvmx_lmcx_scramble_cfg0_s cn61xx; + struct cvmx_lmcx_scramble_cfg0_s cn66xx; + struct cvmx_lmcx_scramble_cfg0_s cn70xx; + struct cvmx_lmcx_scramble_cfg0_s cn70xxp1; + struct cvmx_lmcx_scramble_cfg0_s cn73xx; + struct cvmx_lmcx_scramble_cfg0_s cn78xx; + struct cvmx_lmcx_scramble_cfg0_s cn78xxp1; + struct cvmx_lmcx_scramble_cfg0_s cnf71xx; + struct cvmx_lmcx_scramble_cfg0_s cnf75xx; +}; + +/** + * cvmx_lmc#_scramble_cfg1 + * + * These registers set the aliasing that uses the lowest, legal chip select(s). + * + */ +union cvmx_lmcx_scramble_cfg1 { + u64 u64; + struct cvmx_lmcx_scramble_cfg1_s { + uint64_t key:64; + } s; + struct cvmx_lmcx_scramble_cfg1_s cn61xx; + struct cvmx_lmcx_scramble_cfg1_s cn66xx; + struct cvmx_lmcx_scramble_cfg1_s cn70xx; + struct cvmx_lmcx_scramble_cfg1_s cn70xxp1; + struct cvmx_lmcx_scramble_cfg1_s cn73xx; + struct cvmx_lmcx_scramble_cfg1_s cn78xx; + struct cvmx_lmcx_scramble_cfg1_s cn78xxp1; + struct cvmx_lmcx_scramble_cfg1_s cnf71xx; + struct cvmx_lmcx_scramble_cfg1_s cnf75xx; +}; + +/** + * cvmx_lmc#_scramble_cfg2 + */ +union cvmx_lmcx_scramble_cfg2 { + u64 u64; + struct cvmx_lmcx_scramble_cfg2_s { + uint64_t key:64; + } s; + struct cvmx_lmcx_scramble_cfg2_s cn73xx; + struct cvmx_lmcx_scramble_cfg2_s cn78xx; + struct cvmx_lmcx_scramble_cfg2_s cnf75xx; +}; + +/** + * cvmx_lmc#_scrambled_fadr + * + * LMC()_FADR captures the failing pre-scrambled address location (split into + * DIMM, bunk, bank, etc). If scrambling is off, LMC()_FADR also captures the + * failing physical location in the DRAM parts. LMC()_SCRAMBLED_FADR captures + * the actual failing address location in the physical DRAM parts, i.e.: + * + * * If scrambling is on, LMC()_SCRAMBLED_FADR contains the failing physical + * location in the + * DRAM parts (split into DIMM, bunk, bank, etc). + * + * * If scrambling is off, the pre-scramble and post-scramble addresses are + * the same, and so the + * contents of LMC()_SCRAMBLED_FADR match the contents of LMC()_FADR. + * + * This register only captures the first transaction with ECC errors. A DED + * error can over-write this register with its failing addresses if the first + * error was a SEC. If you write LMC()_CONFIG -> SEC_ERR/DED_ERR, it clears + * the error bits and captures the next failing address. If [FDIMM] is 1, + * that means the error is in the higher DIMM. + */ +union cvmx_lmcx_scrambled_fadr { + u64 u64; + struct cvmx_lmcx_scrambled_fadr_s { + uint64_t reserved_43_63:21; + uint64_t fcid:3; + uint64_t fill_order:2; + uint64_t reserved_14_37:24; + uint64_t fcol:14; + } s; + struct cvmx_lmcx_scrambled_fadr_cn61xx { + uint64_t reserved_36_63:28; + uint64_t fdimm:2; + uint64_t fbunk:1; + uint64_t fbank:3; + uint64_t frow:16; + uint64_t fcol:14; + } cn61xx; + struct cvmx_lmcx_scrambled_fadr_cn61xx cn66xx; + struct cvmx_lmcx_scrambled_fadr_cn70xx { + uint64_t reserved_40_63:24; + uint64_t fill_order:2; + uint64_t fdimm:1; + uint64_t fbunk:1; + uint64_t fbank:4; + uint64_t frow:18; + uint64_t fcol:14; + } cn70xx; + struct cvmx_lmcx_scrambled_fadr_cn70xx cn70xxp1; + struct cvmx_lmcx_scrambled_fadr_cn73xx { + uint64_t reserved_43_63:21; + uint64_t fcid:3; + uint64_t fill_order:2; + uint64_t fdimm:1; + uint64_t fbunk:1; + uint64_t fbank:4; + uint64_t frow:18; + uint64_t fcol:14; + } cn73xx; + struct cvmx_lmcx_scrambled_fadr_cn73xx cn78xx; + struct cvmx_lmcx_scrambled_fadr_cn73xx cn78xxp1; + struct cvmx_lmcx_scrambled_fadr_cn61xx cnf71xx; + struct cvmx_lmcx_scrambled_fadr_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_seq_ctl + * + * This register is used to initiate the various control sequences in the LMC. + * + */ +union cvmx_lmcx_seq_ctl { + u64 u64; + struct cvmx_lmcx_seq_ctl_s { + uint64_t reserved_6_63:58; + uint64_t seq_complete:1; + uint64_t seq_sel:4; + uint64_t init_start:1; + } s; + struct cvmx_lmcx_seq_ctl_s cn70xx; + struct cvmx_lmcx_seq_ctl_s cn70xxp1; + struct cvmx_lmcx_seq_ctl_s cn73xx; + struct cvmx_lmcx_seq_ctl_s cn78xx; + struct cvmx_lmcx_seq_ctl_s cn78xxp1; + struct cvmx_lmcx_seq_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_slot_ctl0 + * + * This register is an assortment of control fields needed by the memory + * controller. If software has not previously written to this register + * (since the last DRESET), hardware updates the fields in this register to + * the minimum allowed value when any of LMC()_RLEVEL_RANK(), + * LMC()_WLEVEL_RANK(), LMC()_CONTROL, and LMC()_MODEREG_PARAMS0 registers + * change. Ideally, only read this register after LMC has been initialized and + * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data. + * + * The interpretation of the fields in this register depends on + * LMC(0)_CONFIG[DDR2T]: + * + * * If LMC()_CONFIG[DDR2T]=1, (FieldValue + 4) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * + * If LMC()_CONFIG[DDR2T]=0, (FieldValue + 3) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and second + * types from different cache blocks. + * FieldValue = 0 is always illegal in this case. + * The hardware-calculated minimums for these fields are shown in + * LMC(0)_SLOT_CTL0 Hardware-Calculated Minimums. + */ +union cvmx_lmcx_slot_ctl0 { + u64 u64; + struct cvmx_lmcx_slot_ctl0_s { + uint64_t reserved_50_63:14; + uint64_t w2r_l_init_ext:1; + uint64_t w2r_init_ext:1; + uint64_t w2w_l_init:6; + uint64_t w2r_l_init:6; + uint64_t r2w_l_init:6; + uint64_t r2r_l_init:6; + uint64_t w2w_init:6; + uint64_t w2r_init:6; + uint64_t r2w_init:6; + uint64_t r2r_init:6; + } s; + struct cvmx_lmcx_slot_ctl0_cn61xx { + uint64_t reserved_24_63:40; + uint64_t w2w_init:6; + uint64_t w2r_init:6; + uint64_t r2w_init:6; + uint64_t r2r_init:6; + } cn61xx; + struct cvmx_lmcx_slot_ctl0_cn61xx cn63xx; + struct cvmx_lmcx_slot_ctl0_cn61xx cn63xxp1; + struct cvmx_lmcx_slot_ctl0_cn61xx cn66xx; + struct cvmx_lmcx_slot_ctl0_cn61xx cn68xx; + struct cvmx_lmcx_slot_ctl0_cn61xx cn68xxp1; + struct cvmx_lmcx_slot_ctl0_cn70xx { + uint64_t reserved_48_63:16; + uint64_t w2w_l_init:6; + uint64_t w2r_l_init:6; + uint64_t r2w_l_init:6; + uint64_t r2r_l_init:6; + uint64_t w2w_init:6; + uint64_t w2r_init:6; + uint64_t r2w_init:6; + uint64_t r2r_init:6; + } cn70xx; + struct cvmx_lmcx_slot_ctl0_cn70xx cn70xxp1; + struct cvmx_lmcx_slot_ctl0_s cn73xx; + struct cvmx_lmcx_slot_ctl0_s cn78xx; + struct cvmx_lmcx_slot_ctl0_s cn78xxp1; + struct cvmx_lmcx_slot_ctl0_cn61xx cnf71xx; + struct cvmx_lmcx_slot_ctl0_s cnf75xx; +}; + +/** + * cvmx_lmc#_slot_ctl1 + * + * This register is an assortment of control fields needed by the memory + * controller. If software has not previously written to this register + * (since the last DRESET), hardware updates the fields in this register to + * the minimum allowed value when any of LMC()_RLEVEL_RANK(), + * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change. + * Ideally, only read this register after LMC has been initialized and + * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data. + * + * The interpretation of the fields in this CSR depends on + * LMC(0)_CONFIG[DDR2T]: + * + * * If LMC()_CONFIG[DDR2T]=1, (FieldValue + 4) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * + * * If LMC()_CONFIG[DDR2T]=0, (FieldValue + 3) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * FieldValue = 0 is always illegal in this case. + * + * The hardware-calculated minimums for these fields are shown in + * LMC(0)_SLOT_CTL1 Hardware-Calculated Minimums. + */ +union cvmx_lmcx_slot_ctl1 { + u64 u64; + struct cvmx_lmcx_slot_ctl1_s { + uint64_t reserved_24_63:40; + uint64_t w2w_xrank_init:6; + uint64_t w2r_xrank_init:6; + uint64_t r2w_xrank_init:6; + uint64_t r2r_xrank_init:6; + } s; + struct cvmx_lmcx_slot_ctl1_s cn61xx; + struct cvmx_lmcx_slot_ctl1_s cn63xx; + struct cvmx_lmcx_slot_ctl1_s cn63xxp1; + struct cvmx_lmcx_slot_ctl1_s cn66xx; + struct cvmx_lmcx_slot_ctl1_s cn68xx; + struct cvmx_lmcx_slot_ctl1_s cn68xxp1; + struct cvmx_lmcx_slot_ctl1_s cn70xx; + struct cvmx_lmcx_slot_ctl1_s cn70xxp1; + struct cvmx_lmcx_slot_ctl1_s cn73xx; + struct cvmx_lmcx_slot_ctl1_s cn78xx; + struct cvmx_lmcx_slot_ctl1_s cn78xxp1; + struct cvmx_lmcx_slot_ctl1_s cnf71xx; + struct cvmx_lmcx_slot_ctl1_s cnf75xx; +}; + +/** + * cvmx_lmc#_slot_ctl2 + * + * This register is an assortment of control fields needed by the memory + * controller. If software has not previously written to this register + * (since the last DRESET), hardware updates the fields in this register + * to the minimum allowed value when any of LMC()_RLEVEL_RANK(), + * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change. + * Ideally, only read this register after LMC has been initialized and + * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data. + * + * The interpretation of the fields in this CSR depends on LMC(0)_CONFIG[DDR2T]: + * + * * If LMC()_CONFIG[DDR2T] = 1, (FieldValue + 4) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * + * * If LMC()_CONFIG[DDR2T] = 0, (FieldValue + 3) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and second + * types from different cache blocks. + * FieldValue = 0 is always illegal in this case. + * + * The hardware-calculated minimums for these fields are shown in LMC Registers. + */ +union cvmx_lmcx_slot_ctl2 { + u64 u64; + struct cvmx_lmcx_slot_ctl2_s { + uint64_t reserved_24_63:40; + uint64_t w2w_xdimm_init:6; + uint64_t w2r_xdimm_init:6; + uint64_t r2w_xdimm_init:6; + uint64_t r2r_xdimm_init:6; + } s; + struct cvmx_lmcx_slot_ctl2_s cn61xx; + struct cvmx_lmcx_slot_ctl2_s cn63xx; + struct cvmx_lmcx_slot_ctl2_s cn63xxp1; + struct cvmx_lmcx_slot_ctl2_s cn66xx; + struct cvmx_lmcx_slot_ctl2_s cn68xx; + struct cvmx_lmcx_slot_ctl2_s cn68xxp1; + struct cvmx_lmcx_slot_ctl2_s cn70xx; + struct cvmx_lmcx_slot_ctl2_s cn70xxp1; + struct cvmx_lmcx_slot_ctl2_s cn73xx; + struct cvmx_lmcx_slot_ctl2_s cn78xx; + struct cvmx_lmcx_slot_ctl2_s cn78xxp1; + struct cvmx_lmcx_slot_ctl2_s cnf71xx; + struct cvmx_lmcx_slot_ctl2_s cnf75xx; +}; + +/** + * cvmx_lmc#_slot_ctl3 + * + * This register is an assortment of control fields needed by the memory + * controller. If software has not previously written to this register + * (since the last DRESET), hardware updates the fields in this register + * to the minimum allowed value when any of LMC()_RLEVEL_RANK(), + * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change. + * Ideally, only read this register after LMC has been initialized and + * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data. + * + * The interpretation of the fields in this CSR depends on LMC(0)_CONFIG[DDR2T]: + * + * * If LMC()_CONFIG[DDR2T] = 1, (FieldValue + 4) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * + * * If LMC()_CONFIG[DDR2T] = 0, (FieldValue + 3) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and second + * types from different cache blocks. + * FieldValue = 0 is always illegal in this case. + * + * The hardware-calculated minimums for these fields are shown in LMC Registers. + */ +union cvmx_lmcx_slot_ctl3 { + u64 u64; + struct cvmx_lmcx_slot_ctl3_s { + uint64_t reserved_50_63:14; + uint64_t w2r_l_xrank_init_ext:1; + uint64_t w2r_xrank_init_ext:1; + uint64_t w2w_l_xrank_init:6; + uint64_t w2r_l_xrank_init:6; + uint64_t r2w_l_xrank_init:6; + uint64_t r2r_l_xrank_init:6; + uint64_t w2w_xrank_init:6; + uint64_t w2r_xrank_init:6; + uint64_t r2w_xrank_init:6; + uint64_t r2r_xrank_init:6; + } s; + struct cvmx_lmcx_slot_ctl3_s cn73xx; + struct cvmx_lmcx_slot_ctl3_s cn78xx; + struct cvmx_lmcx_slot_ctl3_s cnf75xx; +}; + +/** + * cvmx_lmc#_timing_params0 + */ +union cvmx_lmcx_timing_params0 { + u64 u64; + struct cvmx_lmcx_timing_params0_s { + uint64_t reserved_54_63:10; + uint64_t tbcw:6; + uint64_t reserved_26_47:22; + uint64_t tmrd:4; + uint64_t reserved_8_21:14; + uint64_t tckeon:8; + } s; + struct cvmx_lmcx_timing_params0_cn61xx { + uint64_t reserved_47_63:17; + uint64_t trp_ext:1; + uint64_t tcksre:4; + uint64_t trp:4; + uint64_t tzqinit:4; + uint64_t tdllk:4; + uint64_t tmod:4; + uint64_t tmrd:4; + uint64_t txpr:4; + uint64_t tcke:4; + uint64_t tzqcs:4; + uint64_t reserved_0_9:10; + } cn61xx; + struct cvmx_lmcx_timing_params0_cn61xx cn63xx; + struct cvmx_lmcx_timing_params0_cn63xxp1 { + uint64_t reserved_46_63:18; + uint64_t tcksre:4; + uint64_t trp:4; + uint64_t tzqinit:4; + uint64_t tdllk:4; + uint64_t tmod:4; + uint64_t tmrd:4; + uint64_t txpr:4; + uint64_t tcke:4; + uint64_t tzqcs:4; + uint64_t tckeon:10; + } cn63xxp1; + struct cvmx_lmcx_timing_params0_cn61xx cn66xx; + struct cvmx_lmcx_timing_params0_cn61xx cn68xx; + struct cvmx_lmcx_timing_params0_cn61xx cn68xxp1; + struct cvmx_lmcx_timing_params0_cn70xx { + uint64_t reserved_48_63:16; + uint64_t tcksre:4; + uint64_t trp:5; + uint64_t tzqinit:4; + uint64_t tdllk:4; + uint64_t tmod:5; + uint64_t tmrd:4; + uint64_t txpr:6; + uint64_t tcke:4; + uint64_t tzqcs:4; + uint64_t reserved_0_7:8; + } cn70xx; + struct cvmx_lmcx_timing_params0_cn70xx cn70xxp1; + struct cvmx_lmcx_timing_params0_cn73xx { + uint64_t reserved_54_63:10; + uint64_t tbcw:6; + uint64_t tcksre:4; + uint64_t trp:5; + uint64_t tzqinit:4; + uint64_t tdllk:4; + uint64_t tmod:5; + uint64_t tmrd:4; + uint64_t txpr:6; + uint64_t tcke:4; + uint64_t tzqcs:4; + uint64_t reserved_0_7:8; + } cn73xx; + struct cvmx_lmcx_timing_params0_cn73xx cn78xx; + struct cvmx_lmcx_timing_params0_cn73xx cn78xxp1; + struct cvmx_lmcx_timing_params0_cn61xx cnf71xx; + struct cvmx_lmcx_timing_params0_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_timing_params1 + */ +union cvmx_lmcx_timing_params1 { + u64 u64; + struct cvmx_lmcx_timing_params1_s { + uint64_t reserved_59_63:5; + uint64_t txp_ext:1; + uint64_t trcd_ext:1; + uint64_t tpdm_full_cycle_ena:1; + uint64_t trfc_dlr:7; + uint64_t reserved_4_48:45; + uint64_t tmprr:4; + } s; + struct cvmx_lmcx_timing_params1_cn61xx { + uint64_t reserved_47_63:17; + uint64_t tras_ext:1; + uint64_t txpdll:5; + uint64_t tfaw:5; + uint64_t twldqsen:4; + uint64_t twlmrd:4; + uint64_t txp:3; + uint64_t trrd:3; + uint64_t trfc:5; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:5; + uint64_t tmprr:4; + } cn61xx; + struct cvmx_lmcx_timing_params1_cn61xx cn63xx; + struct cvmx_lmcx_timing_params1_cn63xxp1 { + uint64_t reserved_46_63:18; + uint64_t txpdll:5; + uint64_t tfaw:5; + uint64_t twldqsen:4; + uint64_t twlmrd:4; + uint64_t txp:3; + uint64_t trrd:3; + uint64_t trfc:5; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:5; + uint64_t tmprr:4; + } cn63xxp1; + struct cvmx_lmcx_timing_params1_cn61xx cn66xx; + struct cvmx_lmcx_timing_params1_cn61xx cn68xx; + struct cvmx_lmcx_timing_params1_cn61xx cn68xxp1; + struct cvmx_lmcx_timing_params1_cn70xx { + uint64_t reserved_49_63:15; + uint64_t txpdll:5; + uint64_t tfaw:5; + uint64_t twldqsen:4; + uint64_t twlmrd:4; + uint64_t txp:3; + uint64_t trrd:3; + uint64_t trfc:7; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:6; + uint64_t tmprr:4; + } cn70xx; + struct cvmx_lmcx_timing_params1_cn70xx cn70xxp1; + struct cvmx_lmcx_timing_params1_cn73xx { + uint64_t reserved_59_63:5; + uint64_t txp_ext:1; + uint64_t trcd_ext:1; + uint64_t tpdm_full_cycle_ena:1; + uint64_t trfc_dlr:7; + uint64_t txpdll:5; + uint64_t tfaw:5; + uint64_t twldqsen:4; + uint64_t twlmrd:4; + uint64_t txp:3; + uint64_t trrd:3; + uint64_t trfc:7; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:6; + uint64_t tmprr:4; + } cn73xx; + struct cvmx_lmcx_timing_params1_cn73xx cn78xx; + struct cvmx_lmcx_timing_params1_cn73xx cn78xxp1; + struct cvmx_lmcx_timing_params1_cn61xx cnf71xx; + struct cvmx_lmcx_timing_params1_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_timing_params2 + * + * This register sets timing parameters for DDR4. + * + */ +union cvmx_lmcx_timing_params2 { + u64 u64; + struct cvmx_lmcx_timing_params2_s { + uint64_t reserved_16_63:48; + uint64_t trrd_l_ext:1; + uint64_t trtp:4; + uint64_t t_rw_op_max:4; + uint64_t twtr_l:4; + uint64_t trrd_l:3; + } s; + struct cvmx_lmcx_timing_params2_cn70xx { + uint64_t reserved_15_63:49; + uint64_t trtp:4; + uint64_t t_rw_op_max:4; + uint64_t twtr_l:4; + uint64_t trrd_l:3; + } cn70xx; + struct cvmx_lmcx_timing_params2_cn70xx cn70xxp1; + struct cvmx_lmcx_timing_params2_s cn73xx; + struct cvmx_lmcx_timing_params2_s cn78xx; + struct cvmx_lmcx_timing_params2_s cn78xxp1; + struct cvmx_lmcx_timing_params2_s cnf75xx; +}; + +/** + * cvmx_lmc#_tro_ctl + * + * LMC_TRO_CTL = LMC Temperature Ring Osc Control + * This register is an assortment of various control fields needed to + * control the temperature ring oscillator + * + * Notes: + * To bring up the temperature ring oscillator, write TRESET to 0, and + * follow by initializing RCLK_CNT to desired value + */ +union cvmx_lmcx_tro_ctl { + u64 u64; + struct cvmx_lmcx_tro_ctl_s { + uint64_t reserved_33_63:31; + uint64_t rclk_cnt:32; + uint64_t treset:1; + } s; + struct cvmx_lmcx_tro_ctl_s cn61xx; + struct cvmx_lmcx_tro_ctl_s cn63xx; + struct cvmx_lmcx_tro_ctl_s cn63xxp1; + struct cvmx_lmcx_tro_ctl_s cn66xx; + struct cvmx_lmcx_tro_ctl_s cn68xx; + struct cvmx_lmcx_tro_ctl_s cn68xxp1; + struct cvmx_lmcx_tro_ctl_s cnf71xx; +}; + +/** + * cvmx_lmc#_tro_stat + * + * LMC_TRO_STAT = LMC Temperature Ring Osc Status + * This register is an assortment of various control fields needed to + * control the temperature ring oscillator + */ +union cvmx_lmcx_tro_stat { + u64 u64; + struct cvmx_lmcx_tro_stat_s { + uint64_t reserved_32_63:32; + uint64_t ring_cnt:32; + } s; + struct cvmx_lmcx_tro_stat_s cn61xx; + struct cvmx_lmcx_tro_stat_s cn63xx; + struct cvmx_lmcx_tro_stat_s cn63xxp1; + struct cvmx_lmcx_tro_stat_s cn66xx; + struct cvmx_lmcx_tro_stat_s cn68xx; + struct cvmx_lmcx_tro_stat_s cn68xxp1; + struct cvmx_lmcx_tro_stat_s cnf71xx; +}; + +/** + * cvmx_lmc#_wlevel_ctl + */ +union cvmx_lmcx_wlevel_ctl { + u64 u64; + struct cvmx_lmcx_wlevel_ctl_s { + uint64_t reserved_22_63:42; + uint64_t rtt_nom:3; + uint64_t bitmask:8; + uint64_t or_dis:1; + uint64_t sset:1; + uint64_t lanemask:9; + } s; + struct cvmx_lmcx_wlevel_ctl_s cn61xx; + struct cvmx_lmcx_wlevel_ctl_s cn63xx; + struct cvmx_lmcx_wlevel_ctl_cn63xxp1 { + uint64_t reserved_10_63:54; + uint64_t sset:1; + uint64_t lanemask:9; + } cn63xxp1; + struct cvmx_lmcx_wlevel_ctl_s cn66xx; + struct cvmx_lmcx_wlevel_ctl_s cn68xx; + struct cvmx_lmcx_wlevel_ctl_s cn68xxp1; + struct cvmx_lmcx_wlevel_ctl_s cn70xx; + struct cvmx_lmcx_wlevel_ctl_s cn70xxp1; + struct cvmx_lmcx_wlevel_ctl_s cn73xx; + struct cvmx_lmcx_wlevel_ctl_s cn78xx; + struct cvmx_lmcx_wlevel_ctl_s cn78xxp1; + struct cvmx_lmcx_wlevel_ctl_s cnf71xx; + struct cvmx_lmcx_wlevel_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_wlevel_dbg + * + * A given write of LMC()_WLEVEL_DBG returns the write leveling pass/fail + * results for all possible delay settings (i.e. the BITMASK) for only one + * byte in the last rank that the hardware write leveled. + * LMC()_WLEVEL_DBG[BYTE] selects the particular byte. To get these + * pass/fail results for a different rank, you must run the hardware write + * leveling again. For example, it is possible to get the [BITMASK] results + * for every byte of every rank if you run write leveling separately for + * each rank, probing LMC()_WLEVEL_DBG between each write-leveling. + */ +union cvmx_lmcx_wlevel_dbg { + u64 u64; + struct cvmx_lmcx_wlevel_dbg_s { + uint64_t reserved_12_63:52; + uint64_t bitmask:8; + uint64_t byte:4; + } s; + struct cvmx_lmcx_wlevel_dbg_s cn61xx; + struct cvmx_lmcx_wlevel_dbg_s cn63xx; + struct cvmx_lmcx_wlevel_dbg_s cn63xxp1; + struct cvmx_lmcx_wlevel_dbg_s cn66xx; + struct cvmx_lmcx_wlevel_dbg_s cn68xx; + struct cvmx_lmcx_wlevel_dbg_s cn68xxp1; + struct cvmx_lmcx_wlevel_dbg_s cn70xx; + struct cvmx_lmcx_wlevel_dbg_s cn70xxp1; + struct cvmx_lmcx_wlevel_dbg_s cn73xx; + struct cvmx_lmcx_wlevel_dbg_s cn78xx; + struct cvmx_lmcx_wlevel_dbg_s cn78xxp1; + struct cvmx_lmcx_wlevel_dbg_s cnf71xx; + struct cvmx_lmcx_wlevel_dbg_s cnf75xx; +}; + +/** + * cvmx_lmc#_wlevel_rank# + * + * Four of these CSRs exist per LMC, one for each rank. Write level setting + * is measured in units of 1/8 CK, so the below BYTEn values can range over + * 4 CK cycles. Assuming LMC()_WLEVEL_CTL[SSET]=0, the BYTEn<2:0> values are + * not used during write leveling, and they are overwritten by the hardware + * as part of the write leveling sequence. (Hardware sets [STATUS] to 3 after + * hardware write leveling completes for the rank). Software needs to set + * BYTEn<4:3> bits. + * + * Each CSR may also be written by software, but not while a write leveling + * sequence is in progress. (Hardware sets [STATUS] to 1 after a CSR write.) + * Software initiates a hardware write-leveling sequence by programming + * LMC()_WLEVEL_CTL and writing RANKMASK and INIT_START=1 with SEQ_SEL=6 in + * LMC*0_CONFIG. + * + * LMC will then step through and accumulate write leveling results for 8 + * unique delay settings (twice), starting at a delay of LMC()_WLEVEL_RANK() + * [BYTEn<4:3>]* 8 CK increasing by 1/8 CK each setting. Hardware will then + * set LMC()_WLEVEL_RANK()[BYTEn<2:0>] to indicate the first write leveling + * result of 1 that followed a result of 0 during the sequence by searching + * for a '1100' pattern in the generated bitmask, except that LMC will always + * write LMC()_WLEVEL_RANK()[BYTEn<0>]=0. If hardware is unable to find a match + * for a '1100' pattern, then hardware sets LMC()_WLEVEL_RANK() [BYTEn<2:0>] + * to 0x4. See LMC()_WLEVEL_CTL. + * + * LMC()_WLEVEL_RANKi values for ranks i without attached DRAM should be set + * such that they do not increase the range of possible BYTE values for any + * byte lane. The easiest way to do this is to set LMC()_WLEVEL_RANKi = + * LMC()_WLEVEL_RANKj, where j is some rank with attached DRAM whose + * LMC()_WLEVEL_RANKj is already fully initialized. + */ +union cvmx_lmcx_wlevel_rankx { + u64 u64; + struct cvmx_lmcx_wlevel_rankx_s { + uint64_t reserved_47_63:17; + uint64_t status:2; + uint64_t byte8:5; + uint64_t byte7:5; + uint64_t byte6:5; + uint64_t byte5:5; + uint64_t byte4:5; + uint64_t byte3:5; + uint64_t byte2:5; + uint64_t byte1:5; + uint64_t byte0:5; + } s; + struct cvmx_lmcx_wlevel_rankx_s cn61xx; + struct cvmx_lmcx_wlevel_rankx_s cn63xx; + struct cvmx_lmcx_wlevel_rankx_s cn63xxp1; + struct cvmx_lmcx_wlevel_rankx_s cn66xx; + struct cvmx_lmcx_wlevel_rankx_s cn68xx; + struct cvmx_lmcx_wlevel_rankx_s cn68xxp1; + struct cvmx_lmcx_wlevel_rankx_s cn70xx; + struct cvmx_lmcx_wlevel_rankx_s cn70xxp1; + struct cvmx_lmcx_wlevel_rankx_s cn73xx; + struct cvmx_lmcx_wlevel_rankx_s cn78xx; + struct cvmx_lmcx_wlevel_rankx_s cn78xxp1; + struct cvmx_lmcx_wlevel_rankx_s cnf71xx; + struct cvmx_lmcx_wlevel_rankx_s cnf75xx; +}; + +/** + * cvmx_lmc#_wodt_ctl0 + * + * LMC_WODT_CTL0 = LMC Write OnDieTermination control + * See the description in LMC_WODT_CTL1. + * + * Notes: + * Together, the LMC_WODT_CTL1 and LMC_WODT_CTL0 CSRs control the write + * ODT mask. See LMC_WODT_CTL1. + * + */ +union cvmx_lmcx_wodt_ctl0 { + u64 u64; + struct cvmx_lmcx_wodt_ctl0_s { + uint64_t reserved_0_63:64; + } s; + struct cvmx_lmcx_wodt_ctl0_cn30xx { + uint64_t reserved_32_63:32; + uint64_t wodt_d1_r1:8; + uint64_t wodt_d1_r0:8; + uint64_t wodt_d0_r1:8; + uint64_t wodt_d0_r0:8; + } cn30xx; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn31xx; + struct cvmx_lmcx_wodt_ctl0_cn38xx { + uint64_t reserved_32_63:32; + uint64_t wodt_hi3:4; + uint64_t wodt_hi2:4; + uint64_t wodt_hi1:4; + uint64_t wodt_hi0:4; + uint64_t wodt_lo3:4; + uint64_t wodt_lo2:4; + uint64_t wodt_lo1:4; + uint64_t wodt_lo0:4; + } cn38xx; + struct cvmx_lmcx_wodt_ctl0_cn38xx cn38xxp2; + struct cvmx_lmcx_wodt_ctl0_cn38xx cn50xx; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn52xx; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn52xxp1; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn56xx; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn56xxp1; + struct cvmx_lmcx_wodt_ctl0_cn38xx cn58xx; + struct cvmx_lmcx_wodt_ctl0_cn38xx cn58xxp1; +}; + +/** + * cvmx_lmc#_wodt_ctl1 + * + * LMC_WODT_CTL1 = LMC Write OnDieTermination control + * System designers may desire to terminate DQ/DQS/DM lines for higher + * frequency DDR operations (667MHz and faster), especially on a multi-rank + * system. DDR2 DQ/DM/DQS I/O's have built in Termination resistor that can + * be turned on or off by the controller, after meeting tAOND and tAOF + * timing requirements. Each Rank has its own ODT pin that fans out to all + * the memory parts in that DIMM. System designers may prefer different + * combinations of ODT ON's for read and write into different ranks. Octeon + * supports full programmability by way of the mask register below. + * Each Rank position has its own 8-bit programmable field. + * When the controller does a write to that rank, it sets the 8 ODT pins + * to the MASK pins below. For eg., When doing a write into Rank0, a system + * designer may desire to terminate the lines with the resistor on + * Dimm0/Rank1. The mask WODT_D0_R0 would then be [00000010]. If ODT feature + * is not desired, the DDR parts can be programmed to not look at these pins by + * writing 0 in QS_DIC. Octeon drives the appropriate mask values on the ODT + * pins by default. + * If this feature is not required, write 0 in this register. + * + * Notes: + * Together, the LMC_WODT_CTL1 and LMC_WODT_CTL0 CSRs control the write + * ODT mask. When a given RANK is selected, the WODT mask for that RANK + * is used. The resulting WODT mask is driven to the DIMMs in the following + * manner: + * BUNK_ENA=1 BUNK_ENA=0 + * Mask[7] -> DIMM3, RANK1 DIMM3 + * Mask[6] -> DIMM3, RANK0 + * Mask[5] -> DIMM2, RANK1 DIMM2 + * Mask[4] -> DIMM2, RANK0 + * Mask[3] -> DIMM1, RANK1 DIMM1 + * Mask[2] -> DIMM1, RANK0 + * Mask[1] -> DIMM0, RANK1 DIMM0 + * Mask[0] -> DIMM0, RANK0 + */ +union cvmx_lmcx_wodt_ctl1 { + u64 u64; + struct cvmx_lmcx_wodt_ctl1_s { + uint64_t reserved_32_63:32; + uint64_t wodt_d3_r1:8; + uint64_t wodt_d3_r0:8; + uint64_t wodt_d2_r1:8; + uint64_t wodt_d2_r0:8; + } s; + struct cvmx_lmcx_wodt_ctl1_s cn30xx; + struct cvmx_lmcx_wodt_ctl1_s cn31xx; + struct cvmx_lmcx_wodt_ctl1_s cn52xx; + struct cvmx_lmcx_wodt_ctl1_s cn52xxp1; + struct cvmx_lmcx_wodt_ctl1_s cn56xx; + struct cvmx_lmcx_wodt_ctl1_s cn56xxp1; +}; + +/** + * cvmx_lmc#_wodt_mask + * + * System designers may desire to terminate DQ/DQS lines for higher-frequency + * DDR operations, especially on a multirank system. DDR3 DQ/DQS I/Os have + * built-in termination resistors that can be turned on or off by the + * controller, after meeting TAOND and TAOF timing requirements. Each rank + * has its own ODT pin that fans out to all of the memory parts in that DIMM. + * System designers may prefer different combinations of ODT ONs for write + * operations into different ranks. CNXXXX supports full programmability by + * way of the mask register below. Each rank position has its own 8-bit + * programmable field. When the controller does a write to that rank, + * it sets the four ODT pins to the mask pins below. For example, when + * doing a write into Rank0, a system designer may desire to terminate the + * lines with the resistor on DIMM0/Rank1. The mask [WODT_D0_R0] would then + * be [00000010]. + * + * CNXXXX drives the appropriate mask values on the ODT pins by default. + * If this feature is not required, write 0x0 in this register. When a + * given RANK is selected, the WODT mask for that RANK is used. The + * resulting WODT mask is driven to the DIMMs in the following manner: + */ +union cvmx_lmcx_wodt_mask { + u64 u64; + struct cvmx_lmcx_wodt_mask_s { + uint64_t wodt_d3_r1:8; + uint64_t wodt_d3_r0:8; + uint64_t wodt_d2_r1:8; + uint64_t wodt_d2_r0:8; + uint64_t wodt_d1_r1:8; + uint64_t wodt_d1_r0:8; + uint64_t wodt_d0_r1:8; + uint64_t wodt_d0_r0:8; + } s; + struct cvmx_lmcx_wodt_mask_s cn61xx; + struct cvmx_lmcx_wodt_mask_s cn63xx; + struct cvmx_lmcx_wodt_mask_s cn63xxp1; + struct cvmx_lmcx_wodt_mask_s cn66xx; + struct cvmx_lmcx_wodt_mask_s cn68xx; + struct cvmx_lmcx_wodt_mask_s cn68xxp1; + struct cvmx_lmcx_wodt_mask_cn70xx { + uint64_t reserved_28_63:36; + uint64_t wodt_d1_r1:4; + uint64_t reserved_20_23:4; + uint64_t wodt_d1_r0:4; + uint64_t reserved_12_15:4; + uint64_t wodt_d0_r1:4; + uint64_t reserved_4_7:4; + uint64_t wodt_d0_r0:4; + } cn70xx; + struct cvmx_lmcx_wodt_mask_cn70xx cn70xxp1; + struct cvmx_lmcx_wodt_mask_cn70xx cn73xx; + struct cvmx_lmcx_wodt_mask_cn70xx cn78xx; + struct cvmx_lmcx_wodt_mask_cn70xx cn78xxp1; + struct cvmx_lmcx_wodt_mask_s cnf71xx; + struct cvmx_lmcx_wodt_mask_cn70xx cnf75xx; +}; + +#endif diff --git a/arch/mips/mach-octeon/include/mach/octeon-feature.h b/arch/mips/mach-octeon/include/mach/octeon-feature.h new file mode 100644 index 0000000000..1202716ba5 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/octeon-feature.h @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __OCTEON_FEATURE_H__ +#define __OCTEON_FEATURE_H__ + +/* + * Octeon models are declared after the macros in octeon-model.h with the + * suffix _FEATURE. The individual features are declared with the + * _FEATURE_ infix. + */ +enum octeon_feature { + /* + * Checks on the critical path are moved to the top (8 positions) + * so that the compiler generates one less insn than for the rest + * of the checks. + */ + OCTEON_FEATURE_PKND, /* CN68XX uses port kinds for packet interface */ + /* CN68XX has different fields in word0 - word2 */ + OCTEON_FEATURE_CN68XX_WQE, + + /* + * Features + */ + /* + * Octeon models in the CN5XXX family and higher support atomic + * add instructions to memory (saa/saad) + */ + OCTEON_FEATURE_SAAD, + /* Does this Octeon support the ZIP offload engine? */ + OCTEON_FEATURE_ZIP, + /* Does this Octeon support crypto acceleration using COP2? */ + OCTEON_FEATURE_CRYPTO, + /* Can crypto be enabled by calling cvmx_crypto_dormant_enable()? */ + OCTEON_FEATURE_DORM_CRYPTO, + OCTEON_FEATURE_PCIE, /* Does this Octeon support PCI express? */ + OCTEON_FEATURE_SRIO, /* Does this Octeon support SRIO */ + OCTEON_FEATURE_ILK, /* Does this Octeon support Interlaken */ + /* + * Some Octeon models support internal memory for storing + * cryptographic keys + */ + OCTEON_FEATURE_KEY_MEMORY, + /* Octeon has a LED controller for banks of external LEDs */ + OCTEON_FEATURE_LED_CONTROLLER, + OCTEON_FEATURE_TRA, /* Octeon has a trace buffer */ + OCTEON_FEATURE_MGMT_PORT, /* Octeon has a management port */ + OCTEON_FEATURE_RAID, /* Octeon has a raid unit */ + OCTEON_FEATURE_USB, /* Octeon has a builtin USB */ + /* Octeon IPD can run without using work queue entries */ + OCTEON_FEATURE_NO_WPTR, + OCTEON_FEATURE_DFA, /* Octeon has DFA state machines */ + /* + * Octeon MDIO block supports clause 45 transactions for + * 10 Gig support + */ + OCTEON_FEATURE_MDIO_CLAUSE_45, + /* + * CN52XX and CN56XX used a block named NPEI for PCIe access. + * Newer chips replaced this with SLI+DPI + */ + OCTEON_FEATURE_NPEI, + OCTEON_FEATURE_HFA, /* Octeon has DFA/HFA */ + OCTEON_FEATURE_DFM, /* Octeon has DFM */ + OCTEON_FEATURE_CIU2, /* Octeon has CIU2 */ + /* Octeon has DMA Instruction Completion Interrupt mode */ + OCTEON_FEATURE_DICI_MODE, + /* Octeon has Bit Select Extractor schedulor */ + OCTEON_FEATURE_BIT_EXTRACTOR, + OCTEON_FEATURE_NAND, /* Octeon has NAND */ + OCTEON_FEATURE_MMC, /* Octeon has built-in MMC support */ + OCTEON_FEATURE_ROM, /* Octeon has built-in ROM support */ + OCTEON_FEATURE_AUTHENTIK, /* Octeon has Authentik ROM support */ + OCTEON_FEATURE_MULTICAST_TIMER, /* Octeon has multi_cast timer */ + OCTEON_FEATURE_MULTINODE, /* Octeon has node support */ + OCTEON_FEATURE_CIU3, /* Octeon has CIU3 */ + OCTEON_FEATURE_FPA3, /* Octeon has FPA first seen on 78XX */ + /* CN78XX has different fields in word0 - word2 */ + OCTEON_FEATURE_CN78XX_WQE, + OCTEON_FEATURE_PKO3, /* Octeon has enhanced PKO block */ + OCTEON_FEATURE_SPI, /* Octeon supports SPI interfaces */ + OCTEON_FEATURE_ZIP3, /* Octeon has zip first seen on 78XX */ + OCTEON_FEATURE_BCH, /* Octeon supports BCH ECC */ + OCTEON_FEATURE_PKI, /* Octeon has PKI block */ + OCTEON_FEATURE_OCLA, /* Octeon has OCLA */ + OCTEON_FEATURE_FAU, /* Octeon has FAU */ + OCTEON_FEATURE_BGX, /* Octeon has BGX */ + OCTEON_FEATURE_BGX_MIX, /* On of the BGX is used for MIX */ + OCTEON_FEATURE_HNA, /* Octeon has HNA */ + OCTEON_FEATURE_BGX_XCV, /* Octeon has BGX XCV RGMII support */ + OCTEON_FEATURE_TSO, /* Octeon has tcp segmentation offload */ + OCTEON_FEATURE_TDM, /* Octeon has PCM/TDM support */ + OCTEON_FEATURE_PTP, /* Octeon has PTP support */ + OCTEON_MAX_FEATURE +}; + +static inline int octeon_has_feature_OCTEON_FEATURE_SAAD(void) +{ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_ZIP(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX) || OCTEON_IS_MODEL(OCTEON_CNF75XX)) + return 0; + else + return !cvmx_fuse_read(121); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_ZIP3(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BCH(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN70XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CRYPTO(void) +{ + /* OCTEON II and later */ + u64 val; + + val = csr_rd(CVMX_MIO_FUS_DAT2); + if (val & MIO_FUS_DAT2_NOCRYPTO || val & MIO_FUS_DAT2_NOMUL) + return 0; + else if (!(val & MIO_FUS_DAT2_DORM_CRYPTO)) + return 1; + + val = csr_rd(CVMX_RNM_CTL_STATUS); + return val & RNM_CTL_STATUS_EER_VAL; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_DORM_CRYPTO(void) +{ + /* OCTEON II and later */ + u64 val; + + val = csr_rd(CVMX_MIO_FUS_DAT2); + return !(val & MIO_FUS_DAT2_NOCRYPTO) && !(val & MIO_FUS_DAT2_NOMUL) && + (val & MIO_FUS_DAT2_DORM_CRYPTO); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PCIE(void) +{ + /* OCTEON II and later have PCIe */ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_SRIO(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CNF75XX)) { + if (cvmx_fuse_read(1601) == 0) + return 0; + else + return 1; + } else { + return (OCTEON_IS_MODEL(OCTEON_CN63XX) || + OCTEON_IS_MODEL(OCTEON_CN66XX)); + } +} + +static inline int octeon_has_feature_OCTEON_FEATURE_ILK(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN68XX) || + OCTEON_IS_MODEL(OCTEON_CN78XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_KEY_MEMORY(void) +{ + /* OCTEON II or later */ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_LED_CONTROLLER(void) +{ + return false; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_TRA(void) +{ + return !OCTEON_IS_OCTEON3(); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MGMT_PORT(void) +{ + /* OCTEON II or later */ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_RAID(void) +{ + return !OCTEON_IS_MODEL(OCTEON_CNF75XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_USB(void) +{ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_NO_WPTR(void) +{ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_DFA(void) +{ + return 0; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_HFA(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CNF75XX)) + return 0; + else + return !cvmx_fuse_read(90); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_HNA(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CN78XX) || OCTEON_IS_MODEL(OCTEON_CN73XX)) + return !cvmx_fuse_read(134); + else + return 0; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_DFM(void) +{ + if (!(OCTEON_IS_MODEL(OCTEON_CN63XX) || OCTEON_IS_MODEL(OCTEON_CN66XX))) + return 0; + else + return !cvmx_fuse_read(90); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MDIO_CLAUSE_45(void) +{ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_NPEI(void) +{ + return false; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PKND(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN68XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CN78XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CN68XX_WQE(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN68XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CIU2(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN68XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CIU3(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_FPA3(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_NAND(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN63XX) || + OCTEON_IS_MODEL(OCTEON_CN66XX) || + OCTEON_IS_MODEL(OCTEON_CN68XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_DICI_MODE(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN68XX_PASS2_X) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BIT_EXTRACTOR(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN68XX_PASS2_X) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MMC(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || OCTEON_IS_OCTEON3()); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_ROM(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN66XX) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_AUTHENTIK(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CN66XX) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)) { + u64 val; + + val = csr_rd(CVMX_MIO_FUS_DAT2); + return (val & MIO_FUS_DAT2_NOCRYPTO) && + (val & MIO_FUS_DAT2_DORM_CRYPTO); + } + + return 0; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MULTICAST_TIMER(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN66XX_PASS1_2) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MULTINODE(void) +{ + return (!OCTEON_IS_MODEL(OCTEON_CN76XX) && + OCTEON_IS_MODEL(OCTEON_CN78XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CN78XX_WQE(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_SPI(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN66XX) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || OCTEON_IS_OCTEON3()); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PKI(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PKO3(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_OCLA(void) +{ + return OCTEON_IS_OCTEON3(); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_FAU(void) +{ + return (!OCTEON_IS_MODEL(OCTEON_CN78XX) && + !OCTEON_IS_MODEL(OCTEON_CNF75XX) && + !OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BGX(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BGX_MIX(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BGX_XCV(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN73XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_TSO(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN78XX_PASS2_X)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_TDM(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PTP(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN6XXX) || + OCTEON_IS_MODEL(OCTEON_CNF7XXX) || + OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN78XX_PASS2_X); +} + +/* + * Answer ``Is the bit for feature set in the bitmap?'' + * @param feature + * @return 1 when the feature is present and 0 otherwise, -1 in case of error. + */ +#define octeon_has_feature(feature_x) octeon_has_feature_##feature_x() + +#endif /* __OCTEON_FEATURE_H__ */ diff --git a/arch/mips/mach-octeon/include/mach/octeon-model.h b/arch/mips/mach-octeon/include/mach/octeon-model.h new file mode 100644 index 0000000000..22d6df6a9e --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/octeon-model.h @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __OCTEON_MODEL_H__ +#define __OCTEON_MODEL_H__ + +/* + * NOTE: These must match what is checked in common-config.mk + * Defines to represent the different versions of Octeon. + * + * IMPORTANT: When the default pass is updated for an Octeon Model, + * the corresponding change must also be made in the oct-sim script. + * + * The defines below should be used with the OCTEON_IS_MODEL() macro to + * determine what model of chip the software is running on. Models ending + * in 'XX' match multiple models (families), while specific models match only + * that model. If a pass (revision) is specified, then only that revision + * will be matched. Care should be taken when checking for both specific + * models and families that the specific models are checked for first. + * While these defines are similar to the processor ID, they are not intended + * to be used by anything other that the OCTEON_IS_MODEL framework, and + * the values are subject to change at anytime without notice. + * + * NOTE: only the OCTEON_IS_MODEL() macro/function and the OCTEON_CN* macros + * should be used outside of this file. All other macros are for internal + * use only, and may change without notice. + */ + +#define OCTEON_FAMILY_MASK 0x00ffff00 +#define OCTEON_PRID_MASK 0x00ffffff + +/* Flag bits in top byte */ +/* Ignores revision in model checks */ +#define OM_IGNORE_REVISION 0x01000000 +/* Check submodels */ +#define OM_CHECK_SUBMODEL 0x02000000 +/* Match all models previous than the one specified */ +#define OM_MATCH_PREVIOUS_MODELS 0x04000000 +/* Ignores the minor revison on newer parts */ +#define OM_IGNORE_MINOR_REVISION 0x08000000 +#define OM_FLAG_MASK 0xff000000 + +/* Match all cn5XXX Octeon models. */ +#define OM_MATCH_5XXX_FAMILY_MODELS 0x20000000 +/* Match all cn6XXX Octeon models. */ +#define OM_MATCH_6XXX_FAMILY_MODELS 0x40000000 +/* Match all cnf7XXX Octeon models. */ +#define OM_MATCH_F7XXX_FAMILY_MODELS 0x80000000 +/* Match all cn7XXX Octeon models. */ +#define OM_MATCH_7XXX_FAMILY_MODELS 0x10000000 +#define OM_MATCH_FAMILY_MODELS (OM_MATCH_5XXX_FAMILY_MODELS | \ + OM_MATCH_6XXX_FAMILY_MODELS | \ + OM_MATCH_F7XXX_FAMILY_MODELS | \ + OM_MATCH_7XXX_FAMILY_MODELS) + +/* + * CN7XXX models with new revision encoding + */ + +#define OCTEON_CNF75XX_PASS1_0 0x000d9800 +#define OCTEON_CNF75XX_PASS1_2 0x000d9802 +#define OCTEON_CNF75XX_PASS1_3 0x000d9803 +#define OCTEON_CNF75XX (OCTEON_CNF75XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CNF75XX_PASS1_X \ + (OCTEON_CNF75XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN73XX_PASS1_0 0x000d9700 +#define OCTEON_CN73XX_PASS1_1 0x000d9701 +#define OCTEON_CN73XX_PASS1_2 0x000d9702 +#define OCTEON_CN73XX_PASS1_3 0x000d9703 +#define OCTEON_CN73XX (OCTEON_CN73XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CN73XX_PASS1_X \ + (OCTEON_CN73XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN72XX OCTEON_CN73XX + +#define OCTEON_CN23XX OCTEON_CN73XX +#define OCTEON_CN23XX_PASS1_2 OCTEON_CN73XX_PASS1_2 +#define OCTEON_CN23XX_PASS1_3 OCTEON_CN73XX_PASS1_3 + +#define OCTEON_CN70XX_PASS1_0 0x000d9600 +#define OCTEON_CN70XX_PASS1_1 0x000d9601 +#define OCTEON_CN70XX_PASS1_2 0x000d9602 + +#define OCTEON_CN70XX_PASS2_0 0x000d9608 + +#define OCTEON_CN70XX (OCTEON_CN70XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CN70XX_PASS1_X \ + (OCTEON_CN70XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) +#define OCTEON_CN70XX_PASS2_X \ + (OCTEON_CN70XX_PASS2_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN71XX OCTEON_CN70XX + +#define OCTEON_CN78XX_PASS1_0 0x000d9500 +#define OCTEON_CN78XX_PASS1_1 0x000d9501 +#define OCTEON_CN78XX_PASS2_0 0x000d9508 + +#define OCTEON_CN78XX (OCTEON_CN78XX_PASS2_0 | OM_IGNORE_REVISION) +#define OCTEON_CN78XX_PASS1_X \ + (OCTEON_CN78XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) +#define OCTEON_CN78XX_PASS2_X \ + (OCTEON_CN78XX_PASS2_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN76XX (0x000d9540 | OM_CHECK_SUBMODEL) + +/* + * CNF7XXX models with new revision encoding + */ +#define OCTEON_CNF71XX_PASS1_0 0x000d9400 +#define OCTEON_CNF71XX_PASS1_1 0x000d9401 + +#define OCTEON_CNF71XX (OCTEON_CNF71XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CNF71XX_PASS1_X \ + (OCTEON_CNF71XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +/* + * CN6XXX models with new revision encoding + */ +#define OCTEON_CN68XX_PASS1_0 0x000d9100 +#define OCTEON_CN68XX_PASS1_1 0x000d9101 +#define OCTEON_CN68XX_PASS2_0 0x000d9108 +#define OCTEON_CN68XX_PASS2_1 0x000d9109 +#define OCTEON_CN68XX_PASS2_2 0x000d910a + +#define OCTEON_CN68XX (OCTEON_CN68XX_PASS2_0 | OM_IGNORE_REVISION) +#define OCTEON_CN68XX_PASS1_X \ + (OCTEON_CN68XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) +#define OCTEON_CN68XX_PASS2_X \ + (OCTEON_CN68XX_PASS2_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN68XX_PASS1 OCTEON_CN68XX_PASS1_X +#define OCTEON_CN68XX_PASS2 OCTEON_CN68XX_PASS2_X + +#define OCTEON_CN66XX_PASS1_0 0x000d9200 +#define OCTEON_CN66XX_PASS1_2 0x000d9202 + +#define OCTEON_CN66XX (OCTEON_CN66XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CN66XX_PASS1_X \ + (OCTEON_CN66XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN63XX_PASS1_0 0x000d9000 +#define OCTEON_CN63XX_PASS1_1 0x000d9001 +#define OCTEON_CN63XX_PASS1_2 0x000d9002 +#define OCTEON_CN63XX_PASS2_0 0x000d9008 +#define OCTEON_CN63XX_PASS2_1 0x000d9009 +#define OCTEON_CN63XX_PASS2_2 0x000d900a + +#define OCTEON_CN63XX (OCTEON_CN63XX_PASS2_0 | OM_IGNORE_REVISION) +#define OCTEON_CN63XX_PASS1_X \ + (OCTEON_CN63XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) +#define OCTEON_CN63XX_PASS2_X \ + (OCTEON_CN63XX_PASS2_0 | OM_IGNORE_MINOR_REVISION) + +/* CN62XX is same as CN63XX with 1 MB cache */ +#define OCTEON_CN62XX OCTEON_CN63XX + +#define OCTEON_CN61XX_PASS1_0 0x000d9300 +#define OCTEON_CN61XX_PASS1_1 0x000d9301 + +#define OCTEON_CN61XX (OCTEON_CN61XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CN61XX_PASS1_X \ + (OCTEON_CN61XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +/* CN60XX is same as CN61XX with 512 KB cache */ +#define OCTEON_CN60XX OCTEON_CN61XX + +/* This matches the complete family of CN3xxx CPUs, and not subsequent models */ +#define OCTEON_CN6XXX \ + (OCTEON_CN63XX_PASS1_0 | OM_MATCH_6XXX_FAMILY_MODELS) +#define OCTEON_CNF7XXX \ + (OCTEON_CNF71XX_PASS1_0 | OM_MATCH_F7XXX_FAMILY_MODELS) +#define OCTEON_CN7XXX \ + (OCTEON_CN78XX_PASS1_0 | OM_MATCH_7XXX_FAMILY_MODELS) + +/* + * The revision byte (low byte) has two different encodings. + * CN3XXX: + * + * bits + * <7:5>: reserved (0) + * <4>: alternate package + * <3:0>: revision + * + * CN5XXX and older models: + * + * bits + * <7>: reserved (0) + * <6>: alternate package + * <5:3>: major revision + * <2:0>: minor revision + */ + +/* Masks used for the various types of model/family/revision matching */ +#define OCTEON_38XX_FAMILY_MASK 0x00ffff00 +#define OCTEON_38XX_FAMILY_REV_MASK 0x00ffff0f +#define OCTEON_38XX_MODEL_MASK 0x00ffff10 +#define OCTEON_38XX_MODEL_REV_MASK \ + (OCTEON_38XX_FAMILY_REV_MASK | OCTEON_38XX_MODEL_MASK) + +/* CN5XXX and later use different layout of bits in the revision ID field */ +#define OCTEON_58XX_FAMILY_MASK OCTEON_38XX_FAMILY_MASK +#define OCTEON_58XX_FAMILY_REV_MASK 0x00ffff3f +#define OCTEON_58XX_MODEL_MASK 0x00ffff40 +#define OCTEON_58XX_MODEL_REV_MASK \ + (OCTEON_58XX_FAMILY_REV_MASK | OCTEON_58XX_MODEL_MASK) +#define OCTEON_58XX_MODEL_MINOR_REV_MASK \ + (OCTEON_58XX_MODEL_REV_MASK & 0x00ffff38) +#define OCTEON_5XXX_MODEL_MASK 0x00ff0fc0 + +#define __OCTEON_MATCH_MASK__(X, Y, Z) \ + ({ \ + typeof(X) x = (X); \ + typeof(Y) y = (Y); \ + typeof(Z) z = (Z); \ + (x & z) == (y & z); \ + }) + +/* + * __OCTEON_IS_MODEL_COMPILE__(arg_model, chip_model) + * returns true if chip_model is identical or belong to the OCTEON + * model group specified in arg_model. + */ + +/* Helper macros to make to following macro compacter */ +#define OM_MASK OM_FLAG_MASK +#define OM_MATCH_MASK __OCTEON_MATCH_MASK__ +#define OM_MATCH_PREVIOUS OM_MATCH_PREVIOUS_MODELS + +#define __OCTEON_IS_MODEL_COMPILE__(A, B) \ + ({ \ + typeof(A) a = (A); \ + typeof(B) b = (B); \ + (((((((a) & OM_MASK) == (OM_IGNORE_REVISION | OM_CHECK_SUBMODEL)) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MASK)) || \ + ((((a) & OM_MASK) == 0) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_FAMILY_REV_MASK)) || \ + ((((a) & OM_MASK) == OM_IGNORE_MINOR_REVISION) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MINOR_REV_MASK)) || \ + ((((a) & OM_MASK) == OM_CHECK_SUBMODEL) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MASK)) || \ + ((((a) & OM_MASK) == OM_IGNORE_REVISION) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_FAMILY_MASK)) || \ + ((((a) & (OM_MATCH_5XXX_FAMILY_MODELS)) == \ + OM_MATCH_5XXX_FAMILY_MODELS) && \ + ((b & OCTEON_PRID_MASK) < OCTEON_CN63XX_PASS1_0)) || \ + ((((a) & (OM_MATCH_6XXX_FAMILY_MODELS)) == \ + OM_MATCH_6XXX_FAMILY_MODELS) && \ + ((b & OCTEON_PRID_MASK) >= OCTEON_CN63XX_PASS1_0) && \ + ((b & OCTEON_PRID_MASK) < OCTEON_CNF71XX_PASS1_0)) || \ + ((((a) & (OM_MATCH_F7XXX_FAMILY_MODELS)) == \ + OM_MATCH_F7XXX_FAMILY_MODELS) && \ + ((b & OCTEON_PRID_MASK) >= OCTEON_CNF71XX_PASS1_0) && \ + ((b & OCTEON_PRID_MASK) < OCTEON_CN78XX_PASS1_0)) || \ + ((((a) & (OM_MATCH_7XXX_FAMILY_MODELS)) == \ + OM_MATCH_7XXX_FAMILY_MODELS) && ((b & OCTEON_PRID_MASK) >= \ + OCTEON_CN78XX_PASS1_0)) || \ + ((((a) & (OM_MATCH_PREVIOUS)) == OM_MATCH_PREVIOUS) && \ + (((b) & OCTEON_58XX_MODEL_MASK) < ((a) & OCTEON_58XX_MODEL_MASK))) \ + ))); \ + }) + +#ifndef __ASSEMBLY__ + +#ifndef OCTEON_IS_MODEL + +static inline int __octeon_is_model_runtime_internal__(u32 model) +{ + u32 cpuid = read_c0_prid(); + + return __OCTEON_IS_MODEL_COMPILE__(model, cpuid); +} + +static inline int __octeon_is_model_runtime__(u32 model) +{ + return __octeon_is_model_runtime_internal__(model); +} + +/* + * The OCTEON_IS_MODEL macro should be used for all Octeon model checking done + * in a program. + * This should be kept runtime if at all possible and must be conditionalized + * with OCTEON_IS_COMMON_BINARY() if runtime checking support is required. + * + * Use of the macro in preprocessor directives ( #if OCTEON_IS_MODEL(...) ) + * is NOT SUPPORTED, and should be replaced with CVMX_COMPILED_FOR() + * I.e.: + * #if OCTEON_IS_MODEL(OCTEON_CN56XX) -> #if CVMX_COMPILED_FOR(OCTEON_CN56XX) + */ +#define OCTEON_IS_MODEL(x) __octeon_is_model_runtime__(x) +#define OCTEON_IS_COMMON_BINARY() 1 +#undef OCTEON_MODEL +#endif + +#define OCTEON_IS_OCTEON2() \ + (OCTEON_IS_MODEL(OCTEON_CN6XXX) || OCTEON_IS_MODEL(OCTEON_CNF71XX)) + +#define OCTEON_IS_OCTEON3() OCTEON_IS_MODEL(OCTEON_CN7XXX) + +const char *octeon_model_get_string(u32 chip_id); +const char *octeon_model_get_string_buffer(u32 chip_id, char *buffer); + +/** + * Return the octeon family, i.e., ProcessorID of the PrID register. + * + * @return the octeon family on success, ((u32)-1) on error. + */ +static inline u32 cvmx_get_octeon_family(void) +{ + return (read_c0_prid() & OCTEON_FAMILY_MASK); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __OCTEON_MODEL_H__ */ diff --git a/arch/mips/mach-octeon/include/mach/octeon_ddr.h b/arch/mips/mach-octeon/include/mach/octeon_ddr.h new file mode 100644 index 0000000000..4473be4d44 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/octeon_ddr.h @@ -0,0 +1,982 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __OCTEON_DDR_H_ +#define __OCTEON_DDR_H_ + +#include +#include +#include +#include +#include +#include + +/* Mapping is done starting from 0x11800.80000000 */ +#define CVMX_L2C_CTL 0x00800000 +#define CVMX_L2C_BIG_CTL 0x00800030 +#define CVMX_L2C_TADX_INT(i) (0x00a00028 + (((i) & 7) * 0x40000)) +#define CVMX_L2C_MCIX_INT(i) (0x00c00028 + (((i) & 3) * 0x40000)) + +/* Some "external" (non-LMC) registers */ +#define CVMX_IPD_CLK_COUNT 0x00014F0000000338 +#define CVMX_FPA_CLK_COUNT 0x00012800000000F0 + +#define CVMX_NODE_MEM_SHIFT 40 + +#define DDR_INTERFACE_MAX 4 + +/* Private data struct */ +struct ddr_priv { + void __iomem *lmc_base; + void __iomem *l2c_base; + + bool ddr_clock_initialized[DDR_INTERFACE_MAX]; + bool ddr_memory_preserved; + u32 flags; + + struct ram_info info; +}; + +/* Short cut to convert a number to megabytes */ +#define MB(X) ((u64)(X) * (u64)(1024 * 1024)) + +#define octeon_is_cpuid(x) (__OCTEON_IS_MODEL_COMPILE__(x, read_c0_prid())) + +#define strtoull simple_strtoull + +/* Access LMC registers */ +static inline u64 lmc_rd(struct ddr_priv *priv, u64 addr) +{ + return ioread64(priv->lmc_base + addr); +} + +static inline void lmc_wr(struct ddr_priv *priv, u64 addr, u64 val) +{ + iowrite64(val, priv->lmc_base + addr); +} + +/* Access L2C registers */ +static inline u64 l2c_rd(struct ddr_priv *priv, u64 addr) +{ + return ioread64(priv->l2c_base + addr); +} + +static inline void l2c_wr(struct ddr_priv *priv, u64 addr, u64 val) +{ + iowrite64(val, priv->l2c_base + addr); +} + +/* Access other CSR registers not located inside the LMC address space */ +static inline u64 csr_rd(u64 addr) +{ + void __iomem *base; + + base = ioremap_nocache(addr, 0x100); + return ioread64(base); +} + +static inline void csr_wr(u64 addr, u64 val) +{ + void __iomem *base; + + base = ioremap_nocache(addr, 0x100); + return iowrite64(val, base); +} + +/* "Normal" access, without any offsets and/or mapping */ +static inline u64 cvmx_read64_uint64(u64 addr) +{ + return readq((void *)addr); +} + +static inline void cvmx_write64_uint64(u64 addr, u64 val) +{ + writeq(val, (void *)addr); +} + +/* Failsafe mode */ +#define FLAG_FAILSAFE_MODE 0x01000 +/* Note that the DDR clock initialized flags must be contiguous */ +/* Clock for DDR 0 initialized */ +#define FLAG_DDR0_CLK_INITIALIZED 0x02000 +/* Clock for DDR 1 initialized */ +#define FLAG_DDR1_CLK_INITIALIZED 0x04000 +/* Clock for DDR 2 initialized */ +#define FLAG_DDR2_CLK_INITIALIZED 0x08000 +/* Clock for DDR 3 initialized */ +#define FLAG_DDR3_CLK_INITIALIZED 0x10000 +/* Loaded into RAM externally */ +#define FLAG_RAM_RESIDENT 0x20000 +/* Verbose DDR information */ +#define FLAG_DDR_VERBOSE 0x40000 +/* Check env. for DDR variables */ +#define FLAG_DDR_DEBUG 0x80000 +#define FLAG_DDR_TRACE_INIT 0x100000 +#define FLAG_MEMORY_PRESERVED 0x200000 +#define FLAG_DFM_VERBOSE 0x400000 +#define FLAG_DFM_TRACE_INIT 0x800000 +/* DFM memory clock initialized */ +#define FLAG_DFM_CLK_INITIALIZED 0x1000000 +/* EEPROM clock descr. missing */ +#define FLAG_CLOCK_DESC_MISSING 0x2000000 +/* EEPROM board descr. missing */ +#define FLAG_BOARD_DESC_MISSING 0x4000000 +#define FLAG_DDR_PROMPT 0x8000000 + +#ifndef DDR_NO_DEBUG +static inline int ddr_verbose(struct ddr_priv *priv) +{ + return !!(priv->flags & FLAG_DDR_VERBOSE); +} + +static inline char *ddr_getenv_debug(struct ddr_priv *priv, char *name) +{ + if (priv->flags & FLAG_FAILSAFE_MODE) + return NULL; + + if (priv->flags & FLAG_DDR_DEBUG) + return env_get(name); + + return NULL; +} +#else +static inline int ddr_verbose(void) +{ + return 0; +} +#endif + +/* turn the variable name into a string */ +#define CVMX_TMP_STR(x) CVMX_TMP_STR2(x) +#define CVMX_TMP_STR2(x) #x + +#define CVMX_SYNC asm volatile ("sync" : : : "memory") + +#define CVMX_CACHE(op, address, offset) \ + asm volatile ("cache " CVMX_TMP_STR(op) ", " \ + CVMX_TMP_STR(offset) "(%[rbase])" \ + : : [rbase] "d" (address)) + +/* unlock the state */ +#define CVMX_CACHE_WBIL2(address, offset) \ + CVMX_CACHE(23, address, offset) + +/* complete prefetches, invalidate entire dcache */ +#define CVMX_DCACHE_INVALIDATE \ + { CVMX_SYNC; asm volatile ("cache 9, 0($0)" : : ); } + +/** + * cvmx_l2c_cfg + * + * Specify the RSL base addresses for the block + * + * L2C_CFG = L2C Configuration + * + * Description: + */ +union cvmx_l2c_cfg { + u64 u64; + struct cvmx_l2c_cfg_s { + uint64_t reserved_20_63:44; + uint64_t bstrun:1; + uint64_t lbist:1; + uint64_t xor_bank:1; + uint64_t dpres1:1; + uint64_t dpres0:1; + uint64_t dfill_dis:1; + uint64_t fpexp:4; + uint64_t fpempty:1; + uint64_t fpen:1; + uint64_t idxalias:1; + uint64_t mwf_crd:4; + uint64_t rsp_arb_mode:1; + uint64_t rfb_arb_mode:1; + uint64_t lrf_arb_mode:1; + } s; +}; + +/** + * cvmx_l2c_ctl + * + * L2C_CTL = L2C Control + * + * + * Notes: + * (1) If MAXVAB is != 0, VAB_THRESH should be less than MAXVAB. + * + * (2) L2DFDBE and L2DFSBE allows software to generate L2DSBE, L2DDBE, VBFSBE, + * and VBFDBE errors for the purposes of testing error handling code. When + * one (or both) of these bits are set a PL2 which misses in the L2 will fill + * with the appropriate error in the first 2 OWs of the fill. Software can + * determine which OW pair gets the error by choosing the desired fill order + * (address<6:5>). A PL2 which hits in the L2 will not inject any errors. + * Therefore sending a WBIL2 prior to the PL2 is recommended to make a miss + * likely (if multiple processors are involved software must be careful to be + * sure no other processor or IO device can bring the block into the L2). + * + * To generate a VBFSBE or VBFDBE, software must first get the cache block + * into the cache with an error using a PL2 which misses the L2. Then a + * store partial to a portion of the cache block without the error must + * change the block to dirty. Then, a subsequent WBL2/WBIL2/victim will + * trigger the VBFSBE/VBFDBE error. + */ +union cvmx_l2c_ctl { + u64 u64; + struct cvmx_l2c_ctl_s { + uint64_t reserved_29_63:35; + uint64_t rdf_fast:1; + uint64_t disstgl2i:1; + uint64_t l2dfsbe:1; + uint64_t l2dfdbe:1; + uint64_t discclk:1; + uint64_t maxvab:4; + uint64_t maxlfb:4; + uint64_t rsp_arb_mode:1; + uint64_t xmc_arb_mode:1; + uint64_t reserved_2_13:12; + uint64_t disecc:1; + uint64_t disidxalias:1; + } s; + + struct cvmx_l2c_ctl_cn73xx { + uint64_t reserved_32_63:32; + uint64_t ocla_qos:3; + uint64_t reserved_28_28:1; + uint64_t disstgl2i:1; + uint64_t reserved_25_26:2; + uint64_t discclk:1; + uint64_t reserved_16_23:8; + uint64_t rsp_arb_mode:1; + uint64_t xmc_arb_mode:1; + uint64_t rdf_cnt:8; + uint64_t reserved_4_5:2; + uint64_t disldwb:1; + uint64_t dissblkdty:1; + uint64_t disecc:1; + uint64_t disidxalias:1; + } cn73xx; + + struct cvmx_l2c_ctl_cn73xx cn78xx; +}; + +/** + * cvmx_l2c_big_ctl + * + * L2C_BIG_CTL = L2C Big memory control register + * + * + * Notes: + * (1) BIGRD interrupts can occur during normal operation as the PP's are + * allowed to prefetch to non-existent memory locations. Therefore, + * BIGRD is for informational purposes only. + * + * (2) When HOLEWR/BIGWR blocks a store L2C_VER_ID, L2C_VER_PP, L2C_VER_IOB, + * and L2C_VER_MSC will be loaded just like a store which is blocked by VRTWR. + * Additionally, L2C_ERR_XMC will be loaded. + */ +union cvmx_l2c_big_ctl { + u64 u64; + struct cvmx_l2c_big_ctl_s { + uint64_t reserved_8_63:56; + uint64_t maxdram:4; + uint64_t reserved_0_3:4; + } s; + struct cvmx_l2c_big_ctl_cn61xx { + uint64_t reserved_8_63:56; + uint64_t maxdram:4; + uint64_t reserved_1_3:3; + uint64_t disable:1; + } cn61xx; + struct cvmx_l2c_big_ctl_cn61xx cn63xx; + struct cvmx_l2c_big_ctl_cn61xx cn66xx; + struct cvmx_l2c_big_ctl_cn61xx cn68xx; + struct cvmx_l2c_big_ctl_cn61xx cn68xxp1; + struct cvmx_l2c_big_ctl_cn70xx { + uint64_t reserved_8_63:56; + uint64_t maxdram:4; + uint64_t reserved_1_3:3; + uint64_t disbig:1; + } cn70xx; + struct cvmx_l2c_big_ctl_cn70xx cn70xxp1; + struct cvmx_l2c_big_ctl_cn70xx cn73xx; + struct cvmx_l2c_big_ctl_cn70xx cn78xx; + struct cvmx_l2c_big_ctl_cn70xx cn78xxp1; + struct cvmx_l2c_big_ctl_cn61xx cnf71xx; + struct cvmx_l2c_big_ctl_cn70xx cnf75xx; +}; + +struct rlevel_byte_data { + int delay; + int loop_total; + int loop_count; + int best; + u64 bm; + int bmerrs; + int sqerrs; + int bestsq; +}; + +#define DEBUG_VALIDATE_BITMASK 0 +#if DEBUG_VALIDATE_BITMASK +#define debug_bitmask_print printf +#else +#define debug_bitmask_print(...) +#endif + +#define RLEVEL_BITMASK_TRAILING_BITS_ERROR 5 +// FIXME? now less than TOOLONG +#define RLEVEL_BITMASK_BUBBLE_BITS_ERROR 11 +#define RLEVEL_BITMASK_NARROW_ERROR 6 +#define RLEVEL_BITMASK_BLANK_ERROR 100 +#define RLEVEL_BITMASK_TOOLONG_ERROR 12 +#define RLEVEL_NONSEQUENTIAL_DELAY_ERROR 50 +#define RLEVEL_ADJACENT_DELAY_ERROR 30 + +/* + * Apply a filter to the BITMASK results returned from Octeon + * read-leveling to determine the most likely delay result. This + * computed delay may be used to qualify the delay result returned by + * Octeon. Accumulate an error penalty for invalid characteristics of + * the bitmask so that they can be used to select the most reliable + * results. + * + * The algorithm searches for the largest contiguous MASK within a + * maximum RANGE of bits beginning with the MSB. + * + * 1. a MASK with a WIDTH less than 4 will be penalized + * 2. Bubbles in the bitmask that occur before or after the MASK + * will be penalized + * 3. If there are no trailing bubbles then extra bits that occur + * beyond the maximum RANGE will be penalized. + * + * +++++++++++++++++++++++++++++++++++++++++++++++++++ + * + + + * + e.g. bitmask = 27B00 + + * + + + * + 63 +--- mstart 0 + + * + | | | + + * + | +---------+ +--- fb | + + * + | | range | | | + + * + V V V V V + + * + + + * + 0 0 ... 1 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 + + * + + + * + ^ ^ ^ + + * + | | mask| + + * + lb ---+ +-----+ + + * + width + + * + + + * +++++++++++++++++++++++++++++++++++++++++++++++++++ + */ + +struct rlevel_bitmask { + u64 bm; + u8 mstart; + u8 width; + int errs; +}; + +#define MASKRANGE_BITS 6 +#define MASKRANGE ((1 << MASKRANGE_BITS) - 1) + +/* data field addresses in the DDR2 SPD eeprom */ +enum ddr2_spd_addrs { + DDR2_SPD_BYTES_PROGRAMMED = 0, + DDR2_SPD_TOTAL_BYTES = 1, + DDR2_SPD_MEM_TYPE = 2, + DDR2_SPD_NUM_ROW_BITS = 3, + DDR2_SPD_NUM_COL_BITS = 4, + DDR2_SPD_NUM_RANKS = 5, + DDR2_SPD_CYCLE_CLX = 9, + DDR2_SPD_CONFIG_TYPE = 11, + DDR2_SPD_REFRESH = 12, + DDR2_SPD_SDRAM_WIDTH = 13, + DDR2_SPD_BURST_LENGTH = 16, + DDR2_SPD_NUM_BANKS = 17, + DDR2_SPD_CAS_LATENCY = 18, + DDR2_SPD_DIMM_TYPE = 20, + DDR2_SPD_CYCLE_CLX1 = 23, + DDR2_SPD_CYCLE_CLX2 = 25, + DDR2_SPD_TRP = 27, + DDR2_SPD_TRRD = 28, + DDR2_SPD_TRCD = 29, + DDR2_SPD_TRAS = 30, + DDR2_SPD_TWR = 36, + DDR2_SPD_TWTR = 37, + DDR2_SPD_TRFC_EXT = 40, + DDR2_SPD_TRFC = 42, + DDR2_SPD_CHECKSUM = 63, + DDR2_SPD_MFR_ID = 64 +}; + +/* data field addresses in the DDR2 SPD eeprom */ +enum ddr3_spd_addrs { + DDR3_SPD_BYTES_PROGRAMMED = 0, + DDR3_SPD_REVISION = 1, + DDR3_SPD_KEY_BYTE_DEVICE_TYPE = 2, + DDR3_SPD_KEY_BYTE_MODULE_TYPE = 3, + DDR3_SPD_DENSITY_BANKS = 4, + DDR3_SPD_ADDRESSING_ROW_COL_BITS = 5, + DDR3_SPD_NOMINAL_VOLTAGE = 6, + DDR3_SPD_MODULE_ORGANIZATION = 7, + DDR3_SPD_MEMORY_BUS_WIDTH = 8, + DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR = 9, + DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND = 10, + DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR = 11, + DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN = 12, + DDR3_SPD_CAS_LATENCIES_LSB = 14, + DDR3_SPD_CAS_LATENCIES_MSB = 15, + DDR3_SPD_MIN_CAS_LATENCY_TAAMIN = 16, + DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN = 17, + DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 18, + DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN = 19, + DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 20, + DDR3_SPD_UPPER_NIBBLES_TRAS_TRC = 21, + DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 22, + DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 23, + DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN = 24, + DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN = 25, + DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN = 26, + DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN = 27, + DDR3_SPD_UPPER_NIBBLE_TFAW = 28, + DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN = 29, + DDR3_SPD_SDRAM_OPTIONAL_FEATURES = 30, + DDR3_SPD_SDRAM_THERMAL_REFRESH_OPTIONS = 31, + DDR3_SPD_MODULE_THERMAL_SENSOR = 32, + DDR3_SPD_SDRAM_DEVICE_TYPE = 33, + DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN = 34, + DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 35, + DDR3_SPD_MIN_RAS_CAS_DELAY_FINE_TRCDMIN = 36, + DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 37, + DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_FINE_TRCMIN = 38, + DDR3_SPD_REFERENCE_RAW_CARD = 62, + DDR3_SPD_ADDRESS_MAPPING = 63, + DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB = 65, + DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB = 66, + DDR3_SPD_REGISTER_REVISION_NUMBER = 67, + DDR3_SPD_MODULE_SERIAL_NUMBER = 122, + DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126, + DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127, + DDR3_SPD_MODULE_PART_NUMBER = 128 +}; + +/* data field addresses in the DDR4 SPD eeprom */ +enum ddr4_spd_addrs { + DDR4_SPD_BYTES_PROGRAMMED = 0, + DDR4_SPD_REVISION = 1, + DDR4_SPD_KEY_BYTE_DEVICE_TYPE = 2, + DDR4_SPD_KEY_BYTE_MODULE_TYPE = 3, + DDR4_SPD_DENSITY_BANKS = 4, + DDR4_SPD_ADDRESSING_ROW_COL_BITS = 5, + DDR4_SPD_PACKAGE_TYPE = 6, + DDR4_SPD_OPTIONAL_FEATURES = 7, + DDR4_SPD_THERMAL_REFRESH_OPTIONS = 8, + DDR4_SPD_OTHER_OPTIONAL_FEATURES = 9, + DDR4_SPD_SECONDARY_PACKAGE_TYPE = 10, + DDR4_SPD_MODULE_NOMINAL_VOLTAGE = 11, + DDR4_SPD_MODULE_ORGANIZATION = 12, + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH = 13, + DDR4_SPD_MODULE_THERMAL_SENSOR = 14, + DDR4_SPD_RESERVED_BYTE15 = 15, + DDR4_SPD_RESERVED_BYTE16 = 16, + DDR4_SPD_TIMEBASES = 17, + DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN = 18, + DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX = 19, + DDR4_SPD_CAS_LATENCIES_BYTE0 = 20, + DDR4_SPD_CAS_LATENCIES_BYTE1 = 21, + DDR4_SPD_CAS_LATENCIES_BYTE2 = 22, + DDR4_SPD_CAS_LATENCIES_BYTE3 = 23, + DDR4_SPD_MIN_CAS_LATENCY_TAAMIN = 24, + DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 25, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 26, + DDR4_SPD_UPPER_NIBBLES_TRAS_TRC = 27, + DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 28, + DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 29, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN = 30, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN = 31, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN = 32, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN = 33, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN = 34, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN = 35, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN = 36, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN = 37, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN = 38, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN = 39, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN = 40, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN = 117, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN = 118, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN = 119, + DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN = 120, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 121, + DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN = 122, + DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 123, + DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX = 124, + DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN = 125, + DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126, + DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127, + DDR4_SPD_REFERENCE_RAW_CARD = 130, + DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE = 131, + DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB = 133, + DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB = 134, + DDR4_SPD_REGISTER_REVISION_NUMBER = 135, + DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM = 136, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL = 137, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK = 138, +}; + +#define SPD_EEPROM_SIZE (DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK + 1) + +struct impedence_values { + unsigned char *rodt_ohms; + unsigned char *rtt_nom_ohms; + unsigned char *rtt_nom_table; + unsigned char *rtt_wr_ohms; + unsigned char *dic_ohms; + short *drive_strength; + short *dqx_strength; +}; + +#define RODT_OHMS_COUNT 8 +#define RTT_NOM_OHMS_COUNT 8 +#define RTT_NOM_TABLE_COUNT 8 +#define RTT_WR_OHMS_COUNT 8 +#define DIC_OHMS_COUNT 3 +#define DRIVE_STRENGTH_COUNT 15 + +/* + * Structure that provides DIMM information, either in the form of an SPD + * TWSI address, or a pointer to an array that contains SPD data. One of + * the two fields must be valid. + */ +struct dimm_config { + u16 spd_addrs[2]; /* TWSI address of SPD, 0 if not used */ + u8 *spd_ptrs[2]; /* pointer to SPD data array, NULL if not used */ + int spd_cached[2]; + u8 spd_data[2][SPD_EEPROM_SIZE]; +}; + +struct dimm_odt_config { + u8 odt_ena; /* FIX: dqx_ctl for Octeon 3 DDR4 */ + u64 odt_mask; /* FIX: wodt_mask for Octeon 3 */ + union cvmx_lmcx_modereg_params1 modereg_params1; + union cvmx_lmcx_modereg_params2 modereg_params2; + u8 qs_dic; /* FIX: rodt_ctl for Octeon 3 */ + u64 rodt_ctl; /* FIX: rodt_mask for Octeon 3 */ + u8 dic; +}; + +struct ddr_delay_config { + u32 ddr_board_delay; + u8 lmc_delay_clk; + u8 lmc_delay_cmd; + u8 lmc_delay_dq; +}; + +/* + * The parameters below make up the custom_lmc_config data structure. + * This structure is used to customize the way that the LMC DRAM + * Controller is configured for a particular board design. + * + * The HRM describes LMC Read Leveling which supports automatic + * selection of per byte-lane delays. When measuring the read delays + * the LMC configuration software sweeps through a range of settings + * for LMC0_COMP_CTL2[RODT_CTL], the Octeon II on-die-termination + * resistance and LMC0_MODEREG_PARAMS1[RTT_NOM_XX], the DRAM + * on-die-termination resistance. The minimum and maximum parameters + * for rtt_nom_idx and rodt_ctl listed below determine the ranges of + * ODT settings used for the measurements. Note that for rtt_nom an + * index is used into a sorted table rather than the direct csr setting + * in order to optimize the sweep. + * + * .min_rtt_nom_idx: 1=120ohms, 2=60ohms, 3=40ohms, 4=30ohms, 5=20ohms + * .max_rtt_nom_idx: 1=120ohms, 2=60ohms, 3=40ohms, 4=30ohms, 5=20ohms + * .min_rodt_ctl: 1=20ohms, 2=30ohms, 3=40ohms, 4=60ohms, 5=120ohms + * .max_rodt_ctl: 1=20ohms, 2=30ohms, 3=40ohms, 4=60ohms, 5=120ohms + * + * The settings below control the Octeon II drive strength for the CK, + * ADD/CMD, and DQ/DQS signals. 1=24ohms, 2=26.67ohms, 3=30ohms, + * 4=34.3ohms, 5=40ohms, 6=48ohms, 6=60ohms. + * + * .dqx_ctl: Drive strength control for DDR_DQX/DDR_DQS_X_P/N drivers. + * .ck_ctl: Drive strength control for + * DDR_CK_X_P/DDR_DIMMX_CSX_L/DDR_DIMMX_ODT_X drivers. + * .cmd_ctl: Drive strength control for CMD/A/RESET_L/CKEX drivers. + * + * The LMC controller software selects the most optimal CAS Latency + * that complies with the appropriate SPD values and the frequency + * that the DRAMS are being operated. When operating the DRAMs at + * frequencies substantially lower than their rated frequencies it + * might be necessary to limit the minimum CAS Latency the LMC + * controller software is allowed to select in order to make the DRAM + * work reliably. + * + * .min_cas_latency: Minimum allowed CAS Latency + * + * The value used for LMC0_RLEVEL_CTL[OFFSET_EN] determine how the + * read-leveling information that the Octeon II gathers is interpreted + * to determine the per-byte read delays. + * + * .offset_en: Value used for LMC0_RLEVEL_CTL[OFFSET_EN]. + * .offset_udimm: Value used for LMC0_RLEVEL_CTL[OFFSET] for UDIMMS. + * .offset_rdimm: Value used for LMC0_RLEVEL_CTL[OFFSET] for RDIMMS. + * + * The LMC configuration software sweeps through a range of ODT + * settings while measuring the per-byte read delays. During those + * measurements the software makes an assessment of the quality of the + * measurements in order to determine which measurements provide the + * most accurate delays. The automatic settings provide the option to + * allow that same assessment to determine the most optimal RODT_CTL + * and/or RTT_NOM settings. + * + * The automatic approach might provide the best means to determine + * the settings used for initial poweron of a new design. However, + * the final settings should be determined by board analysis, testing, + * and experience. + * + * .ddr_rtt_nom_auto: 1 means automatically set RTT_NOM value. + * .ddr_rodt_ctl_auto: 1 means automatically set RODT_CTL value. + * + * .rlevel_compute: Enables software interpretation of per-byte read + * delays using the measurements collected by the + * Octeon II rather than completely relying on the + * Octeon II to determine the delays. 1=software + * computation is recomended since a more complete + * analysis is implemented in software. + * + * .rlevel_comp_offset: Set to 2 unless instructed differently by Cavium. + * + * .rlevel_average_loops: Determines the number of times the read-leveling + * sequence is run for each rank. The results is + * then averaged across the number of loops. The + * default setting is 1. + * + * .ddr2t_udimm: + * .ddr2t_rdimm: Turn on the DDR 2T mode. 2-cycle window for CMD and + * address. This mode helps relieve setup time pressure + * on the address and command bus. Please refer to + * Micron's tech note tn_47_01 titled DDR2-533 Memory + * Design Guide for Two Dimm Unbuffered Systems for + * physical details. + * + * .disable_sequential_delay_check: As result of the flyby topology + * prescribed in the JEDEC specifications the byte delays should + * maintain a consistent increasing or decreasing trend across + * the bytes on standard dimms. This setting can be used disable + * that check for unusual circumstances where the check is not + * useful. + * + * .maximum_adjacent_rlevel_delay_increment: An additional sequential + * delay check for the delays that result from the flyby + * topology. This value specifies the maximum difference between + * the delays of adjacent bytes. A value of 0 disables this + * check. + * + * .fprch2 Front Porch Enable: When set, the turn-off + * time for the default DDR_DQ/DQS drivers is FPRCH2 CKs earlier. + * 00 = 0 CKs + * 01 = 1 CKs + * 10 = 2 CKs + * + * .parity: The parity input signal PAR_IN on each dimm must be + * strapped high or low on the board. This bit is programmed + * into LMC0_DIMM_CTL[PARITY] and it must be set to match the + * board strapping. This signal is typically strapped low. + * + * .mode32b: Enable 32-bit datapath mode. Set to 1 if only 32 DQ pins + * are used. (cn61xx, cn71xx) + * + * .measured_vref: Set to 1 to measure VREF; set to 0 to compute VREF. + * + * .dram_connection: Set to 1 if discrete DRAMs; set to 0 if using DIMMs. + * This changes the algorithms used to compute VREF. + * + * .dll_write_offset: FIXME: Add description + * .dll_read_offset: FIXME: Add description + */ + +struct rlevel_table { + const char part[20]; + int speed; + u64 rl_rank[4][4]; +}; + +struct ddr3_custom_config { + u8 min_rtt_nom_idx; + u8 max_rtt_nom_idx; + u8 min_rodt_ctl; + u8 max_rodt_ctl; + u8 dqx_ctl; + u8 ck_ctl; + u8 cmd_ctl; + u8 ctl_ctl; + u8 min_cas_latency; + u8 offset_en; + u8 offset_udimm; + u8 offset_rdimm; + u8 rlevel_compute; + u8 ddr_rtt_nom_auto; + u8 ddr_rodt_ctl_auto; + u8 rlevel_comp_offset_udimm; + u8 rlevel_comp_offset_rdimm; + int8_t ptune_offset; + int8_t ntune_offset; + u8 rlevel_average_loops; + u8 ddr2t_udimm; + u8 ddr2t_rdimm; + u8 disable_sequential_delay_check; + u8 maximum_adjacent_rlevel_delay_increment; + u8 parity; + u8 fprch2; + u8 mode32b; + u8 measured_vref; + u8 dram_connection; + const int8_t *dll_write_offset; + const int8_t *dll_read_offset; + struct rlevel_table *rl_tbl; +}; + +#define DDR_CFG_T_MAX_DIMMS 5 + +struct ddr_conf { + struct dimm_config dimm_config_table[DDR_CFG_T_MAX_DIMMS]; + struct dimm_odt_config odt_1rank_config[4]; + struct dimm_odt_config odt_2rank_config[4]; + struct dimm_odt_config odt_4rank_config[4]; + struct ddr_delay_config unbuffered; + struct ddr_delay_config registered; + struct ddr3_custom_config custom_lmc_config; +}; + +/* Divide and round results to the nearest integer. */ +static inline u64 divide_nint(u64 dividend, u64 divisor) +{ + u64 quotent, remainder; + + quotent = dividend / divisor; + remainder = dividend % divisor; + return (quotent + ((remainder * 2) >= divisor)); +} + +/* Divide and round results up to the next higher integer. */ +static inline u64 divide_roundup(u64 dividend, u64 divisor) +{ + return ((dividend + divisor - 1) / divisor); +} + +enum ddr_type { + DDR3_DRAM = 3, + DDR4_DRAM = 4, +}; + +#define rttnom_none 0 /* Rtt_Nom disabled */ +#define rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define rttnom_20ohm 4 /* RZQ/12 = 240/12 = 20 ohms */ +#define rttnom_30ohm 5 /* RZQ/8 = 240/8 = 30 ohms */ +#define rttnom_rsrv1 6 /* Reserved */ +#define rttnom_rsrv2 7 /* Reserved */ + +#define rttwr_none 0 /* Dynamic ODT off */ +#define rttwr_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define rttwr_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define rttwr_rsrv1 3 /* Reserved */ + +#define dic_40ohm 0 /* RZQ/6 = 240/6 = 40 ohms */ +#define dic_34ohm 1 /* RZQ/7 = 240/7 = 34 ohms */ + +#define driver_24_ohm 1 +#define driver_27_ohm 2 +#define driver_30_ohm 3 +#define driver_34_ohm 4 +#define driver_40_ohm 5 +#define driver_48_ohm 6 +#define driver_60_ohm 7 + +#define rodt_ctl_none 0 +#define rodt_ctl_20_ohm 1 +#define rodt_ctl_30_ohm 2 +#define rodt_ctl_40_ohm 3 +#define rodt_ctl_60_ohm 4 +#define rodt_ctl_120_ohm 5 + +#define ddr4_rttnom_none 0 /* Rtt_Nom disabled */ +#define ddr4_rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define ddr4_rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define ddr4_rttnom_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttnom_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */ +#define ddr4_rttnom_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */ +#define ddr4_rttnom_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */ + +#define ddr4_rttwr_none 0 /* Dynamic ODT off */ +#define ddr4_rttwr_120ohm 1 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttwr_240ohm 2 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttwr_hiz 3 /* HiZ */ +/* This setting is available for cn78xx pass 2, and cn73xx & cnf75xx pass 1 */ +#define ddr4_rttwr_80ohm 4 /* RZQ/3 = 240/3 = 80 ohms */ + +#define ddr4_dic_34ohm 0 /* RZQ/7 = 240/7 = 34 ohms */ +#define ddr4_dic_48ohm 1 /* RZQ/5 = 240/5 = 48 ohms */ + +#define ddr4_rttpark_none 0 /* Rtt_Park disabled */ +#define ddr4_rttpark_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define ddr4_rttpark_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttpark_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define ddr4_rttpark_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttpark_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */ +#define ddr4_rttpark_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */ +#define ddr4_rttpark_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */ + +#define ddr4_driver_26_ohm 2 +#define ddr4_driver_30_ohm 3 +#define ddr4_driver_34_ohm 4 +#define ddr4_driver_40_ohm 5 +#define ddr4_driver_48_ohm 6 + +#define ddr4_dqx_driver_24_ohm 1 +#define ddr4_dqx_driver_27_ohm 2 +#define ddr4_dqx_driver_30_ohm 3 +#define ddr4_dqx_driver_34_ohm 4 +#define ddr4_dqx_driver_40_ohm 5 +#define ddr4_dqx_driver_48_ohm 6 +#define ddr4_dqx_driver_60_ohm 7 + +#define ddr4_rodt_ctl_none 0 +#define ddr4_rodt_ctl_40_ohm 1 +#define ddr4_rodt_ctl_60_ohm 2 +#define ddr4_rodt_ctl_80_ohm 3 +#define ddr4_rodt_ctl_120_ohm 4 +#define ddr4_rodt_ctl_240_ohm 5 +#define ddr4_rodt_ctl_34_ohm 6 +#define ddr4_rodt_ctl_48_ohm 7 + +#define DIMM_CONFIG_TERMINATOR { {0, 0}, {NULL, NULL} } + +#define SET_DDR_DLL_CTL3(field, expr) \ + do { \ + if (octeon_is_cpuid(OCTEON_CN66XX) || \ + octeon_is_cpuid(OCTEON_CN63XX)) \ + ddr_dll_ctl3.cn63xx.field = (expr); \ + else if (octeon_is_cpuid(OCTEON_CN68XX) || \ + octeon_is_cpuid(OCTEON_CN61XX) || \ + octeon_is_cpuid(OCTEON_CNF71XX)) \ + ddr_dll_ctl3.cn61xx.field = (expr); \ + else if (octeon_is_cpuid(OCTEON_CN70XX) || \ + octeon_is_cpuid(OCTEON_CN78XX)) \ + ddr_dll_ctl3.cn70xx.field = (expr); \ + else if (octeon_is_cpuid(OCTEON_CN73XX) || \ + octeon_is_cpuid(OCTEON_CNF75XX)) \ + ddr_dll_ctl3.cn73xx.field = (expr); \ + else \ + debug("%s(): " #field \ + "not set for unknown chip\n", \ + __func__); \ + } while (0) + +#define ENCODE_DLL90_BYTE_SEL(byte_sel) \ + (octeon_is_cpuid(OCTEON_CN70XX) ? ((9 + 7 - (byte_sel)) % 9) : \ + ((byte_sel) + 1)) + +/** + * If debugging is disabled the ddr_print macro is not compatible + * with this macro. + */ +# define GET_DDR_DLL_CTL3(field) \ + ((octeon_is_cpuid(OCTEON_CN66XX) || \ + octeon_is_cpuid(OCTEON_CN63XX)) ? \ + ddr_dll_ctl3.cn63xx.field : \ + (octeon_is_cpuid(OCTEON_CN68XX) || \ + octeon_is_cpuid(OCTEON_CN61XX) || \ + octeon_is_cpuid(OCTEON_CNF71XX)) ? \ + ddr_dll_ctl3.cn61xx.field : \ + (octeon_is_cpuid(OCTEON_CN70XX) || \ + octeon_is_cpuid(OCTEON_CN78XX)) ? \ + ddr_dll_ctl3.cn70xx.field : \ + (octeon_is_cpuid(OCTEON_CN73XX) || \ + octeon_is_cpuid(OCTEON_CNF75XX)) ? \ + ddr_dll_ctl3.cn73xx.field : 0) + +extern const char *ddr3_dimm_types[]; +extern const char *ddr4_dimm_types[]; + +extern const struct dimm_odt_config disable_odt_config[]; + +#define RLEVEL_BYTE_BITS 6 +#define RLEVEL_BYTE_MSK ((1ULL << 6) - 1) + +/* Prototypes */ +int get_ddr_type(struct dimm_config *dimm_config, int upper_dimm); +int get_dimm_module_type(struct dimm_config *dimm_config, int upper_dimm, + int ddr_type); +int read_spd(struct dimm_config *dimm_config, int dimm_index, int spd_field); +int read_spd_init(struct dimm_config *dimm_config, int dimm_index); +void report_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, int if_num); +int validate_dimm(struct ddr_priv *priv, struct dimm_config *dimm_config, + int dimm_index); +char *printable_rank_spec(char *buffer, int num_ranks, int dram_width, + int spd_package); + +bool ddr_memory_preserved(struct ddr_priv *priv); + +int get_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte); +int get_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte); +void upd_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte, + int delay); +void upd_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte, + int delay); + +int compute_ddr3_rlevel_delay(u8 mstart, u8 width, + union cvmx_lmcx_rlevel_ctl rlevel_ctl); + +int encode_row_lsb_ddr3(int row_lsb); +int encode_pbank_lsb_ddr3(int pbank_lsb); + +int initialize_ddr_clock(struct ddr_priv *priv, struct ddr_conf *ddr_conf, + u32 cpu_hertz, u32 ddr_hertz, u32 ddr_ref_hertz, + int if_num, u32 if_mask); + +void process_custom_dll_offsets(struct ddr_priv *priv, int if_num, + const char *enable_str, + const int8_t *offsets, const char *byte_str, + int mode); +int nonseq_del(struct rlevel_byte_data *rlevel_byte, int start, int end, + int max_adj_delay_inc); +int roundup_ddr3_wlevel_bitmask(int bitmask); + +void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num, + int sequence); +void ddr_init_seq(struct ddr_priv *priv, int rank_mask, int if_num); + +void rlevel_to_wlevel(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, + union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte); + +int validate_ddr3_rlevel_bitmask(struct rlevel_bitmask *rlevel_bitmask_p, + int ddr_type); + +void change_dll_offset_enable(struct ddr_priv *priv, int if_num, int change); +unsigned short load_dll_offset(struct ddr_priv *priv, int if_num, + int dll_offset_mode, + int byte_offset, int byte); + +u64 lmc_ddr3_rl_dbg_read(struct ddr_priv *priv, int if_num, int idx); +u64 lmc_ddr3_wl_dbg_read(struct ddr_priv *priv, int if_num, int idx); + +void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed); +void cvmx_dbi_switchover(struct ddr_priv *priv); + +int init_octeon3_ddr3_interface(struct ddr_priv *priv, + struct ddr_conf *ddr_conf, + u32 ddr_hertz, u32 cpu_hertz, u32 ddr_ref_hertz, + int if_num, u32 if_mask); + +char *lookup_env(struct ddr_priv *priv, const char *format, ...); +char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...); + +/* Each board provides a board-specific config table via this function */ +struct ddr_conf *octeon_ddr_conf_table_get(int *count, int *def_ddr_freq); + +#endif /* __OCTEON_DDR_H_ */ diff --git a/arch/mips/mach-octeon/include/mangle-port.h b/arch/mips/mach-octeon/include/mangle-port.h new file mode 100644 index 0000000000..7e95dcef5a --- /dev/null +++ b/arch/mips/mach-octeon/include/mangle-port.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2003, 2004 Ralf Baechle + */ + +#ifndef __ASM_MACH_GENERIC_MANGLE_PORT_H +#define __ASM_MACH_GENERIC_MANGLE_PORT_H + +#include + +#ifdef __BIG_ENDIAN + +static inline bool __should_swizzle_bits(volatile void *a) +{ + extern const bool octeon_should_swizzle_table[]; + u64 did = ((u64)(uintptr_t)a >> 40) & 0xff; + + return octeon_should_swizzle_table[did]; +} + +# define __swizzle_addr_b(port) (port) +# define __swizzle_addr_w(port) (port) +# define __swizzle_addr_l(port) (port) +# define __swizzle_addr_q(port) (port) + +#else /* __LITTLE_ENDIAN */ + +#define __should_swizzle_bits(a) false + +static inline bool __should_swizzle_addr(u64 p) +{ + /* boot bus? */ + return ((p >> 40) & 0xff) == 0; +} + +# define __swizzle_addr_b(port) \ + (__should_swizzle_addr(port) ? (port) ^ 7 : (port)) +# define __swizzle_addr_w(port) \ + (__should_swizzle_addr(port) ? (port) ^ 6 : (port)) +# define __swizzle_addr_l(port) \ + (__should_swizzle_addr(port) ? (port) ^ 4 : (port)) +# define __swizzle_addr_q(port) (port) + +#endif /* __BIG_ENDIAN */ + + +# define ioswabb(a, x) (x) +# define __mem_ioswabb(a, x) (x) +# define ioswabw(a, x) (__should_swizzle_bits(a) ? le16_to_cpu(x) : x) +# define __mem_ioswabw(a, x) (x) +# define ioswabl(a, x) (__should_swizzle_bits(a) ? le32_to_cpu(x) : x) +# define __mem_ioswabl(a, x) (x) +# define ioswabq(a, x) (__should_swizzle_bits(a) ? le64_to_cpu(x) : x) +# define __mem_ioswabq(a, x) (x) + +#endif /* __ASM_MACH_GENERIC_MANGLE_PORT_H */ diff --git a/arch/mips/mach-octeon/lowlevel_init.S b/arch/mips/mach-octeon/lowlevel_init.S index fa87cb4e34..56d1d2261e 100644 --- a/arch/mips/mach-octeon/lowlevel_init.S +++ b/arch/mips/mach-octeon/lowlevel_init.S @@ -10,10 +10,36 @@ #include #include #include +#include + +#define COP0_CVMCTL_REG $9,7 /* Cavium control */ +#define COP0_CVMMEMCTL_REG $11,7 /* Cavium memory control */ +#define COP0_PROC_ID_REG $15,0 .set noreorder LEAF(lowlevel_init) + + /* Set LMEMSZ in CVMMEMCTL register */ + dmfc0 a0, COP0_CVMMEMCTL_REG + dins a0, zero, 0, 9 + mfc0 a4, COP0_PROC_ID_REG + li a5, OCTEON_CN63XX_PASS1_0 /* Octeon cn63xx pass1 chip id */ + bgt a5, a4, 2f + ori a0, 0x104 /* setup 4 lines of scratch */ + ori a6, a5, 8 /* Octeon cn63xx pass2 chip id */ + bge a4, a6, 2f + nop + li a6, 4 + ins a0, a6, 11, 4 /* Set WBTHRESH=4 as per Core-14752 errata */ +2: + dmtc0 a0, COP0_CVMMEMCTL_REG + + /* Set REPUN bit in CVMCTL register */ + dmfc0 a0, COP0_CVMCTL_REG + ori a0, 1<<14 /* enable fixup of unaligned mem access */ + dmtc0 a0, COP0_CVMCTL_REG + jr ra nop END(lowlevel_init) @@ -67,3 +93,53 @@ __dummy: nop END(mips_mach_early_init) + +LEAF(nmi_bootvector) + + /* + * From Marvell original bootvector setup + */ + mfc0 k0, CP0_STATUS + /* Enable 64-bit addressing, set ERL (should already be set) */ + ori k0, 0x84 + mtc0 k0, CP0_STATUS + /* Core-14345, clear L1 Dcache virtual tags if the core hit an NMI */ + cache 17, 0($0) + + /* + * Needed for Linux kernel booting, otherwise it hangs while + * zero'ing all of CVMSEG + */ + dmfc0 a0, COP0_CVMMEMCTL_REG + dins a0, zero, 0, 9 + ori a0, 0x104 /* setup 4 lines of scratch */ + dmtc0 a0, COP0_CVMMEMCTL_REG + + /* + * Load parameters and entry point + */ + PTR_LA t9, nmi_handler_para + sync + + ld s0, 0x00(t9) + ld a0, 0x08(t9) + ld a1, 0x10(t9) + ld a2, 0x18(t9) + ld a3, 0x20(t9) + + /* Finally jump to entry point (start kernel etc) */ + j s0 + nop + + END(nmi_bootvector) + + /* + * Add here some space for the NMI parameters (entry point and args) + */ + .globl nmi_handler_para +nmi_handler_para: + .dword 0 // entry-point + .dword 0 // arg0 + .dword 0 // arg1 + .dword 0 // arg2 + .dword 0 // arg3 diff --git a/board/Marvell/octeon_ebb7304/board.c b/board/Marvell/octeon_ebb7304/board.c index 56e50a9063..611b18fa6a 100644 --- a/board/Marvell/octeon_ebb7304/board.c +++ b/board/Marvell/octeon_ebb7304/board.c @@ -3,7 +3,24 @@ * Copyright (C) 2020 Stefan Roese */ -/* - * Nothing included right now. Code will be added in follow-up - * patches. - */ +#include +#include +#include + +#include + +#include "board_ddr.h" + +#define EBB7304_DEF_DRAM_FREQ 800 + +static struct ddr_conf board_ddr_conf[] = { + OCTEON_EBB7304_DDR_CONFIGURATION +}; + +struct ddr_conf *octeon_ddr_conf_table_get(int *count, int *def_ddr_freq) +{ + *count = ARRAY_SIZE(board_ddr_conf); + *def_ddr_freq = EBB7304_DEF_DRAM_FREQ; + + return board_ddr_conf; +} diff --git a/board/Marvell/octeon_ebb7304/board_ddr.h b/board/Marvell/octeon_ebb7304/board_ddr.h new file mode 100644 index 0000000000..f2f3419e5b --- /dev/null +++ b/board/Marvell/octeon_ebb7304/board_ddr.h @@ -0,0 +1,447 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + * + * https://spdx.org/licenses + */ + +#ifndef __BOARD_DDR_H__ +#define __BOARD_DDR_H__ + +#define OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION0 \ + { {0x1050, 0x0}, {NULL, NULL} }, { {0x1051, 0x0}, {NULL, NULL} } +#define OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION1 \ + { {0x1052, 0x0}, {NULL, NULL} }, { {0x1053, 0x0}, {NULL, NULL} } + +#define OCTEON_EBB7304_BOARD_EEPROM_TWSI_ADDR 0x56 + +/* + * Local copy of these parameters to allow for customization for this + * board design. The generic version resides in lib_octeon_shared.h. + */ + +/* LMC0_MODEREG_PARAMS1 */ +#define OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_1SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = ddr4_rttwr_80ohm & 3, \ + .rtt_wr_00_ext = (ddr4_rttwr_80ohm >> 2) & 1, \ + .dic_00 = ddr4_dic_34ohm, \ + .rtt_nom_00 = 0, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = 0, \ + .dic_01 = ddr4_dic_34ohm, \ + .rtt_nom_01 = 0, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .rtt_wr_10 = 0, \ + .dic_10 = ddr4_dic_34ohm, \ + .rtt_nom_10 = 0, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = 0, \ + .dic_11 = ddr4_dic_34ohm, \ + .rtt_nom_11 = 0, \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_2SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = ddr4_rttwr_80ohm & 3, \ + .rtt_wr_00_ext = (ddr4_rttwr_80ohm >> 2) & 1, \ + .dic_00 = ddr4_dic_34ohm, \ + .rtt_nom_00 = 0, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = 0, \ + .dic_01 = ddr4_dic_34ohm, \ + .rtt_nom_01 = 0, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .rtt_wr_10 = ddr4_rttwr_80ohm & 3, \ + .rtt_wr_10_ext = (ddr4_rttwr_80ohm >> 2) & 1, \ + .dic_10 = ddr4_dic_34ohm, \ + .rtt_nom_10 = 0, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = 0, \ + .dic_11 = ddr4_dic_34ohm, \ + .rtt_nom_11 = 0 \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_1SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = ddr4_rttwr_240ohm, \ + .dic_00 = ddr4_dic_34ohm, \ + .rtt_nom_00 = 0, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = ddr4_rttwr_240ohm, \ + .dic_01 = ddr4_dic_34ohm, \ + .rtt_nom_01 = 0, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .dic_10 = ddr4_dic_34ohm, \ + .rtt_nom_10 = 0, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = 0, \ + .dic_11 = ddr4_dic_34ohm, \ + .rtt_nom_11 = 0, \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_2SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = ddr4_rttwr_240ohm, \ + .dic_00 = ddr4_dic_34ohm, \ + .rtt_nom_00 = ddr4_rttnom_120ohm, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = ddr4_rttwr_240ohm, \ + .dic_01 = ddr4_dic_34ohm, \ + .rtt_nom_01 = ddr4_rttnom_120ohm, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .rtt_wr_10 = ddr4_rttwr_240ohm, \ + .dic_10 = ddr4_dic_34ohm, \ + .rtt_nom_10 = ddr4_rttnom_120ohm, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = ddr4_rttwr_240ohm, \ + .dic_11 = ddr4_dic_34ohm, \ + .rtt_nom_11 = ddr4_rttnom_120ohm, \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS1_4RANK_1SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = rttwr_60ohm, \ + .dic_00 = dic_34ohm, \ + .rtt_nom_00 = rttnom_20ohm, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = rttwr_60ohm, \ + .dic_01 = dic_34ohm, \ + .rtt_nom_01 = rttnom_none, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .rtt_wr_10 = rttwr_60ohm, \ + .dic_10 = dic_34ohm, \ + .rtt_nom_10 = rttnom_20ohm, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = rttwr_60ohm, \ + .dic_11 = dic_34ohm, \ + .rtt_nom_11 = rttnom_none, \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_1SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_60ohm, \ + .vref_value_00 = 0x22, \ + .vref_range_00 = 0, \ + .rtt_park_01 = 0, \ + .vref_value_01 = 0, \ + .vref_range_01 = 0, \ + .rtt_park_10 = 0, \ + .vref_value_10 = 0, \ + .vref_range_10 = 0, \ + .rtt_park_11 = 0, \ + .vref_value_11 = 0, \ + .vref_range_11 = 0 \ + } \ +} + +/* FIX */ +#define OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_2SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_48ohm, \ + .vref_value_00 = 0x1f, \ + .vref_range_00 = 0, \ + .rtt_park_01 = 0, \ + .vref_value_01 = 0, \ + .vref_range_01 = 0, \ + .rtt_park_10 = ddr4_rttpark_48ohm, \ + .vref_value_10 = 0x1f, \ + .vref_range_10 = 0, \ + .rtt_park_11 = 0, \ + .vref_value_11 = 0, \ + .vref_range_11 = 0 \ + } \ +} + +#define OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_1SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_120ohm, \ + .vref_value_00 = 0x19, \ + .vref_range_00 = 0, \ + .rtt_park_01 = ddr4_rttpark_120ohm, \ + .vref_value_01 = 0x19, \ + .vref_range_01 = 0, \ + .rtt_park_10 = 0, \ + .vref_value_10 = 0, \ + .vref_range_10 = 0, \ + .rtt_park_11 = 0, \ + .vref_value_11 = 0, \ + .vref_range_11 = 0 \ + } \ +} + +#define OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_2SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_60ohm, \ + .vref_value_00 = 0x19, \ + .vref_range_00 = 0, \ + .rtt_park_01 = ddr4_rttpark_60ohm, \ + .vref_value_01 = 0x19, \ + .vref_range_01 = 0, \ + .rtt_park_10 = ddr4_rttpark_60ohm, \ + .vref_value_10 = 0x19, \ + .vref_range_10 = 0, \ + .rtt_park_11 = ddr4_rttpark_60ohm, \ + .vref_value_11 = 0x19, \ + .vref_range_11 = 0 \ + } \ +} + +#define OCTEON_EBB7304_MODEREG_PARAMS2_4RANK_1SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_80ohm, \ + .vref_value_00 = 0x1f, \ + .vref_range_00 = 0, \ + .rtt_park_01 = ddr4_rttpark_80ohm, \ + .vref_value_01 = 0x1f, \ + .vref_range_01 = 0, \ + .rtt_park_10 = 0, \ + .vref_value_10 = 0, \ + .vref_range_10 = 0, \ + .rtt_park_11 = 0, \ + .vref_value_11 = 0, \ + .vref_range_11 = 0 \ + } \ +} + +#define OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION \ + /* 1 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x00000000ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_1SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_1SLOT, \ + ddr4_rodt_ctl_48_ohm, \ + 0x00000000ULL, \ + 0 \ + }, \ + /* 2 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x00000000ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_2SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_2SLOT, \ + ddr4_rodt_ctl_80_ohm, \ + 0x00000000ULL, \ + 0 \ + } + +#define OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION \ + /* 1 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x00000000ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_1SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_1SLOT, \ + ddr4_rodt_ctl_80_ohm, \ + 0x00000000ULL, \ + 0 \ + }, \ + /* 2 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x0c0c0303ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_2SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_2SLOT, \ + ddr4_rodt_ctl_48_ohm, \ + 0x04080102ULL, \ + 0 \ + } + +#define OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION \ + /* 1 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x01030203ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_4RANK_1SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_4RANK_1SLOT, \ + ddr4_rodt_ctl_48_ohm, \ + 0x01010202ULL, \ + 0 \ + } + +/* + * Construct a static initializer for the ddr_configuration_t variable that + * holds (almost) all of the information required for DDR initialization. + */ + +/* + * The parameters below make up the custom_lmc_config data structure. + * This structure is used to customize the way that the LMC DRAM + * Controller is configured for a particular board design. + * + * Refer to the file lib_octeon_board_table_entry.h for a description + * of the custom board settings. It is usually kept in the following + * location... arch/mips/include/asm/arch-octeon/ + * + */ + +#define OCTEON_EBB7304_DDR_CONFIGURATION \ +/* Interface 0 */ \ +{ \ + .custom_lmc_config = { \ + .min_rtt_nom_idx = 1, \ + .max_rtt_nom_idx = 7, \ + .min_rodt_ctl = 1, \ + .max_rodt_ctl = 7, \ + .ck_ctl = ddr4_driver_34_ohm, \ + .cmd_ctl = ddr4_driver_34_ohm, \ + .ctl_ctl = ddr4_driver_34_ohm, \ + .min_cas_latency = 0, \ + .offset_en = 1, \ + .offset_udimm = 2, \ + .offset_rdimm = 2, \ + .ddr_rtt_nom_auto = 0, \ + .ddr_rodt_ctl_auto = 0, \ + .rlevel_comp_offset_udimm = 0, \ + .rlevel_comp_offset_rdimm = 0, \ + .rlevel_compute = 0, \ + .ddr2t_udimm = 1, \ + .ddr2t_rdimm = 1, \ + .maximum_adjacent_rlevel_delay_increment = 2, \ + .fprch2 = 2, \ + .dll_write_offset = NULL, \ + .dll_read_offset = NULL, \ + .parity = 0 \ + }, \ + .dimm_config_table = { \ + OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION0, \ + DIMM_CONFIG_TERMINATOR \ + }, \ + .unbuffered = { \ + .ddr_board_delay = 0, \ + .lmc_delay_clk = 0, \ + .lmc_delay_cmd = 0, \ + .lmc_delay_dq = 0 \ + }, \ + .registered = { \ + .ddr_board_delay = 0, \ + .lmc_delay_clk = 0, \ + .lmc_delay_cmd = 0, \ + .lmc_delay_dq = 0 \ + }, \ + .odt_1rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION \ + }, \ + .odt_2rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION \ + }, \ + .odt_4rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION \ + } \ +}, \ +/* Interface 1 */ \ +{ \ + .custom_lmc_config = { \ + .min_rtt_nom_idx = 1, \ + .max_rtt_nom_idx = 7, \ + .min_rodt_ctl = 1, \ + .max_rodt_ctl = 7, \ + .ck_ctl = ddr4_driver_34_ohm, \ + .cmd_ctl = ddr4_driver_34_ohm, \ + .ctl_ctl = ddr4_driver_34_ohm, \ + .min_cas_latency = 0, \ + .offset_en = 1, \ + .offset_udimm = 2, \ + .offset_rdimm = 2, \ + .ddr_rtt_nom_auto = 0, \ + .ddr_rodt_ctl_auto = 0, \ + .rlevel_comp_offset_udimm = 0, \ + .rlevel_comp_offset_rdimm = 0, \ + .rlevel_compute = 0, \ + .ddr2t_udimm = 1, \ + .ddr2t_rdimm = 1, \ + .maximum_adjacent_rlevel_delay_increment = 2, \ + .fprch2 = 2, \ + .dll_write_offset = NULL, \ + .dll_read_offset = NULL, \ + .parity = 0 \ + }, \ + .dimm_config_table = { \ + OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION1, \ + DIMM_CONFIG_TERMINATOR \ + }, \ + .unbuffered = { \ + .ddr_board_delay = 0, \ + .lmc_delay_clk = 0, \ + .lmc_delay_cmd = 0, \ + .lmc_delay_dq = 0 \ + }, \ + .registered = { \ + .ddr_board_delay = 0, \ + .lmc_delay_clk = 0, \ + .lmc_delay_cmd = 0, \ + .lmc_delay_dq = 0 \ + }, \ + .odt_1rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION \ + }, \ + .odt_2rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION \ + }, \ + .odt_4rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION \ + } \ +}, + +#endif /* __BOARD_DDR_H__ */ diff --git a/configs/octeon_ebb7304_defconfig b/configs/octeon_ebb7304_defconfig index f8d27b01dc..a98d73a268 100644 --- a/configs/octeon_ebb7304_defconfig +++ b/configs/octeon_ebb7304_defconfig @@ -15,12 +15,19 @@ CONFIG_HUSH_PARSER=y CONFIG_CMD_GPIO=y CONFIG_CMD_I2C=y CONFIG_CMD_MTD=y +CONFIG_CMD_PART=y CONFIG_CMD_PCI=y +CONFIG_CMD_USB=y CONFIG_CMD_DHCP=y CONFIG_CMD_PING=y CONFIG_CMD_TIME=y +CONFIG_CMD_EXT4=y +CONFIG_CMD_FAT=y +CONFIG_CMD_FS_GENERIC=y +# CONFIG_DOS_PARTITION is not set CONFIG_ENV_IS_IN_FLASH=y CONFIG_ENV_ADDR=0x1FBFE000 +CONFIG_BLK=y CONFIG_CLK=y # CONFIG_INPUT is not set CONFIG_MTD=y @@ -38,6 +45,9 @@ CONFIG_SPI_FLASH_STMICRO=y # CONFIG_NETDEVICES is not set CONFIG_PCI=y CONFIG_DM_PCI=y +CONFIG_RAM=y +CONFIG_RAM_OCTEON=y +CONFIG_RAM_OCTEON_DDR4=y CONFIG_DEBUG_UART_SHIFT=3 CONFIG_DEBUG_UART_ANNOUNCE=y CONFIG_SYS_NS16550=y @@ -45,4 +55,14 @@ CONFIG_SPI=y CONFIG_OCTEON_SPI=y CONFIG_SYSRESET=y CONFIG_SYSRESET_OCTEON=y +CONFIG_USB=y +CONFIG_DM_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_DWC3=y +CONFIG_USB_HOST_ETHER=y +CONFIG_USB_ETHER_ASIX=y +CONFIG_USB_ETHER_ASIX88179=y +CONFIG_USB_ETHER_MCS7830=y +CONFIG_USB_ETHER_RTL8152=y +CONFIG_USB_ETHER_SMSC95XX=y CONFIG_HEXDUMP=y diff --git a/drivers/ram/Kconfig b/drivers/ram/Kconfig index 7e6e981897..a0e859afd6 100644 --- a/drivers/ram/Kconfig +++ b/drivers/ram/Kconfig @@ -76,3 +76,4 @@ config IMXRT_SDRAM source "drivers/ram/rockchip/Kconfig" source "drivers/ram/sifive/Kconfig" source "drivers/ram/stm32mp1/Kconfig" +source "drivers/ram/octeon/Kconfig" diff --git a/drivers/ram/Makefile b/drivers/ram/Makefile index 769c9d6218..d685a579a0 100644 --- a/drivers/ram/Makefile +++ b/drivers/ram/Makefile @@ -19,3 +19,5 @@ obj-$(CONFIG_K3_J721E_DDRSS) += k3-j721e/ obj-$(CONFIG_IMXRT_SDRAM) += imxrt_sdram.o obj-$(CONFIG_RAM_SIFIVE) += sifive/ + +obj-$(CONFIG_ARCH_OCTEON) += octeon/ diff --git a/drivers/ram/octeon/Kconfig b/drivers/ram/octeon/Kconfig new file mode 100644 index 0000000000..eb5a1208ed --- /dev/null +++ b/drivers/ram/octeon/Kconfig @@ -0,0 +1,17 @@ +config RAM_OCTEON + bool "Ram drivers for Octeon SoCs" + depends on RAM && ARCH_OCTEON + default n + help + This enables support for RAM drivers for Octeon SoCs. + +if RAM_OCTEON + +config RAM_OCTEON_DDR4 + bool "Octeon III DDR4 RAM support" + default n + help + This enables support for DDR4 RAM suppoort for Octeon III. This does + not include support for Octeon CN70XX. + +endif # RAM_OCTEON diff --git a/drivers/ram/octeon/Makefile b/drivers/ram/octeon/Makefile new file mode 100644 index 0000000000..27649d1e6f --- /dev/null +++ b/drivers/ram/octeon/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2020 Marvell, Inc. +# + +obj-$(CONFIG_RAM_OCTEON_DDR4) += octeon_ddr.o +obj-$(CONFIG_RAM_OCTEON_DDR4) += octeon3_lmc.o +obj-y += dimm_spd_eeprom.o diff --git a/drivers/ram/octeon/dimm_spd_eeprom.c b/drivers/ram/octeon/dimm_spd_eeprom.c new file mode 100644 index 0000000000..30db54804c --- /dev/null +++ b/drivers/ram/octeon/dimm_spd_eeprom.c @@ -0,0 +1,407 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#include +#include + +#include + +#define DEVICE_TYPE DDR4_SPD_KEY_BYTE_DEVICE_TYPE // same for DDR3 and DDR4 +#define MODULE_TYPE DDR4_SPD_KEY_BYTE_MODULE_TYPE // same for DDR3 and DDR4 +#define BUS_WIDTH(t) (((t) == DDR4_DRAM) ? \ + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH : \ + DDR3_SPD_MEMORY_BUS_WIDTH) + +/* + * Allow legacy code to encode bus number in the upper bits of the address + * These are only supported in read_spd() + */ +#define OCTEON_TWSI_BUS_IN_ADDR_BIT 12 +#define OCTEON_TWSI_BUS_IN_ADDR_MASK (15 << OCTEON_TWSI_BUS_IN_ADDR_BIT) +#define OCTEON_TWSI_GET_BUS(addr) \ + (((addr) >> OCTEON_TWSI_BUS_IN_ADDR_BIT) & 0xf) + +const char *ddr3_dimm_types[] = { + /* 0000 */ "Undefined", + /* 0001 */ "RDIMM", + /* 0010 */ "UDIMM", + /* 0011 */ "SO-DIMM", + /* 0100 */ "Micro-DIMM", + /* 0101 */ "Mini-RDIMM", + /* 0110 */ "Mini-UDIMM", + /* 0111 */ "Mini-CDIMM", + /* 1000 */ "72b-SO-UDIMM", + /* 1001 */ "72b-SO-RDIMM", + /* 1010 */ "72b-SO-CDIMM" + /* 1011 */ "LRDIMM", + /* 1100 */ "16b-SO-DIMM", + /* 1101 */ "32b-SO-DIMM", + /* 1110 */ "Reserved", + /* 1111 */ "Reserved" +}; + +const char *ddr4_dimm_types[] = { + /* 0000 */ "Extended", + /* 0001 */ "RDIMM", + /* 0010 */ "UDIMM", + /* 0011 */ "SO-DIMM", + /* 0100 */ "LRDIMM", + /* 0101 */ "Mini-RDIMM", + /* 0110 */ "Mini-UDIMM", + /* 0111 */ "Reserved", + /* 1000 */ "72b-SO-RDIMM", + /* 1001 */ "72b-SO-UDIMM", + /* 1010 */ "Reserved", + /* 1011 */ "Reserved", + /* 1100 */ "16b-SO-DIMM", + /* 1101 */ "32b-SO-DIMM", + /* 1110 */ "Reserved", + /* 1111 */ "Reserved" +}; + +static u16 ddr3_crc16(u8 *ptr, int count) +{ + /* From DDR3 SPD specification */ + int crc, i; + + crc = 0; + while (--count >= 0) { + crc = crc ^ (int)*ptr++ << 8; + for (i = 0; i < 8; ++i) { + if (crc & 0x8000) + crc = crc << 1 ^ 0x1021; + else + crc = crc << 1; + } + } + + return (crc & 0xFFFF); +} + +static int validate_spd_checksum_ddr4(struct dimm_config *dimm_config, + int dimm_index, int twsi_addr, int silent) +{ + u8 *spd_data = dimm_config->spd_data[dimm_index]; + int crc_bytes = 126; + u16 crc_comp; + + /* Check byte 0 to see how many bytes checksum is over */ + if (spd_data[0] & 0x80) + crc_bytes = 117; + + crc_comp = ddr3_crc16(spd_data, crc_bytes); + + if (spd_data[126] == (crc_comp & 0xff) && + spd_data[127] == (crc_comp >> 8)) + return 1; + + if (!silent) { + printf("DDR4 SPD CRC error, spd addr: 0x%x, calculated crc: 0x%04x, read crc: 0x%02x%02x\n", + twsi_addr, crc_comp, spd_data[127], spd_data[126]); + } + + return 0; +} + +static int validate_spd_checksum(struct ddr_priv *priv, + struct dimm_config *dimm_config, + int dimm_index, int twsi_addr, + int silent, u8 rv) +{ + if (ddr_verbose(priv)) + debug("Validating DIMM at address 0x%x\n", twsi_addr); + + if (rv >= 0x8 && rv <= 0xA) + printf("%s: Error: DDR2 support disabled\n", __func__); + + if (rv == 0xB) + printf("%s: Error: DDR3 support disabled\n", __func__); + + if (rv == 0xC) { + return validate_spd_checksum_ddr4(dimm_config, dimm_index, + twsi_addr, silent); + } + + if (!silent) { + printf("Unrecognized DIMM type: 0x%x at spd address: 0x%x\n", + rv, twsi_addr); + } + + return 0; +} + +/* + * Read an DIMM SPD value, either using TWSI to read it from the DIMM, or + * from a provided array. + */ +int read_spd(struct dimm_config *dimm_config, int dimm_index, int spd_field) +{ + dimm_index = !!dimm_index; + + if (spd_field >= SPD_EEPROM_SIZE) { + printf("ERROR: Trying to read unsupported SPD EEPROM value %d\n", + spd_field); + } + + /* + * If pointer to data is provided, use it, otherwise read from SPD + * over twsi + */ + if (dimm_config->spd_ptrs[dimm_index]) + return dimm_config->spd_ptrs[dimm_index][spd_field]; + else if (dimm_config->spd_addrs[dimm_index]) + return dimm_config->spd_data[dimm_index][spd_field]; + + return -1; +} + +int read_spd_init(struct dimm_config *dimm_config, int dimm_index) +{ + u8 busno = OCTEON_TWSI_GET_BUS(dimm_config->spd_addrs[dimm_index]); + u8 cmdno = dimm_config->spd_addrs[dimm_index]; + struct udevice *dev_i2c; + u8 *spd_data; + int ret; + + if (dimm_config->spd_cached[dimm_index]) + return 0; + + dimm_config->spd_cached[dimm_index] = 1; + spd_data = dimm_config->spd_data[dimm_index]; + + ret = i2c_get_chip_for_busnum(busno, cmdno, 2, &dev_i2c); + if (ret) { + debug("Cannot find SPL EEPROM: %d\n", ret); + return -ENODEV; + } + + ret = dm_i2c_read(dev_i2c, 0, spd_data, SPD_EEPROM_SIZE); + + return ret; +} + +int validate_dimm(struct ddr_priv *priv, struct dimm_config *dimm_config, + int dimm_index) +{ + int spd_addr; + + dimm_index = !!dimm_index; /* Normalize to 0/1 */ + spd_addr = dimm_config->spd_addrs[dimm_index]; + + debug("Validating dimm %d, spd addr: 0x%02x spd ptr: %p\n", + dimm_index, + dimm_config->spd_addrs[dimm_index], + dimm_config->spd_ptrs[dimm_index]); + + /* Only validate 'real' dimms, assume compiled in values are OK */ + if (!dimm_config->spd_ptrs[dimm_index]) { + int val0, val1; + int dimm_type; + int ret; + + ret = read_spd_init(dimm_config, dimm_index); + if (ret) + return 0; + + dimm_type = read_spd(dimm_config, dimm_index, + DDR2_SPD_MEM_TYPE) & 0xff; + switch (dimm_type) { + case 0x0B: /* DDR3 */ + if (ddr_verbose(priv)) + printf("Validating DDR3 DIMM %d\n", dimm_index); + val0 = read_spd(dimm_config, dimm_index, + DDR3_SPD_DENSITY_BANKS); + val1 = read_spd(dimm_config, dimm_index, + DDR3_SPD_ADDRESSING_ROW_COL_BITS); + if (val0 < 0 && val1 < 0) { + if (ddr_verbose(priv)) + printf("Error reading SPD for DIMM %d\n", + dimm_index); + return 0; /* Failed to read dimm */ + } + if (val0 == 0xff && val1 == 0xff) { + if (ddr_verbose(priv)) + printf("Blank or unreadable SPD for DIMM %d\n", + dimm_index); + /* Blank SPD or otherwise unreadable device */ + return 0; + } + + /* Don't treat bad checksums as fatal */ + validate_spd_checksum(priv, dimm_config, dimm_index, + spd_addr, 0, dimm_type); + break; + + case 0x0C: /* DDR4 */ + if (ddr_verbose(priv)) + printf("Validating DDR4 DIMM %d\n", dimm_index); + val0 = read_spd(dimm_config, dimm_index, + DDR4_SPD_DENSITY_BANKS); + val1 = read_spd(dimm_config, dimm_index, + DDR4_SPD_ADDRESSING_ROW_COL_BITS); + if (val0 < 0 && val1 < 0) { + if (ddr_verbose(priv)) + printf("Error reading SPD for DIMM %d\n", + dimm_index); + return 0; /* Failed to read dimm */ + } + if (val0 == 0xff && val1 == 0xff) { + if (ddr_verbose(priv)) { + printf("Blank or unreadable SPD for DIMM %d\n", + dimm_index); + } + /* Blank SPD or otherwise unreadable device */ + return 0; + } + + /* Don't treat bad checksums as fatal */ + validate_spd_checksum(priv, dimm_config, dimm_index, + spd_addr, 0, dimm_type); + break; + + case 0x00: + /* Terminator detected. Fail silently. */ + return 0; + + default: + debug("Unknown DIMM type 0x%x for DIMM %d @ 0x%x\n", + dimm_type, dimm_index, + dimm_config->spd_addrs[dimm_index]); + return 0; /* Failed to read dimm */ + } + } + + return 1; +} + +int get_ddr_type(struct dimm_config *dimm_config, int upper_dimm) +{ + int spd_ddr_type; + + spd_ddr_type = read_spd(dimm_config, upper_dimm, DEVICE_TYPE); + + debug("%s:%d spd_ddr_type=0x%02x\n", __func__, __LINE__, + spd_ddr_type); + + /* we return only DDR4 or DDR3 */ + return (spd_ddr_type == 0x0C) ? DDR4_DRAM : DDR3_DRAM; +} + +static int get_dimm_ecc(struct dimm_config *dimm_config, int upper_dimm, + int ddr_type) +{ + return !!(read_spd(dimm_config, upper_dimm, BUS_WIDTH(ddr_type)) & 8); +} + +int get_dimm_module_type(struct dimm_config *dimm_config, int upper_dimm, + int ddr_type) +{ + return read_spd(dimm_config, upper_dimm, MODULE_TYPE) & 0x0f; +} + +char *printable_rank_spec(char *buffer, int num_ranks, int dram_width, + int spd_package) +{ + int die_count = ((spd_package >> 4) & 7) + 1; + + if (spd_package & 0x80) { // non-monolithic + if ((spd_package & 3) == 2) { // 3DS + sprintf(buffer, "%dS%dRx%d", num_ranks, die_count, + dram_width); + } else { // MLS + char hchar = (die_count == 2) ? 'D' : 'Q'; + + sprintf(buffer, "%d%cRx%d", num_ranks, hchar, + dram_width); + } + } else { + sprintf(buffer, "%dRx%d", num_ranks, dram_width); + } + + return buffer; +} + +static void report_common_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, const char **dimm_types, int ddr_type, + char *volt_str, int if_num, + int num_ranks, int dram_width, int spd_package) +{ + unsigned int spd_module_type; + char rank_spec[8]; + int spd_ecc; + + spd_module_type = get_dimm_module_type(dimm_config, upper_dimm, + ddr_type); + spd_ecc = get_dimm_ecc(dimm_config, upper_dimm, ddr_type); + + printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package); + printf("LMC%d.DIMM%d: DDR%d %s %s %s, %s\n", + if_num, dimm, ddr_type, dimm_types[spd_module_type], + rank_spec, spd_ecc ? "ECC" : "non-ECC", volt_str); +} + +static void report_ddr3_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, int if_num) +{ + int spd_voltage; + char *volt_str; + int spd_org = read_spd(dimm_config, upper_dimm, + DDR3_SPD_MODULE_ORGANIZATION); + int num_ranks = 1 + ((spd_org >> 3) & 0x7); + int dram_width = 4 << ((spd_org >> 0) & 0x7); + + spd_voltage = read_spd(dimm_config, upper_dimm, + DDR3_SPD_NOMINAL_VOLTAGE); + if (spd_voltage == 0 || spd_voltage & 3) + volt_str = "1.5V"; + if (spd_voltage & 2) + volt_str = "1.35V"; + if (spd_voltage & 4) + volt_str = "1.2xV"; + + report_common_dimm(dimm_config, upper_dimm, dimm, ddr3_dimm_types, + DDR3_DRAM, volt_str, if_num, + num_ranks, dram_width, /*spd_package*/0); +} + +static void report_ddr4_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, int if_num) +{ + int spd_voltage; + char *volt_str; + int spd_package = 0xff & read_spd(dimm_config, upper_dimm, + DDR4_SPD_PACKAGE_TYPE); + int spd_org = 0xff & read_spd(dimm_config, upper_dimm, + DDR4_SPD_MODULE_ORGANIZATION); + int num_ranks = 1 + ((spd_org >> 3) & 0x7); + int dram_width = 4 << ((spd_org >> 0) & 0x7); + + spd_voltage = read_spd(dimm_config, upper_dimm, + DDR4_SPD_MODULE_NOMINAL_VOLTAGE); + if (spd_voltage == 0x01 || spd_voltage & 0x02) + volt_str = "1.2V"; + if (spd_voltage == 0x04 || spd_voltage & 0x08) + volt_str = "TBD1 V"; + if (spd_voltage == 0x10 || spd_voltage & 0x20) + volt_str = "TBD2 V"; + + report_common_dimm(dimm_config, upper_dimm, dimm, ddr4_dimm_types, + DDR4_DRAM, volt_str, if_num, + num_ranks, dram_width, spd_package); +} + +void report_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, int if_num) +{ + int ddr_type; + + /* ddr_type only indicates DDR4 or DDR3 */ + ddr_type = get_ddr_type(dimm_config, upper_dimm); + + if (ddr_type == DDR4_DRAM) + report_ddr4_dimm(dimm_config, 0, dimm, if_num); + else + report_ddr3_dimm(dimm_config, 0, dimm, if_num); +} diff --git a/drivers/ram/octeon/octeon3_lmc.c b/drivers/ram/octeon/octeon3_lmc.c new file mode 100644 index 0000000000..327cdc5873 --- /dev/null +++ b/drivers/ram/octeon/octeon3_lmc.c @@ -0,0 +1,11030 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +/* Random number generator stuff */ + +#define CVMX_RNM_CTL_STATUS 0x0001180040000000 +#define CVMX_OCT_DID_RNG 8ULL + +static u64 cvmx_build_io_address(u64 major_did, u64 sub_did) +{ + return ((0x1ull << 48) | (major_did << 43) | (sub_did << 40)); +} + +static u64 cvmx_rng_get_random64(void) +{ + return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0)); +} + +static void cvmx_rng_enable(void) +{ + u64 val; + + val = csr_rd(CVMX_RNM_CTL_STATUS); + val |= BIT(0) | BIT(1); + csr_wr(CVMX_RNM_CTL_STATUS, val); +} + +#define RLEVEL_PRINTALL_DEFAULT 1 +#define WLEVEL_PRINTALL_DEFAULT 1 + +/* + * Define how many HW WL samples to take for majority voting. + * MUST BE odd!! + * Assume there should only be 2 possible values that will show up, + * so treat ties as a problem!!! + * NOTE: Do not change this without checking the code!!! + */ +#define WLEVEL_LOOPS_DEFAULT 5 + +#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1 +#define SW_WLEVEL_HW_DEFAULT 1 +#define DEFAULT_BEST_RANK_SCORE 9999999 +#define MAX_RANK_SCORE_LIMIT 99 + +/* + * Define how many HW RL samples per rank to take multiple samples will + * allow looking for the best sample score + */ +#define RLEVEL_SAMPLES_DEFAULT 3 + +#define ddr_seq_print(format, ...) do {} while (0) + +struct wlevel_bitcnt { + int bitcnt[4]; +}; + +static void display_dac_dbi_settings(int lmc, int dac_or_dbi, + int ecc_ena, int *settings, char *title); + +static unsigned short load_dac_override(struct ddr_priv *priv, int if_num, + int dac_value, int byte); + +/* "mode" arg */ +#define DBTRAIN_TEST 0 +#define DBTRAIN_DBI 1 +#define DBTRAIN_LFSR 2 + +static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr, + int mode, u64 *xor_data); + +#define LMC_DDR3_RESET_ASSERT 0 +#define LMC_DDR3_RESET_DEASSERT 1 + +static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset) +{ + union cvmx_lmcx_reset_ctl reset_ctl; + + /* + * 4. Deassert DDRn_RESET_L pin by writing + * LMC(0..3)_RESET_CTL[DDR3RST] = 1 + * without modifying any other LMC(0..3)_RESET_CTL fields. + * 5. Read LMC(0..3)_RESET_CTL and wait for the result. + * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us + * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* + * assertion. + */ + debug("LMC%d %s DDR_RESET_L\n", if_num, + (reset == + LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting"); + + reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); + reset_ctl.cn78xx.ddr3rst = reset; + lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64); + + lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); + + udelay(500); +} + +static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num) +{ + /* + * 5.9.6 LMC RESET Initialization + * + * The purpose of this step is to assert/deassert the RESET# pin at the + * DDR3/DDR4 parts. + * + * This LMC RESET step is done for all enabled LMCs. + * + * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts + * are in self refresh and are currently preserving their + * contents. (Software can determine this via + * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of + * this section assumes that the DRAM contents need not be preserved. + * + * The remainder of this section assumes that the CN78XX DDRn_RESET_L + * pin is attached to the RESET# pin of the attached DDR3/DDR4 parts, + * as will be appropriate in many systems. + * + * (In other systems, such as ones that can preserve DDR3/DDR4 part + * contents while CN78XX is powered down, it will not be appropriate to + * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the + * DDR3/DDR4 parts, and this section may not apply.) + * + * The remainder of this section describes the sequence for LMCn. + * + * Perform the following six substeps for LMC reset initialization: + * + * 1. If not done already, assert DDRn_RESET_L pin by writing + * LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other + * LMC(0..3)_RESET_CTL fields. + */ + + if (!ddr_memory_preserved(priv)) { + /* + * 2. Read LMC(0..3)_RESET_CTL and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); + + /* + * 3. Wait until RESET# assertion-time requirement from JEDEC + * DDR3/DDR4 specification is satisfied (200 us during a + * power-on ramp, 100ns when power is already stable). + */ + + udelay(200); + + /* + * 4. Deassert DDRn_RESET_L pin by writing + * LMC(0..3)_RESET_CTL[DDR3RST] = 1 + * without modifying any other LMC(0..3)_RESET_CTL fields. + * 5. Read LMC(0..3)_RESET_CTL and wait for the result. + * 6. Wait a minimum of 500us. This guarantees the necessary + * T = 500us delay between DDRn_RESET_L deassertion and + * DDRn_DIMM*_CKE* assertion. + */ + cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT); + + /* Toggle Reset Again */ + /* That is, assert, then de-assert, one more time */ + cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT); + cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT); + } +} + +void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num, + int sequence) +{ + /* + * 3. Without changing any other fields in LMC(0)_CONFIG, write + * LMC(0)_CONFIG[RANKMASK] then write both + * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write + * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate + * the ranks that will participate in the sequence. + * + * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or + * selfrefresh exit, depending on whether the DRAM parts are in + * self-refresh and whether their contents should be preserved. While + * LMC performs these sequences, it will not perform any other DDR3 + * transactions. When the sequence is complete, hardware sets the + * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been + * initialized. + * + * If power-up/init is selected immediately following a DRESET + * assertion, LMC executes the sequence described in the "Reset and + * Initialization Procedure" section of the JEDEC DDR3 + * specification. This includes activating CKE, writing all four DDR3 + * mode registers on all selected ranks, and issuing the required + * ZQCL + * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks + * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1, + * LMC writes the JEDEC standard SSTE32882 control words selected by + * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and + * the first DDR3 mode register write operation. + * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the + * corresponding DIMM is not present. + * + * If self-refresh exit is selected, LMC executes the required SRX + * command followed by a refresh and ZQ calibration. Section 4.5 + * describes behavior of a REF + ZQCS. LMC does not write the DDR3 + * mode registers as part of this sequence, and the mode register + * parameters must match at self-refresh entry and exit times. + * + * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] + * to be set. + * + * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have + * been initialized. + */ + + union cvmx_lmcx_seq_ctl seq_ctl; + union cvmx_lmcx_config lmc_config; + int timeout; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + lmc_config.s.rankmask = rank_mask; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + + seq_ctl.u64 = 0; + + seq_ctl.s.init_start = 1; + seq_ctl.s.seq_sel = sequence; + + ddr_seq_print + ("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n", + rank_mask, sequence, sequence_str[sequence]); + + if (seq_ctl.s.seq_sel == 3) + debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num, + rank_mask); + + lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64); + lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num)); + + timeout = 100; + do { + udelay(100); /* Wait a while */ + seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num)); + if (--timeout == 0) { + printf("Sequence %d timed out\n", sequence); + break; + } + } while (seq_ctl.s.seq_complete != 1); + + ddr_seq_print(" LMC sequence=%x: Completed.\n", sequence); +} + +#define bdk_numa_get_address(n, p) ((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT) +#define AREA_BASE_OFFSET BIT_ULL(26) + +static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p, + u64 bitmask, u64 *xor_data) +{ + u64 p1, p2, d1, d2; + u64 v, v1; + u64 p2offset = (1ULL << 26); // offset to area 2 + u64 datamask; + u64 xor; + u64 i, j, k; + u64 ii; + int errors = 0; + //u64 index; + u64 pattern1 = cvmx_rng_get_random64(); + u64 pattern2 = 0; + u64 bad_bits[2] = { 0, 0 }; + int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18; + union cvmx_l2c_ctl l2c_ctl; + int burst; + int saved_dissblkdty; + int node = 0; + + // Force full cacheline write-backs to boost traffic + l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL); + saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty; + l2c_ctl.cn78xx.dissblkdty = 1; + l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64); + + if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) + kbitno = 18; + + // Byte lanes may be clear in the mask to indicate no testing on that + //lane. + datamask = bitmask; + + /* + * Add offset to both test regions to not clobber boot stuff + * when running from L2 for NAND boot. + */ + p += AREA_BASE_OFFSET; // make sure base is out of the way of boot + + // final address must include LMC and node + p |= (lmc << 7); /* Map address into proper interface */ + p = bdk_numa_get_address(node, p); /* Map to node */ + p |= 1ull << 63; + +#define II_INC BIT_ULL(22) +#define II_MAX BIT_ULL(22) +#define K_INC BIT_ULL(14) +#define K_MAX BIT_ULL(kbitno) +#define J_INC BIT_ULL(9) +#define J_MAX BIT_ULL(12) +#define I_INC BIT_ULL(3) +#define I_MAX BIT_ULL(7) + + debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n", + node, lmc, __func__, p, p + p2offset, 1ULL << kbitno); + + // loops are ordered so that only a single 64-bit slot is written to + // each cacheline at one time, then the cachelines are forced out; + // this should maximize read/write traffic + + // FIXME? extend the range of memory tested!! + for (ii = 0; ii < II_MAX; ii += II_INC) { + for (i = 0; i < I_MAX; i += I_INC) { + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + p1 = p + ii + k + j; + p2 = p1 + p2offset; + + v = pattern1 * (p1 + i); + // write the same thing to both areas + v1 = v; + + cvmx_write64_uint64(p1 + i, v); + cvmx_write64_uint64(p2 + i, v1); + + CVMX_CACHE_WBIL2(p1, 0); + CVMX_CACHE_WBIL2(p2, 0); + } + } + } + } + + CVMX_DCACHE_INVALIDATE; + + debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc); + + /* Make a series of passes over the memory areas. */ + + for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) { + u64 this_pattern = cvmx_rng_get_random64(); + + pattern2 ^= this_pattern; + + /* + * XOR the data with a random value, applying the change to both + * memory areas. + */ + + // FIXME? extend the range of memory tested!! + for (ii = 0; ii < II_MAX; ii += II_INC) { + // FIXME: rearranged, did not make much difference? + for (i = 0; i < I_MAX; i += I_INC) { + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + p1 = p + ii + k + j; + p2 = p1 + p2offset; + + v = cvmx_read64_uint64(p1 + + i) ^ + this_pattern; + v1 = cvmx_read64_uint64(p2 + + i) ^ + this_pattern; + + cvmx_write64_uint64(p1 + i, v); + cvmx_write64_uint64(p2 + i, v1); + + CVMX_CACHE_WBIL2(p1, 0); + CVMX_CACHE_WBIL2(p2, 0); + } + } + } + } + + CVMX_DCACHE_INVALIDATE; + + debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n", + node, lmc); + + /* + * Look for differences in the areas. If there is a mismatch, + * reset both memory locations with the same pattern. Failing + * to do so means that on all subsequent passes the pair of + * locations remain out of sync giving spurious errors. + */ + + // FIXME: Change the loop order so that an entire cache line + // is compared at one time. This is so that a read + // error that occurs *anywhere* on the cacheline will + // be caught, rather than comparing only 1 cacheline + // slot at a time, where an error on a different + // slot will be missed that time around + // Does the above make sense? + + // FIXME? extend the range of memory tested!! + for (ii = 0; ii < II_MAX; ii += II_INC) { + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + p1 = p + ii + k + j; + p2 = p1 + p2offset; + + // process entire cachelines in the + //innermost loop + for (i = 0; i < I_MAX; i += I_INC) { + int bybit = 1; + // start in byte lane 0 + u64 bymsk = 0xffULL; + + // FIXME: this should predict + // what we find...??? + v = ((p1 + i) * pattern1) ^ + pattern2; + d1 = cvmx_read64_uint64(p1 + i); + d2 = cvmx_read64_uint64(p2 + i); + + // union of error bits only in + // active byte lanes + xor = ((d1 ^ v) | (d2 ^ v)) & + datamask; + + if (!xor) + continue; + + // accumulate bad bits + bad_bits[0] |= xor; + + while (xor != 0) { + debug("ERROR(%03d): [0x%016llX] [0x%016llX] expected 0x%016llX d1 %016llX d2 %016llX\n", + burst, p1, p2, v, + d1, d2); + // error(s) in this lane + if (xor & bymsk) { + // set the byte + // error bit + errors |= bybit; + // clear byte + // lane in + // error bits + xor &= ~bymsk; + // clear the + // byte lane in + // the mask + datamask &= ~bymsk; +#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS + // nothing + // left to do + if (datamask == 0) { + return errors; + } +#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */ + } + // move mask into + // next byte lane + bymsk <<= 8; + // move bit into next + // byte position + bybit <<= 1; + } + } + CVMX_CACHE_WBIL2(p1, 0); + CVMX_CACHE_WBIL2(p2, 0); + } + } + } + + debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n", + node, lmc); + } + + if (xor_data) { // send the bad bits back... + xor_data[0] = bad_bits[0]; + xor_data[1] = bad_bits[1]; // let it be zeroed + } + + // Restore original setting that could enable partial cacheline writes + l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty; + l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64); + + return errors; +} + +static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank, + int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1) +{ + union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u64 = 0; + lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr; + lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel; + lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank; + lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value = + (mr_wr_addr == -1) ? 1 : 0; + lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1; + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); + + /* Mode Register Write */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); +} + +#define INV_A0_17(x) ((x) ^ 0x22bf8) + +static void set_mpr_mode(struct ddr_priv *priv, int rank_mask, + int if_num, int dimm_count, int mpr, int bg1) +{ + int rankx; + + debug("All Ranks: Set mpr mode = %x %c-side\n", + mpr, (bg1 == 0) ? 'A' : 'B'); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + if (bg1 == 0) { + /* MR3 A-side */ + ddr4_mrw(priv, if_num, rankx, mpr << 2, 3, bg1); + } else { + /* MR3 B-side */ + ddr4_mrw(priv, if_num, rankx, INV_A0_17(mpr << 2), ~3, + bg1); + } + } +} + +static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num, + int rank, int page, int location) +{ + union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num)); + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0; + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */ + lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; + lmc_mr_mpr_ctl.cn70xx.mpr_loc = location; + lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0; /* Read=0, Write=1 */ + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); + + /* MPR register access sequence */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9); + + debug("LMC_MR_MPR_CTL : 0x%016llx\n", + lmc_mr_mpr_ctl.u64); + debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr); + debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel); + debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mpr_loc); + debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mpr_wr); +} + +static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable) +{ + union cvmx_lmcx_control lmc_control; + int save_rdimm_mode; + + lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + save_rdimm_mode = lmc_control.s.rdimm_ena; + lmc_control.s.rdimm_ena = enable; + debug("Setting RDIMM_ENA = %x\n", enable); + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64); + + return save_rdimm_mode; +} + +static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank, + int page, int location, u64 *mpr_data) +{ + do_ddr4_mpr_read(priv, if_num, rank, page, location); + + mpr_data[0] = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num)); +} + +/* Display MPR values for Page */ +static void display_mpr_page(struct ddr_priv *priv, int rank_mask, + int if_num, int page) +{ + int rankx, location; + u64 mpr_data[3]; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ", + if_num, rankx, page); + for (location = 0; location < 4; location++) { + ddr4_mpr_read(priv, if_num, rankx, page, location, + mpr_data); + debug("0x%02llx ", mpr_data[0] & 0xFF); + } + debug("\n"); + + } /* for (rankx = 0; rankx < 4; rankx++) */ +} + +static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank, + int page, int location, u8 mpr_data) +{ + union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u64 = 0; + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mpr_data; + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */ + lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; + lmc_mr_mpr_ctl.cn70xx.mpr_loc = location; + lmc_mr_mpr_ctl.cn70xx.mpr_wr = 1; /* Read=0, Write=1 */ + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); + + /* MPR register access sequence */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9); + + debug("LMC_MR_MPR_CTL : 0x%016llx\n", + lmc_mr_mpr_ctl.u64); + debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr); + debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel); + debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mpr_loc); + debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mpr_wr); +} + +static void set_vref(struct ddr_priv *priv, int if_num, int rank, + int range, int value) +{ + union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; + union cvmx_lmcx_modereg_params3 lmc_modereg_params3; + int mr_wr_addr = 0; + + lmc_mr_mpr_ctl.u64 = 0; + lmc_modereg_params3.u64 = lmc_rd(priv, + CVMX_LMCX_MODEREG_PARAMS3(if_num)); + + /* A12:A10 tCCD_L */ + mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10; + mr_wr_addr |= 1 << 7; /* A7 1 = Enable(Training Mode) */ + mr_wr_addr |= range << 6; /* A6 vrefDQ Training Range */ + mr_wr_addr |= value << 0; /* A5:A0 vrefDQ Training Value */ + + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr; + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6; /* Write MR6 */ + lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); + + /* 0x8 = Mode Register Write */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); + + /* + * It is vendor specific whether vref_value is captured with A7=1. + * A subsequent MRS might be necessary. + */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); + + mr_wr_addr &= ~(1 << 7); /* A7 0 = Disable(Training Mode) */ + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr; + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); +} + +static void set_dram_output_inversion(struct ddr_priv *priv, int if_num, + int dimm_count, int rank_mask, + int inversion) +{ + union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl; + union cvmx_lmcx_dimmx_params lmc_dimmx_params; + union cvmx_lmcx_dimm_ctl lmc_dimm_ctl; + int dimm_no; + + /* Don't touch extenced register control words */ + lmc_ddr4_dimm_ctl.u64 = 0; + lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64); + + debug("All DIMMs: Register Control Word RC0 : %x\n", + (inversion & 1)); + + for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) { + lmc_dimmx_params.u64 = + lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num)); + lmc_dimmx_params.s.rc0 = + (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1); + + lmc_wr(priv, + CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num), + lmc_dimmx_params.u64); + } + + /* LMC0_DIMM_CTL */ + lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); + lmc_dimm_ctl.s.dimm0_wmask = 0x1; + lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000; + + debug("LMC DIMM_CTL : 0x%016llx\n", + lmc_dimm_ctl.u64); + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64); + + oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); /* Init RCW */ +} + +static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask, + int if_num, int dimm_count, int pattern, + int location_mask) +{ + int rankx; + int location; + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + for (location = 0; location < 4; ++location) { + if (!(location_mask & (1 << location))) + continue; + + ddr4_mpr_write(priv, if_num, rankx, + /* page */ 0, /* location */ location, + pattern); + } + } +} + +static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask, + int if_num, int dimm_count) +{ + int save_ref_zqcs_int; + union cvmx_lmcx_config lmc_config; + + /* + * Okay, here is the latest sequence. This should work for all + * chips and passes (78,88,73,etc). This sequence should be run + * immediately after DRAM INIT. The basic idea is to write the + * same pattern into each of the 4 MPR locations in the DRAM, so + * that the same value is returned when doing MPR reads regardless + * of the inversion state. My advice is to put this into a + * function, change_rdimm_mpr_pattern or something like that, so + * that it can be called multiple times, as I think David wants a + * clock-like pattern for OFFSET training, but does not want a + * clock pattern for Bit-Deskew. You should then be able to call + * this at any point in the init sequence (after DRAM init) to + * change the pattern to a new value. + * Mike + * + * A correction: PHY doesn't need any pattern during offset + * training, but needs clock like pattern for internal vref and + * bit-dskew training. So for that reason, these steps below have + * to be conducted before those trainings to pre-condition + * the pattern. David + * + * Note: Step 3, 4, 8 and 9 have to be done through RDIMM + * sequence. If you issue MRW sequence to do RCW write (in o78 pass + * 1 at least), LMC will still do two commands because + * CONTROL[RDIMM_ENA] is still set high. We don't want it to have + * any unintentional mode register write so it's best to do what + * Mike is doing here. + * Andrew + */ + + /* 1) Disable refresh (REF_ZQCS_INT = 0) */ + + debug("1) Disable refresh (REF_ZQCS_INT = 0)\n"); + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int; + lmc_config.cn78xx.ref_zqcs_int = 0; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + + /* + * 2) Put all devices in MPR mode (Run MRW sequence (sequence=8) + * with MODEREG_PARAMS0[MPRLOC]=0, + * MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and + * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) + */ + + debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n"); + + /* A-side */ + set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0); + /* B-side */ + set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1); + + /* + * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set + * the value you would like directly into + * MR_MPR_CTL[MR_WR_ADDR] + */ + + /* + * 3) Disable RCD Parity (if previously enabled) - parity does not + * work if inversion disabled + */ + + debug("3) Disable RCD Parity\n"); + + /* + * 4) Disable Inversion in the RCD. + * a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it + * may be easier to use the MRW sequence (seq_sel=8). Just set + * MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data, + * MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg + */ + + debug("4) Disable Inversion in the RCD.\n"); + + set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1); + + /* + * 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out + * non-inverted. + */ + + debug("5) Disable CONTROL[RDIMM_ENA]\n"); + + set_rdimm_mode(priv, if_num, 0); + + /* + * 6) Write all 4 MPR registers with the desired pattern (have to + * do this for all enabled ranks) + * a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3, + * MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern + */ + + debug("6) Write all 4 MPR page 0 Training Patterns\n"); + + write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8); + + /* 7) Re-enable RDIMM_ENA */ + + debug("7) Re-enable RDIMM_ENA\n"); + + set_rdimm_mode(priv, if_num, 1); + + /* 8) Re-enable RDIMM inversion */ + + debug("8) Re-enable RDIMM inversion\n"); + + set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0); + + /* 9) Re-enable RDIMM parity (if desired) */ + + debug("9) Re-enable RDIMM parity (if desired)\n"); + + /* + * 10)Take B-side devices out of MPR mode (Run MRW sequence + * (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0, + * MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and + * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) + */ + + debug("10)Take B-side devices out of MPR mode\n"); + + set_mpr_mode(priv, rank_mask, if_num, dimm_count, + /* mpr */ 0, /* bg1 */ 1); + + /* + * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and + * set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR] + */ + + /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */ + + debug("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n"); + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); +} + +static int validate_hwl_seq(int *wl, int *seq) +{ + // sequence index, step through the sequence array + int seqx; + int bitnum; + + seqx = 0; + + while (seq[seqx + 1] >= 0) { // stop on next seq entry == -1 + // but now, check current versus next + bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx + 1]]; + // magic validity number (see matrix above) + if (!((1 << bitnum) & 0xBDE7)) + return 1; + seqx++; + } + + return 0; +} + +static int validate_hw_wl_settings(int if_num, + union cvmx_lmcx_wlevel_rankx + *lmc_wlevel_rank, int is_rdimm, int ecc_ena) +{ + int wl[9], byte, errors; + + // arrange the sequences so + // index 0 has byte 0, etc, ECC in middle + int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 }; + // index 0 is ECC, then go down + int rseq1[] = { 8, 3, 2, 1, 0, -1 }; + // index 0 has byte 4, then go up + int rseq2[] = { 4, 5, 6, 7, -1 }; + // index 0 has byte 0, etc, no ECC + int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 }; + // index 0 is byte 3, then go down, no ECC + int rseq1no[] = { 3, 2, 1, 0, -1 }; + + // in the CSR, bytes 0-7 are always data, byte 8 is ECC + for (byte = 0; byte < (8 + ecc_ena); byte++) { + // preprocess :-) + wl[byte] = (get_wl_rank(lmc_wlevel_rank, byte) >> + 1) & 3; + } + + errors = 0; + if (is_rdimm) { // RDIMM order + errors = validate_hwl_seq(wl, (ecc_ena) ? rseq1 : rseq1no); + errors += validate_hwl_seq(wl, rseq2); + } else { // UDIMM order + errors = validate_hwl_seq(wl, (ecc_ena) ? useq : useqno); + } + + return errors; +} + +static unsigned int extr_wr(u64 u, int x) +{ + return (unsigned int)(((u >> (x * 12 + 5)) & 0x3ULL) | + ((u >> (51 + x - 2)) & 0x4ULL)); +} + +static void insrt_wr(u64 *up, int x, int v) +{ + u64 u = *up; + + u &= ~(((0x3ULL) << (x * 12 + 5)) | ((0x1ULL) << (51 + x))); + *up = (u | ((v & 0x3ULL) << (x * 12 + 5)) | + ((v & 0x4ULL) << (51 + x - 2))); +} + +/* Read out Deskew Settings for DDR */ + +struct deskew_bytes { + u16 bits[8]; +}; + +struct deskew_data { + struct deskew_bytes bytes[9]; +}; + +struct dac_data { + int bytes[9]; +}; + +// T88 pass 1, skip 4=DAC +static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 }; +// T88 Pass 2, skip 4=DAC and 5=DBI +static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 }; + +static void get_deskew_settings(struct ddr_priv *priv, int if_num, + struct deskew_data *dskdat) +{ + union cvmx_lmcx_phy_ctl phy_ctl; + union cvmx_lmcx_config lmc_config; + int bit_index; + int byte_lane, byte_limit; + // NOTE: these are for pass 2.x + int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X); + const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena; + + memset(dskdat, 0, sizeof(*dskdat)); + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.dsk_dbg_clk_scaler = 3; + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + phy_ctl.s.dsk_dbg_byte_sel = byte_lane; // set byte lane + + for (bit_index = 0; bit_index < 8; ++bit_index) { + // set bit number and start read sequence + phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index]; + phy_ctl.s.dsk_dbg_rd_start = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + // poll for read sequence to complete + do { + phy_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + } while (phy_ctl.s.dsk_dbg_rd_complete != 1); + + // record the data + dskdat->bytes[byte_lane].bits[bit_index] = + phy_ctl.s.dsk_dbg_rd_data & 0x3ff; + } + } +} + +static void display_deskew_settings(struct ddr_priv *priv, int if_num, + struct deskew_data *dskdat, + int print_enable) +{ + int byte_lane; + int bit_num; + u16 flags, deskew; + union cvmx_lmcx_config lmc_config; + int byte_limit; + const char *fc = " ?-=+*#&"; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + if (print_enable) { + debug("N0.LMC%d: Deskew Data: Bit => :", + if_num); + for (bit_num = 7; bit_num >= 0; --bit_num) + debug(" %3d ", bit_num); + debug("\n"); + } + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + if (print_enable) + debug("N0.LMC%d: Bit Deskew Byte %d %s :", + if_num, byte_lane, + (print_enable >= 3) ? "FINAL" : " "); + + for (bit_num = 7; bit_num >= 0; --bit_num) { + flags = dskdat->bytes[byte_lane].bits[bit_num] & 7; + deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3; + + if (print_enable) + debug(" %3d %c", deskew, fc[flags ^ 1]); + + } /* for (bit_num = 7; bit_num >= 0; --bit_num) */ + + if (print_enable) + debug("\n"); + } +} + +static void override_deskew_settings(struct ddr_priv *priv, int if_num, + struct deskew_data *dskdat) +{ + union cvmx_lmcx_phy_ctl phy_ctl; + union cvmx_lmcx_config lmc_config; + + int bit, byte_lane, byte_limit; + u64 csr_data; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + + phy_ctl.s.phy_reset = 0; + phy_ctl.s.dsk_dbg_num_bits_sel = 1; + phy_ctl.s.dsk_dbg_offset = 0; + phy_ctl.s.dsk_dbg_clk_scaler = 3; + + phy_ctl.s.dsk_dbg_wr_mode = 1; + phy_ctl.s.dsk_dbg_load_dis = 0; + phy_ctl.s.dsk_dbg_overwrt_ena = 0; + + phy_ctl.s.phy_dsk_reset = 0; + + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + csr_data = 0; + // FIXME: can we ignore DBI? + for (bit = 0; bit < 8; ++bit) { + // fetch input and adjust + u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) & + 0x7F; + + /* + * lmc_general_purpose0.data[6:0] // DQ0 + * lmc_general_purpose0.data[13:7] // DQ1 + * lmc_general_purpose0.data[20:14] // DQ2 + * lmc_general_purpose0.data[27:21] // DQ3 + * lmc_general_purpose0.data[34:28] // DQ4 + * lmc_general_purpose0.data[41:35] // DQ5 + * lmc_general_purpose0.data[48:42] // DQ6 + * lmc_general_purpose0.data[55:49] // DQ7 + * lmc_general_purpose0.data[62:56] // DBI + */ + csr_data |= (bits << (7 * bit)); + + } /* for (bit = 0; bit < 8; ++bit) */ + + // update GP0 with the bit data for this byte lane + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data); + lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num)); + + // start the deskew load sequence + phy_ctl.s.dsk_dbg_byte_sel = byte_lane; + phy_ctl.s.dsk_dbg_rd_start = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + // poll for read sequence to complete + do { + udelay(100); + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + } while (phy_ctl.s.dsk_dbg_rd_complete != 1); + } + + // tell phy to use the new settings + phy_ctl.s.dsk_dbg_overwrt_ena = 1; + phy_ctl.s.dsk_dbg_rd_start = 0; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + phy_ctl.s.dsk_dbg_wr_mode = 0; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); +} + +static void process_by_rank_dac(struct ddr_priv *priv, int if_num, + int rank_mask, struct dac_data *dacdat) +{ + union cvmx_lmcx_config lmc_config; + int rankx, byte_lane; + int byte_limit; + int rank_count; + struct dac_data dacsum; + int lane_probs; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + memset((void *)&dacsum, 0, sizeof(dacsum)); + rank_count = 0; + lane_probs = 0; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + rank_count++; + + display_dac_dbi_settings(if_num, /*dac */ 1, + lmc_config.s.ecc_ena, + &dacdat[rankx].bytes[0], + "By-Ranks VREF"); + // sum + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + if (rank_count == 2) { + int ranks_diff = + abs((dacsum.bytes[byte_lane] - + dacdat[rankx].bytes[byte_lane])); + + // FIXME: is 19 a good number? + if (ranks_diff > 19) + lane_probs |= (1 << byte_lane); + } + dacsum.bytes[byte_lane] += + dacdat[rankx].bytes[byte_lane]; + } + } + + // average + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) + dacsum.bytes[byte_lane] /= rank_count; // FIXME: nint? + + display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena, + &dacsum.bytes[0], "All-Rank VREF"); + + if (lane_probs) { + debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n", + if_num, lane_probs); + } + + // finally, write the averaged DAC values + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + load_dac_override(priv, if_num, dacsum.bytes[byte_lane], + byte_lane); + } +} + +static void process_by_rank_dsk(struct ddr_priv *priv, int if_num, + int rank_mask, struct deskew_data *dskdat) +{ + union cvmx_lmcx_config lmc_config; + int rankx, lane, bit; + int byte_limit; + struct deskew_data dsksum, dskcnt; + u16 deskew; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + memset((void *)&dsksum, 0, sizeof(dsksum)); + memset((void *)&dskcnt, 0, sizeof(dskcnt)); + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + // sum ranks + for (lane = 0; lane < byte_limit; lane++) { + for (bit = 0; bit < 8; ++bit) { + deskew = dskdat[rankx].bytes[lane].bits[bit]; + // if flags indicate sat hi or lo, skip it + if (deskew & 6) + continue; + + // clear flags + dsksum.bytes[lane].bits[bit] += + deskew & ~7; + // count entries + dskcnt.bytes[lane].bits[bit] += 1; + } + } + } + + // average ranks + for (lane = 0; lane < byte_limit; lane++) { + for (bit = 0; bit < 8; ++bit) { + int div = dskcnt.bytes[lane].bits[bit]; + + if (div > 0) { + dsksum.bytes[lane].bits[bit] /= div; + // clear flags + dsksum.bytes[lane].bits[bit] &= ~7; + // set LOCK + dsksum.bytes[lane].bits[bit] |= 1; + } else { + // FIXME? use reset value? + dsksum.bytes[lane].bits[bit] = + (64 << 3) | 1; + } + } + } + + // TME for FINAL version + display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3); + + // finally, write the averaged DESKEW values + override_deskew_settings(priv, if_num, &dsksum); +} + +struct deskew_counts { + int saturated; // number saturated + int unlocked; // number unlocked + int nibrng_errs; // nibble range errors + int nibunl_errs; // nibble unlocked errors + int bitval_errs; // bit value errors +}; + +#define MIN_BITVAL 17 +#define MAX_BITVAL 110 + +static void validate_deskew_training(struct ddr_priv *priv, int rank_mask, + int if_num, struct deskew_counts *counts, + int print_flags) +{ + int byte_lane, bit_index, nib_num; + int nibrng_errs, nibunl_errs, bitval_errs; + union cvmx_lmcx_config lmc_config; + s16 nib_min[2], nib_max[2], nib_unl[2]; + int byte_limit; + int print_enable = print_flags & 1; + struct deskew_data dskdat; + s16 flags, deskew; + const char *fc = " ?-=+*#&"; + int bit_last; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena; + + memset(counts, 0, sizeof(struct deskew_counts)); + + get_deskew_settings(priv, if_num, &dskdat); + + if (print_enable) { + debug("N0.LMC%d: Deskew Settings: Bit => :", + if_num); + for (bit_index = 7; bit_index >= 0; --bit_index) + debug(" %3d ", bit_index); + debug("\n"); + } + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + if (print_enable) + debug("N0.LMC%d: Bit Deskew Byte %d %s :", + if_num, byte_lane, + (print_flags & 2) ? "FINAL" : " "); + + nib_min[0] = 127; + nib_min[1] = 127; + nib_max[0] = 0; + nib_max[1] = 0; + nib_unl[0] = 0; + nib_unl[1] = 0; + + if (lmc_config.s.mode32b == 1 && byte_lane == 4) { + bit_last = 3; + if (print_enable) + debug(" "); + } else { + bit_last = 7; + } + + for (bit_index = bit_last; bit_index >= 0; --bit_index) { + nib_num = (bit_index > 3) ? 1 : 0; + + flags = dskdat.bytes[byte_lane].bits[bit_index] & 7; + deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3; + + counts->saturated += !!(flags & 6); + + // Do range calc even when locked; it could happen + // that a bit is still unlocked after final retry, + // and we want to have an external retry if a RANGE + // error is present at exit... + nib_min[nib_num] = min(nib_min[nib_num], deskew); + nib_max[nib_num] = max(nib_max[nib_num], deskew); + + if (!(flags & 1)) { // only when not locked + counts->unlocked += 1; + nib_unl[nib_num] += 1; + } + + if (print_enable) + debug(" %3d %c", deskew, fc[flags ^ 1]); + } + + /* + * Now look for nibble errors + * + * For bit 55, it looks like a bit deskew problem. When the + * upper nibble of byte 6 needs to go to saturation, bit 7 + * of byte 6 locks prematurely at 64. For DIMMs with raw + * card A and B, can we reset the deskew training when we + * encounter this case? The reset criteria should be looking + * at one nibble at a time for raw card A and B; if the + * bit-deskew setting within a nibble is different by > 33, + * we'll issue a reset to the bit deskew training. + * + * LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64 + */ + // upper nibble range, then lower nibble range + nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0; + nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0; + + // check for nibble all unlocked + nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0; + + // check for bit value errors, ie < 17 or > 110 + // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL + bitval_errs = ((nib_max[1] > MAX_BITVAL) || + (nib_max[0] > MAX_BITVAL)) ? 1 : 0; + bitval_errs |= ((nib_min[1] < MIN_BITVAL) || + (nib_min[0] < MIN_BITVAL)) ? 1 : 0; + + if ((nibrng_errs != 0 || nibunl_errs != 0 || + bitval_errs != 0) && print_enable) { + debug(" %c%c%c", + (nibrng_errs) ? 'R' : ' ', + (nibunl_errs) ? 'U' : ' ', + (bitval_errs) ? 'V' : ' '); + } + + if (print_enable) + debug("\n"); + + counts->nibrng_errs |= (nibrng_errs << byte_lane); + counts->nibunl_errs |= (nibunl_errs << byte_lane); + counts->bitval_errs |= (bitval_errs << byte_lane); + } +} + +static unsigned short load_dac_override(struct ddr_priv *priv, int if_num, + int dac_value, int byte) +{ + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + // single bytelanes incr by 1; A is for ALL + int bytex = (byte == 0x0A) ? byte : byte + 1; + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + SET_DDR_DLL_CTL3(byte_sel, bytex); + SET_DDR_DLL_CTL3(offset, dac_value >> 1); + + ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn73xx.bit_select = 0xC; /* vref bypass setting load */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn73xx.bit_select = 0xD; /* vref bypass on. */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); // flush writes + + return (unsigned short)GET_DDR_DLL_CTL3(offset); +} + +// arg dac_or_dbi is 1 for DAC, 0 for DBI +// returns 9 entries (bytelanes 0 through 8) in settings[] +// returns 0 if OK, -1 if a problem +static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num, + int dac_or_dbi, int *settings) +{ + union cvmx_lmcx_phy_ctl phy_ctl; + int byte_lane, bit_num; + int deskew; + int dac_value; + int new_deskew_layout = 0; + + new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX); + new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) && + !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)); + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.dsk_dbg_clk_scaler = 3; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + bit_num = (dac_or_dbi) ? 4 : 5; + // DBI not available + if (bit_num == 5 && !new_deskew_layout) + return -1; + + // FIXME: always assume ECC is available + for (byte_lane = 8; byte_lane >= 0; --byte_lane) { + //set byte lane and bit to read + phy_ctl.s.dsk_dbg_bit_sel = bit_num; + phy_ctl.s.dsk_dbg_byte_sel = byte_lane; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + //start read sequence + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.dsk_dbg_rd_start = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + //poll for read sequence to complete + do { + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + } while (phy_ctl.s.dsk_dbg_rd_complete != 1); + + // keep the flag bits where they are for DBI + deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */ + dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff; + + settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew; + } + + return 0; +} + +// print out the DBI settings array +// arg dac_or_dbi is 1 for DAC, 0 for DBI +static void display_dac_dbi_settings(int lmc, int dac_or_dbi, + int ecc_ena, int *settings, char *title) +{ + int byte; + int flags; + int deskew; + const char *fc = " ?-=+*#&"; + + debug("N0.LMC%d: %s %s Settings %d:0 :", + lmc, title, (dac_or_dbi) ? "DAC" : "DBI", 7 + ecc_ena); + // FIXME: what about 32-bit mode? + for (byte = (7 + ecc_ena); byte >= 0; --byte) { + if (dac_or_dbi) { // DAC + flags = 1; // say its locked to get blank + deskew = settings[byte] & 0xff; + } else { // DBI + flags = settings[byte] & 7; + deskew = (settings[byte] >> 3) & 0x7f; + } + debug(" %3d %c", deskew, fc[flags ^ 1]); + } + debug("\n"); +} + +// Find a HWL majority +static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc, + int *xc, int *cc) +{ + int ix, ic; + + *mx = -1; + *mc = 0; + *xc = 0; + *cc = 0; + + for (ix = 0; ix < 4; ix++) { + ic = bc->bitcnt[ix]; + + // make a bitmask of the ones with a count + if (ic > 0) { + *mc |= (1 << ix); + *cc += 1; // count how many had non-zero counts + } + + // find the majority + if (ic > *xc) { // new max? + *xc = ic; // yes + *mx = ix; // set its index + } + } + + return (*mx << 1); +} + +// Evaluate the DAC settings array +static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings) +{ + int byte, lane, dac, comp; + int last = (if_64b) ? 7 : 3; + + // FIXME: change the check...??? + // this looks only for sets of DAC values whose max/min differ by a lot + // let any EVEN go so long as it is within range... + for (byte = (last + ecc_ena); byte >= 0; --byte) { + dac = settings[byte] & 0xff; + + for (lane = (last + ecc_ena); lane >= 0; --lane) { + comp = settings[lane] & 0xff; + if (abs((dac - comp)) > 25) + return 1; + } + } + + return 0; +} + +static void perform_offset_training(struct ddr_priv *priv, int rank_mask, + int if_num) +{ + union cvmx_lmcx_phy_ctl lmc_phy_ctl; + u64 orig_phy_ctl; + const char *s; + + /* + * 4.8.6 LMC Offset Training + * + * LMC requires input-receiver offset training. + * + * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1 + */ + lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + orig_phy_ctl = lmc_phy_ctl.u64; + lmc_phy_ctl.s.dac_on = 1; + + // allow full CSR override + s = lookup_env_ull(priv, "ddr_phy_ctl"); + if (s) + lmc_phy_ctl.u64 = strtoull(s, NULL, 0); + + // do not print or write if CSR does not change... + if (lmc_phy_ctl.u64 != orig_phy_ctl) { + debug("PHY_CTL : 0x%016llx\n", + lmc_phy_ctl.u64); + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64); + } + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. + */ + /* Start Offset training sequence */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B); +} + +static void perform_internal_vref_training(struct ddr_priv *priv, + int rank_mask, int if_num) +{ + union cvmx_lmcx_ext_config ext_config; + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + + // First, make sure all byte-lanes are out of VREF bypass mode + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + ddr_dll_ctl3.cn78xx.byte_sel = 0x0A; /* all byte-lanes */ + ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn78xx.bit_select = 0x0E; /* vref bypass off. */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + /* + * 4.8.7 LMC Internal vref Training + * + * LMC requires input-reference-voltage training. + * + * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0. + */ + ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + ext_config.s.vrefint_seq_deskew = 0; + + ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n", + ext_config.s.vrefint_seq_deskew); + + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64); + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. + */ + /* Start LMC Internal vref Training */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); +} + +#define dbg_avg(format, ...) // debug(format, ##__VA_ARGS__) + +static int process_samples_average(s16 *bytes, int num_samples, + int lmc, int lane_no) +{ + int i, sadj, sum = 0, ret, asum, trunc; + s16 smin = 32767, smax = -32768; + int nmin, nmax; + //int rng; + + dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no); + + for (i = 0; i < num_samples; i++) { + sum += bytes[i]; + if (bytes[i] < smin) + smin = bytes[i]; + if (bytes[i] > smax) + smax = bytes[i]; + dbg_avg(" %3d", bytes[i]); + } + + nmin = 0; + nmax = 0; + for (i = 0; i < num_samples; i++) { + if (bytes[i] == smin) + nmin += 1; + if (bytes[i] == smax) + nmax += 1; + } + dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)", + smin, nmin, smax, nmax, rng, num_samples); + + asum = sum - smin - smax; + + sadj = divide_nint(asum * 10, (num_samples - 2)); + + trunc = asum / (num_samples - 2); + + dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc); + + sadj = divide_nint(sadj, 10); + if (trunc & 1) + ret = trunc; + else if (sadj & 1) + ret = sadj; + else + ret = trunc + 1; + + dbg_avg(" -> %3d\n", ret); + + return ret; +} + +#define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries + +#define default_lock_retry_limit 20 // 20 retries +#define deskew_validation_delay 10000 // 10 millisecs + +static int perform_deskew_training(struct ddr_priv *priv, int rank_mask, + int if_num, int spd_rawcard_aorb) +{ + int unsaturated, locked; + int sat_retries, sat_retries_limit; + int lock_retries, lock_retries_total, lock_retries_limit; + int print_first; + int print_them_all; + struct deskew_counts dsk_counts; + union cvmx_lmcx_phy_ctl phy_ctl; + char *s; + int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || + octeon_is_cpuid(OCTEON_CNF75XX); + int disable_bitval_retries = 1; // default to disabled + + debug("N0.LMC%d: Performing Deskew Training.\n", if_num); + + sat_retries = 0; + sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT; + + lock_retries_total = 0; + unsaturated = 0; + print_first = 1; // print the first one + // set to true for printing all normal deskew attempts + print_them_all = 0; + + // provide override for bitval_errs causing internal VREF retries + s = env_get("ddr_disable_bitval_retries"); + if (s) + disable_bitval_retries = !!simple_strtoul(s, NULL, 0); + + lock_retries_limit = default_lock_retry_limit; + if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) || + (octeon_is_cpuid(OCTEON_CN73XX)) || + (octeon_is_cpuid(OCTEON_CNF75XX))) + lock_retries_limit *= 2; // give new chips twice as many + + do { /* while (sat_retries < sat_retry_limit) */ + /* + * 4.8.8 LMC Deskew Training + * + * LMC requires input-read-data deskew training. + * + * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1. + */ + + union cvmx_lmcx_ext_config ext_config; + + ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + ext_config.s.vrefint_seq_deskew = 1; + + ddr_seq_print + ("Performing LMC sequence: vrefint_seq_deskew = %d\n", + ext_config.s.vrefint_seq_deskew); + + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64); + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. + */ + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.phy_dsk_reset = 1; /* RESET Deskew sequence */ + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + /* LMC Deskew Training */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); + + lock_retries = 0; + +perform_deskew_training: + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.phy_dsk_reset = 0; /* Normal Deskew sequence */ + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + /* LMC Deskew Training */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); + + // Moved this from validate_deskew_training + /* Allow deskew results to stabilize before evaluating them. */ + udelay(deskew_validation_delay); + + // Now go look at lock and saturation status... + validate_deskew_training(priv, rank_mask, if_num, &dsk_counts, + print_first); + // after printing the first and not doing them all, no more + if (print_first && !print_them_all) + print_first = 0; + + unsaturated = (dsk_counts.saturated == 0); + locked = (dsk_counts.unlocked == 0); + + // only do locking retries if unsaturated or rawcard A or B, + // otherwise full SAT retry + if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) { + if (!locked) { // and not locked + lock_retries++; + lock_retries_total++; + if (lock_retries <= lock_retries_limit) { + goto perform_deskew_training; + } else { + debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n", + if_num, lock_retries_limit); + } + } else { + // only print if we did try + if (lock_retries_total > 0) + debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n", + if_num, lock_retries); + } + } /* if (unsaturated || spd_rawcard_aorb) */ + + ++sat_retries; + + /* + * At this point, check for a DDR4 RDIMM that will not + * benefit from SAT retries; if so, exit + */ + if (spd_rawcard_aorb && !has_no_sat) { + debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n", + if_num); + break; // no sat or lock retries + } + + } while (!unsaturated && (sat_retries < sat_retries_limit)); + + debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n", + if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ? + "Timed Out" : "Completed", sat_retries - 1, lock_retries_total); + + // FIXME? add saturation to reasons for fault return - give it a + // chance via Internal VREF + // FIXME? add OPTIONAL bit value to reasons for fault return - + // give it a chance via Internal VREF + if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 || + (dsk_counts.bitval_errs != 0 && !disable_bitval_retries) || + !unsaturated) { + debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n", + if_num); + // FIXME: do we want this output always for errors? + validate_deskew_training(priv, rank_mask, if_num, + &dsk_counts, 1); + return -1; // we did retry locally, they did not help + } + + // NOTE: we (currently) always print one last training validation + // before starting Read Leveling... + + return 0; +} + +#define SCALING_FACTOR (1000) + +// NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config +static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl, + int rank_count, int dram_connection) +{ + u64 reff_s; + u64 rser_s = (dram_connection) ? 0 : 15; + u64 vdd = 1200; + u64 vref; + // 99 == HiZ + u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ? + 1 * 1024 * 1024 : rtt_wr); + u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) && + (rtt_wr != 0))) ? + 1 * 1024 * 1024 : rtt_park); + u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl); + int vref_value; + u64 rangepc = 6000; // range1 base + u64 vrefpc; + int vref_range = 0; + + reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s)); + + vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) / + (rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR; + + vref = (vref * vdd) / 2 / SCALING_FACTOR; + + vrefpc = (vref * 100 * 100) / vdd; + + if (vrefpc < rangepc) { // < range1 base, use range2 + vref_range = 1 << 6; // set bit A6 for range2 + rangepc = 4500; // range2 base is 45% + } + + vref_value = divide_nint(vrefpc - rangepc, 65); + if (vref_value < 0) + vref_value = vref_range; // set to base of range + else + vref_value |= vref_range; + + debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n", + rtt_wr, rtt_park, dqx_ctl, rank_count); + debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n", + rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range, + vref_range ? 2 : 1); + + return vref_value; +} + +// NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs +static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00, + int rtt_park_01, + int dqx_ctl, int rtt_nom, + int dram_connection) +{ + u64 rser = (dram_connection) ? 0 : 15; + u64 vdd = 1200; + u64 vl, vlp, vcm; + u64 rd0, rd1, rpullup; + // 99 == HiZ + u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ? + 1 * 1024 * 1024 : rtt_wr); + u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00); + u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01); + u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl); + u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom); + int vref_value; + u64 rangepc = 6000; // range1 base + u64 vrefpc; + int vref_range = 0; + + // rd0 = (RTT_NOM (parallel) RTT_WR) + = + // ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER + rd0 = divide_nint((rtt_nom_s * rtt_wr_s), + (rtt_nom_s + rtt_wr_s)) + rser; + + // rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER = + // ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER + rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s), + (rtt_park_00_s + rtt_park_01_s)) + rser; + + // rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1) + rpullup = divide_nint((rd0 * rd1), (rd0 + rd1)); + + // vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2 + vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup)); + + // vlp = ((RSER / rd0) * (1.2 - vl)) + vl + vlp = divide_nint((rser * (vdd - vl)), rd0) + vl; + + // vcm = (vlp + 1.2) / 2 + vcm = divide_nint((vlp + vdd), 2); + + // vrefpc = (vcm / 1.2) * 100 + vrefpc = divide_nint((vcm * 100 * 100), vdd); + + if (vrefpc < rangepc) { // < range1 base, use range2 + vref_range = 1 << 6; // set bit A6 for range2 + rangepc = 4500; // range2 base is 45% + } + + vref_value = divide_nint(vrefpc - rangepc, 65); + if (vref_value < 0) + vref_value = vref_range; // set to base of range + else + vref_value |= vref_range; + + debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n", + rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value, + vref_value); + + return vref_value; +} + +// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4. +static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx, + int dimm_count, int rank_count, + struct impedence_values *imp_values, + int is_stacked_die, int dram_connection) +{ + int computed_final_vref_value = 0; + int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT; + const char *s; + int rtt_wr, dqx_ctl, rtt_nom, index; + union cvmx_lmcx_modereg_params1 lmc_modereg_params1; + union cvmx_lmcx_modereg_params2 lmc_modereg_params2; + union cvmx_lmcx_comp_ctl2 comp_ctl2; + int rtt_park; + int rtt_park_00; + int rtt_park_01; + + debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n", + if_num, rankx, __func__, dram_connection); + + // allow some overrides... + s = env_get("ddr_adjust_computed_vref"); + if (s) { + enable_adjust = !!simple_strtoul(s, NULL, 0); + if (!enable_adjust) { + debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n", + if_num, rankx); + } + } + + s = env_get("ddr_set_computed_vref"); + if (s) { + int new_vref = simple_strtoul(s, NULL, 0); + + debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n", + if_num, rankx, new_vref, new_vref); + return new_vref; + } + + /* + * Calculate an alternative to the measured vref value + * but only for configurations we know how to... + */ + // We have code for 2-rank DIMMs in both 1-slot or 2-slot configs, + // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot + // configs, and can use the 2-rank 2-slot code for 1-rank DIMMs + // in 2-slot configs. + + lmc_modereg_params1.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); + lmc_modereg_params2.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num)); + comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl]; + + // WR always comes from the current rank + index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03; + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) + index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04; + rtt_wr = imp_values->rtt_wr_ohms[index]; + + // separate calculations for 1 vs 2 DIMMs per LMC + if (dimm_count == 1) { + // PARK comes from this rank if 1-rank, otherwise other rank + index = + (lmc_modereg_params2.u64 >> + ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07; + rtt_park = imp_values->rtt_nom_ohms[index]; + computed_final_vref_value = + compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl, + rank_count, dram_connection); + } else { + // get both PARK values from the other DIMM + index = + (lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) & + 0x07; + rtt_park_00 = imp_values->rtt_nom_ohms[index]; + index = + (lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) & + 0x07; + rtt_park_01 = imp_values->rtt_nom_ohms[index]; + // NOM comes from this rank if 1-rank, otherwise other rank + index = + (lmc_modereg_params1.u64 >> + ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07; + rtt_nom = imp_values->rtt_nom_ohms[index]; + computed_final_vref_value = + compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01, + dqx_ctl, rtt_nom, dram_connection); + } + + if (enable_adjust) { + union cvmx_lmcx_config lmc_config; + union cvmx_lmcx_control lmc_control; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + + /* + * New computed vref = existing computed vref – X + * + * The value of X is depending on different conditions. + * Both #122 and #139 are 2Rx4 RDIMM, while #124 is stacked + * die 2Rx4, so I conclude the results into two conditions: + * + * 1. Stacked Die: 2Rx4 + * 1-slot: offset = 7. i, e New computed vref = existing + * computed vref – 7 + * 2-slot: offset = 6 + * + * 2. Regular: 2Rx4 + * 1-slot: offset = 3 + * 2-slot: offset = 2 + */ + // we know we never get called unless DDR4, so test just + // the other conditions + if (lmc_control.s.rdimm_ena == 1 && + rank_count == 2 && lmc_config.s.mode_x4dev) { + // it must first be RDIMM and 2-rank and x4 + int adj; + + // now do according to stacked die or not... + if (is_stacked_die) + adj = (dimm_count == 1) ? -7 : -6; + else + adj = (dimm_count == 1) ? -3 : -2; + + // we must have adjusted it, so print it out if + // verbosity is right + debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n", + if_num, rankx, computed_final_vref_value, + computed_final_vref_value, + computed_final_vref_value + adj, + computed_final_vref_value + adj); + computed_final_vref_value += adj; + } + } + + return computed_final_vref_value; +} + +static void unpack_rlevel_settings(int if_bytemask, int ecc_ena, + struct rlevel_byte_data *rlevel_byte, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank) +{ + if ((if_bytemask & 0xff) == 0xff) { + if (ecc_ena) { + rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7; + rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6; + rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5; + rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4; + /* ECC */ + rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8; + } else { + rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; + rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; + rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; + rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; + } + } else { + rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8; /* unused */ + rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; /* unused */ + rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; /* unused */ + rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; /* unused */ + rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; /* ECC */ + } + + rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3; + rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2; + rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1; + rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0; +} + +static void pack_rlevel_settings(int if_bytemask, int ecc_ena, + struct rlevel_byte_data *rlevel_byte, + union cvmx_lmcx_rlevel_rankx + *final_rlevel_rank) +{ + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank; + + if ((if_bytemask & 0xff) == 0xff) { + if (ecc_ena) { + lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay; + lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay; + lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay; + lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay; + /* ECC */ + lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay; + } else { + lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay; + lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay; + lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay; + lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay; + } + } else { + lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay; + lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay; + lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay; + lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay; + lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay; + } + + lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay; + lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay; + lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay; + lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay; + + *final_rlevel_rank = lmc_rlevel_rank; +} + +/////////////////// These are the RLEVEL settings display routines + +// flags +#define WITH_NOTHING 0 +#define WITH_SCORE 1 +#define WITH_AVERAGE 2 +#define WITH_FINAL 4 +#define WITH_COMPUTE 8 + +static void do_display_rl(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, + int rank, int flags, int score) +{ + char score_buf[16]; + char *msg_buf; + char hex_buf[20]; + + if (flags & WITH_SCORE) { + snprintf(score_buf, sizeof(score_buf), "(%d)", score); + } else { + score_buf[0] = ' '; + score_buf[1] = 0; + } + + if (flags & WITH_AVERAGE) { + msg_buf = " DELAY AVERAGES "; + } else if (flags & WITH_FINAL) { + msg_buf = " FINAL SETTINGS "; + } else if (flags & WITH_COMPUTE) { + msg_buf = " COMPUTED DELAYS "; + } else { + snprintf(hex_buf, sizeof(hex_buf), "0x%016llX", + (unsigned long long)lmc_rlevel_rank.u64); + msg_buf = hex_buf; + } + + debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n", + if_num, rank, lmc_rlevel_rank.s.status, msg_buf, + lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7, + lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5, + lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3, + lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1, + lmc_rlevel_rank.s.byte0, score_buf); +} + +static void display_rl(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank) +{ + do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0); +} + +static void display_rl_with_score(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, + int rank, int score) +{ + do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score); +} + +static void display_rl_with_final(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, + int rank) +{ + do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0); +} + +static void display_rl_with_computed(int if_num, + union cvmx_lmcx_rlevel_rankx + lmc_rlevel_rank, int rank, int score) +{ + do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score); +} + +// flag values +#define WITH_RODT_BLANK 0 +#define WITH_RODT_SKIPPING 1 +#define WITH_RODT_BESTROW 2 +#define WITH_RODT_BESTSCORE 3 +// control +#define SKIP_SKIPPING 1 + +static const char *with_rodt_canned_msgs[4] = { + " ", "SKIPPING ", "BEST ROW ", "BEST SCORE" +}; + +static void display_rl_with_rodt(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, + int rank, int score, + int nom_ohms, int rodt_ohms, int flag) +{ + const char *msg_buf; + char set_buf[20]; + +#if SKIP_SKIPPING + if (flag == WITH_RODT_SKIPPING) + return; +#endif + + msg_buf = with_rodt_canned_msgs[flag]; + if (nom_ohms < 0) { + snprintf(set_buf, sizeof(set_buf), " RODT %3d ", + rodt_ohms); + } else { + snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms, + rodt_ohms); + } + + debug("N0.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n", + if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8, + lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6, + lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4, + lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2, + lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score); +} + +static void do_display_wl(int if_num, + union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, + int rank, int flags) +{ + char *msg_buf; + char hex_buf[20]; + + if (flags & WITH_FINAL) { + msg_buf = " FINAL SETTINGS "; + } else { + snprintf(hex_buf, sizeof(hex_buf), "0x%016llX", + (unsigned long long)lmc_wlevel_rank.u64); + msg_buf = hex_buf; + } + + debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + if_num, rank, lmc_wlevel_rank.s.status, msg_buf, + lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7, + lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5, + lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3, + lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1, + lmc_wlevel_rank.s.byte0); +} + +static void display_wl(int if_num, + union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank) +{ + do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING); +} + +static void display_wl_with_final(int if_num, + union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, + int rank) +{ + do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL); +} + +// pretty-print bitmask adjuster +static u64 ppbm(u64 bm) +{ + if (bm != 0ul) { + while ((bm & 0x0fful) == 0ul) + bm >>= 4; + } + + return bm; +} + +// xlate PACKED index to UNPACKED index to use with rlevel_byte +#define XPU(i, e) (((i) < 4) ? (i) : (((i) < 8) ? (i) + (e) : 4)) +// xlate UNPACKED index to PACKED index to use with rlevel_bitmask +#define XUP(i, e) (((i) < 4) ? (i) : (e) ? (((i) > 4) ? (i) - 1 : 8) : (i)) + +// flag values +#define WITH_WL_BITMASKS 0 +#define WITH_RL_BITMASKS 1 +#define WITH_RL_MASK_SCORES 2 +#define WITH_RL_SEQ_SCORES 3 + +static void do_display_bm(int if_num, int rank, void *bm, + int flags, int ecc) +{ + if (flags == WITH_WL_BITMASKS) { + // wlevel_bitmask array in PACKED index order, so just + // print them + int *bitmasks = (int *)bm; + + debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n", + if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6], + bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2], + bitmasks[1], bitmasks[0] + ); + } else if (flags == WITH_RL_BITMASKS) { + // rlevel_bitmask array in PACKED index order, so just + // print them + struct rlevel_bitmask *rlevel_bitmask = + (struct rlevel_bitmask *)bm; + + debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n", + if_num, rank, ppbm(rlevel_bitmask[8].bm), + ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm), + ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm), + ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm), + ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm) + ); + } else if (flags == WITH_RL_MASK_SCORES) { + // rlevel_bitmask array in PACKED index order, so just + // print them + struct rlevel_bitmask *rlevel_bitmask = + (struct rlevel_bitmask *)bm; + + debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + if_num, rank, rlevel_bitmask[8].errs, + rlevel_bitmask[7].errs, rlevel_bitmask[6].errs, + rlevel_bitmask[5].errs, rlevel_bitmask[4].errs, + rlevel_bitmask[3].errs, rlevel_bitmask[2].errs, + rlevel_bitmask[1].errs, rlevel_bitmask[0].errs); + } else if (flags == WITH_RL_SEQ_SCORES) { + // rlevel_byte array in UNPACKED index order, so xlate + // and print them + struct rlevel_byte_data *rlevel_byte = + (struct rlevel_byte_data *)bm; + + debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs, + rlevel_byte[XPU(7, ecc)].sqerrs, + rlevel_byte[XPU(6, ecc)].sqerrs, + rlevel_byte[XPU(5, ecc)].sqerrs, + rlevel_byte[XPU(4, ecc)].sqerrs, + rlevel_byte[XPU(3, ecc)].sqerrs, + rlevel_byte[XPU(2, ecc)].sqerrs, + rlevel_byte[XPU(1, ecc)].sqerrs, + rlevel_byte[XPU(0, ecc)].sqerrs); + } +} + +static void display_wl_bm(int if_num, int rank, int *bitmasks) +{ + do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0); +} + +static void display_rl_bm(int if_num, int rank, + struct rlevel_bitmask *bitmasks, int ecc_ena) +{ + do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_BITMASKS, + ecc_ena); +} + +static void display_rl_bm_scores(int if_num, int rank, + struct rlevel_bitmask *bitmasks, int ecc_ena) +{ + do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES, + ecc_ena); +} + +static void display_rl_seq_scores(int if_num, int rank, + struct rlevel_byte_data *bytes, int ecc_ena) +{ + do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena); +} + +#define RODT_OHMS_COUNT 8 +#define RTT_NOM_OHMS_COUNT 8 +#define RTT_NOM_TABLE_COUNT 8 +#define RTT_WR_OHMS_COUNT 8 +#define DIC_OHMS_COUNT 3 +#define DRIVE_STRENGTH_COUNT 15 + +static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = { + 0, 40, 60, 80, 120, 240, 34, 48 }; +static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = { + 0, 60, 120, 40, 240, 48, 80, 34 }; +static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = { + 0, 4, 2, 6, 1, 5, 3, 7 }; +// setting HiZ ohms to 99 for computed vref +static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { + 0, 120, 240, 99, 80 }; +static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 }; +static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = { + 0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 }; +static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = { + 0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 }; +struct impedence_values ddr4_impedence_val = { + .rodt_ohms = ddr4_rodt_ohms, + .rtt_nom_ohms = ddr4_rtt_nom_ohms, + .rtt_nom_table = ddr4_rtt_nom_table, + .rtt_wr_ohms = ddr4_rtt_wr_ohms, + .dic_ohms = ddr4_dic_ohms, + .drive_strength = ddr4_drive_strength, + .dqx_strength = ddr4_dqx_strength, +}; + +static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = { + 0, 20, 30, 40, 60, 120, 0, 0 }; +static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = { + 0, 60, 120, 40, 20, 30, 0, 0 }; +static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = { + 0, 2, 1, 3, 5, 4, 0, 0 }; +static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 }; +static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 }; +static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = { + 0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 }; +static struct impedence_values ddr3_impedence_val = { + .rodt_ohms = ddr3_rodt_ohms, + .rtt_nom_ohms = ddr3_rtt_nom_ohms, + .rtt_nom_table = ddr3_rtt_nom_table, + .rtt_wr_ohms = ddr3_rtt_wr_ohms, + .dic_ohms = ddr3_dic_ohms, + .drive_strength = ddr3_drive_strength, + .dqx_strength = ddr3_drive_strength, +}; + +static u64 hertz_to_psecs(u64 hertz) +{ + /* Clock in psecs */ + return divide_nint((u64)1000 * 1000 * 1000 * 1000, hertz); +} + +#define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */ + +static u64 psecs_to_mts(u64 psecs) +{ + return divide_nint(divide_nint((u64)(2 * 1000000 * DIVIDEND_SCALE), + psecs), DIVIDEND_SCALE); +} + +#define WITHIN(v, b, m) (((v) >= ((b) - (m))) && ((v) <= ((b) + (m)))) + +static unsigned long pretty_psecs_to_mts(u64 psecs) +{ + u64 ret = 0; // default to error + + if (WITHIN(psecs, 2500, 1)) + ret = 800; + else if (WITHIN(psecs, 1875, 1)) + ret = 1066; + else if (WITHIN(psecs, 1500, 1)) + ret = 1333; + else if (WITHIN(psecs, 1250, 1)) + ret = 1600; + else if (WITHIN(psecs, 1071, 1)) + ret = 1866; + else if (WITHIN(psecs, 937, 1)) + ret = 2133; + else if (WITHIN(psecs, 833, 1)) + ret = 2400; + else if (WITHIN(psecs, 750, 1)) + ret = 2666; + return ret; +} + +static u64 mts_to_hertz(u64 mts) +{ + return ((mts * 1000 * 1000) / 2); +} + +static int compute_rc3x(int64_t tclk_psecs) +{ + long speed; + long tclk_psecs_min, tclk_psecs_max; + long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max; + int rc3x; + +#define ENCODING_BASE 1240 + + data_rate_mhz = psecs_to_mts(tclk_psecs); + + /* + * 2400 MT/s is a special case. Using integer arithmetic it rounds + * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the + * proper setting from the table. + */ + if (tclk_psecs == 833) + data_rate_mhz = 2400; + + for (speed = ENCODING_BASE; speed < 3200; speed += 20) { + int error = 0; + + /* Clock in psecs */ + tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00)); + /* Clock in psecs */ + tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18)); + + data_rate_mhz_min = psecs_to_mts(tclk_psecs_min); + data_rate_mhz_max = psecs_to_mts(tclk_psecs_max); + + /* Force alingment to multiple to avound rounding errors. */ + data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20; + data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20; + + error += (speed + 00 != data_rate_mhz_min); + error += (speed + 20 != data_rate_mhz_max); + + rc3x = (speed - ENCODING_BASE) / 20; + + if (data_rate_mhz <= (speed + 20)) + break; + } + + return rc3x; +} + +/* + * static global variables needed, so that functions (loops) can be + * restructured from the main huge function. Its not elegant, but the + * only way to break the original functions like init_octeon3_ddr3_interface() + * into separate logical smaller functions with less indentation levels. + */ +static int if_num __section(".data"); +static u32 if_mask __section(".data"); +static int ddr_hertz __section(".data"); + +static struct ddr_conf *ddr_conf __section(".data"); +static const struct dimm_odt_config *odt_1rank_config __section(".data"); +static const struct dimm_odt_config *odt_2rank_config __section(".data"); +static const struct dimm_odt_config *odt_4rank_config __section(".data"); +static struct dimm_config *dimm_config_table __section(".data"); +static const struct dimm_odt_config *odt_config __section(".data"); +static const struct ddr3_custom_config *c_cfg __section(".data"); + +static int odt_idx __section(".data"); + +static ulong tclk_psecs __section(".data"); +static ulong eclk_psecs __section(".data"); + +static int row_bits __section(".data"); +static int col_bits __section(".data"); +static int num_banks __section(".data"); +static int num_ranks __section(".data"); +static int dram_width __section(".data"); +static int dimm_count __section(".data"); +/* Accumulate and report all the errors before giving up */ +static int fatal_error __section(".data"); +/* Flag that indicates safe DDR settings should be used */ +static int safe_ddr_flag __section(".data"); +/* Octeon II Default: 64bit interface width */ +static int if_64b __section(".data"); +static int if_bytemask __section(".data"); +static u32 mem_size_mbytes __section(".data"); +static unsigned int didx __section(".data"); +static int bank_bits __section(".data"); +static int bunk_enable __section(".data"); +static int rank_mask __section(".data"); +static int column_bits_start __section(".data"); +static int row_lsb __section(".data"); +static int pbank_lsb __section(".data"); +static int use_ecc __section(".data"); +static int mtb_psec __section(".data"); +static short ftb_dividend __section(".data"); +static short ftb_divisor __section(".data"); +static int taamin __section(".data"); +static int tckmin __section(".data"); +static int cl __section(".data"); +static int min_cas_latency __section(".data"); +static int max_cas_latency __section(".data"); +static int override_cas_latency __section(".data"); +static int ddr_rtt_nom_auto __section(".data"); +static int ddr_rodt_ctl_auto __section(".data"); + +static int spd_addr __section(".data"); +static int spd_org __section(".data"); +static int spd_banks __section(".data"); +static int spd_rdimm __section(".data"); +static int spd_dimm_type __section(".data"); +static int spd_ecc __section(".data"); +static u32 spd_cas_latency __section(".data"); +static int spd_mtb_dividend __section(".data"); +static int spd_mtb_divisor __section(".data"); +static int spd_tck_min __section(".data"); +static int spd_taa_min __section(".data"); +static int spd_twr __section(".data"); +static int spd_trcd __section(".data"); +static int spd_trrd __section(".data"); +static int spd_trp __section(".data"); +static int spd_tras __section(".data"); +static int spd_trc __section(".data"); +static int spd_trfc __section(".data"); +static int spd_twtr __section(".data"); +static int spd_trtp __section(".data"); +static int spd_tfaw __section(".data"); +static int spd_addr_mirror __section(".data"); +static int spd_package __section(".data"); +static int spd_rawcard __section(".data"); +static int spd_rawcard_aorb __section(".data"); +static int spd_rdimm_registers __section(".data"); +static int spd_thermal_sensor __section(".data"); + +static int is_stacked_die __section(".data"); +static int is_3ds_dimm __section(".data"); +// 3DS: logical ranks per package rank +static int lranks_per_prank __section(".data"); +// 3DS: logical ranks bits +static int lranks_bits __section(".data"); +// in Mbits; only used for 3DS +static int die_capacity __section(".data"); + +static enum ddr_type ddr_type __section(".data"); + +static int twr __section(".data"); +static int trcd __section(".data"); +static int trrd __section(".data"); +static int trp __section(".data"); +static int tras __section(".data"); +static int trc __section(".data"); +static int trfc __section(".data"); +static int twtr __section(".data"); +static int trtp __section(".data"); +static int tfaw __section(".data"); + +static int ddr4_tckavgmin __section(".data"); +static int ddr4_tckavgmax __section(".data"); +static int ddr4_trdcmin __section(".data"); +static int ddr4_trpmin __section(".data"); +static int ddr4_trasmin __section(".data"); +static int ddr4_trcmin __section(".data"); +static int ddr4_trfc1min __section(".data"); +static int ddr4_trfc2min __section(".data"); +static int ddr4_trfc4min __section(".data"); +static int ddr4_tfawmin __section(".data"); +static int ddr4_trrd_smin __section(".data"); +static int ddr4_trrd_lmin __section(".data"); +static int ddr4_tccd_lmin __section(".data"); + +static int wl_mask_err __section(".data"); +static int wl_loops __section(".data"); +static int default_rtt_nom[4] __section(".data"); +static int dyn_rtt_nom_mask __section(".data"); +static struct impedence_values *imp_val __section(".data"); +static char default_rodt_ctl __section(".data"); +// default to disabled (ie, try LMC restart, not chip reset) +static int ddr_disable_chip_reset __section(".data"); +static const char *dimm_type_name __section(".data"); +static int match_wl_rtt_nom __section(".data"); + +struct hwl_alt_by_rank { + u16 hwl_alt_mask; // mask of bytelanes with alternate + u16 hwl_alt_delay[9]; // bytelane alternate avail if mask=1 +}; + +static struct hwl_alt_by_rank hwl_alts[4] __section(".data"); + +#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3 // was: 5 +static int internal_retries __section(".data"); + +static int deskew_training_errors __section(".data"); +static struct deskew_counts deskew_training_results __section(".data"); +static int disable_deskew_training __section(".data"); +static int restart_if_dsk_incomplete __section(".data"); +static int dac_eval_retries __section(".data"); +static int dac_settings[9] __section(".data"); +static int num_samples __section(".data"); +static int sample __section(".data"); +static int lane __section(".data"); +static int last_lane __section(".data"); +static int total_dac_eval_retries __section(".data"); +static int dac_eval_exhausted __section(".data"); + +#define DEFAULT_DAC_SAMPLES 7 // originally was 5 +#define DAC_RETRIES_LIMIT 2 + +struct bytelane_sample { + s16 bytes[DEFAULT_DAC_SAMPLES]; +}; + +static struct bytelane_sample lanes[9] __section(".data"); + +static char disable_sequential_delay_check __section(".data"); +static int wl_print __section(".data"); + +static int enable_by_rank_init __section(".data"); +static int saved_rank_mask __section(".data"); +static int by_rank __section(".data"); +static struct deskew_data rank_dsk[4] __section(".data"); +static struct dac_data rank_dac[4] __section(".data"); + +// todo: perhaps remove node at some time completely? +static int node __section(".data"); +static int base_cl __section(".data"); + +/* Parameters from DDR3 Specifications */ +#define DDR3_TREFI 7800000 /* 7.8 us */ +#define DDR3_ZQCS 80000ull /* 80 ns */ +#define DDR3_ZQCS_INTERNAL 1280000000ull /* 128ms/100 */ +#define DDR3_TCKE 5000 /* 5 ns */ +#define DDR3_TMRD 4 /* 4 nCK */ +#define DDR3_TDLLK 512 /* 512 nCK */ +#define DDR3_TMPRR 1 /* 1 nCK */ +#define DDR3_TWLMRD 40 /* 40 nCK */ +#define DDR3_TWLDQSEN 25 /* 25 nCK */ + +/* Parameters from DDR4 Specifications */ +#define DDR4_TMRD 8 /* 8 nCK */ +#define DDR4_TDLLK 768 /* 768 nCK */ + +static void lmc_config(struct ddr_priv *priv) +{ + union cvmx_lmcx_config cfg; + char *s; + + cfg.u64 = 0; + + cfg.cn78xx.ecc_ena = use_ecc; + cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb); + cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb); + + cfg.cn78xx.idlepower = 0; /* Disabled */ + + s = lookup_env(priv, "ddr_idlepower"); + if (s) + cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0); + + cfg.cn78xx.forcewrite = 0; /* Disabled */ + /* Include memory reference address in the ECC */ + cfg.cn78xx.ecc_adr = 1; + + s = lookup_env(priv, "ddr_ecc_adr"); + if (s) + cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0); + + cfg.cn78xx.reset = 0; + + /* + * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to + * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25], + * ref_zqcs_int(18:7) to + * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this + * value should always be greater than 32, to account for + * resistor calibration delays. + */ + + cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f); + cfg.cn78xx.ref_zqcs_int |= + ((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) / + (512 * 128))) & 0xfff) << 7); + + cfg.cn78xx.early_dqx = 1; /* Default to enabled */ + + s = lookup_env(priv, "ddr_early_dqx"); + if (!s) + s = lookup_env(priv, "ddr%d_early_dqx", if_num); + + if (s) + cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0); + + cfg.cn78xx.sref_with_dll = 0; + + cfg.cn78xx.rank_ena = bunk_enable; + cfg.cn78xx.rankmask = rank_mask; /* Set later */ + cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) & + rank_mask; + /* Set once and don't change it. */ + cfg.cn78xx.init_status = rank_mask; + cfg.cn78xx.early_unload_d0_r0 = 0; + cfg.cn78xx.early_unload_d0_r1 = 0; + cfg.cn78xx.early_unload_d1_r0 = 0; + cfg.cn78xx.early_unload_d1_r1 = 0; + cfg.cn78xx.scrz = 0; + if (octeon_is_cpuid(OCTEON_CN70XX)) + cfg.cn78xx.mode32b = 1; /* Read-only. Always 1. */ + cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0; + cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) && + (dram_width == 16)) ? 0 : 1; + + s = lookup_env_ull(priv, "ddr_config"); + if (s) + cfg.u64 = simple_strtoull(s, NULL, 0); + debug("LMC_CONFIG : 0x%016llx\n", + cfg.u64); + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); +} + +static void lmc_control(struct ddr_priv *priv) +{ + union cvmx_lmcx_control ctrl; + char *s; + + ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + ctrl.s.rdimm_ena = spd_rdimm; + ctrl.s.bwcnt = 0; /* Clear counter later */ + if (spd_rdimm) + ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm); + else + ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm); + ctrl.s.pocas = 0; + ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2); + ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0; + ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0; + ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0; + ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0; + ctrl.s.elev_prio_dis = safe_ddr_flag ? 1 : 0; + /* discards writes to addresses that don't exist in the DRAM */ + ctrl.s.nxm_write_en = 0; + ctrl.s.max_write_batch = 8; + ctrl.s.xor_bank = 1; + ctrl.s.auto_dclkdis = 1; + ctrl.s.int_zqcs_dis = 0; + ctrl.s.ext_zqcs_dis = 0; + ctrl.s.bprch = 1; + ctrl.s.wodt_bprch = 1; + ctrl.s.rodt_bprch = 1; + + s = lookup_env(priv, "ddr_xor_bank"); + if (s) + ctrl.s.xor_bank = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_2t"); + if (s) + ctrl.s.ddr2t = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_fprch2"); + if (s) + ctrl.s.fprch2 = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_bprch"); + if (s) + ctrl.s.bprch = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wodt_bprch"); + if (s) + ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rodt_bprch"); + if (s) + ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_int_zqcs_dis"); + if (s) + ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_ext_zqcs_dis"); + if (s) + ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0); + + s = lookup_env_ull(priv, "ddr_control"); + if (s) + ctrl.u64 = simple_strtoull(s, NULL, 0); + + debug("LMC_CONTROL : 0x%016llx\n", + ctrl.u64); + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); +} + +static void lmc_timing_params0(struct ddr_priv *priv) +{ + union cvmx_lmcx_timing_params0 tp0; + unsigned int trp_value; + char *s; + + tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num)); + + trp_value = divide_roundup(trp, tclk_psecs) - 1; + debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value, + trp_value + + (unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull), + tclk_psecs)) - 4); + s = lookup_env_ull(priv, "ddr_use_old_trp"); + if (s) { + if (!!simple_strtoull(s, NULL, 0)) { + trp_value += + divide_roundup(max(4ull * tclk_psecs, 7500ull), + tclk_psecs) - 4; + debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n", + trp_value); + } + } + + tp0.cn78xx.txpr = + divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull), + 16 * tclk_psecs); + tp0.cn78xx.trp = trp_value & 0x1f; + tp0.cn78xx.tcksre = + divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1; + + if (ddr_type == DDR4_DRAM) { + int tzqinit = 4; // Default to 4, for all DDR4 speed bins + + s = lookup_env(priv, "ddr_tzqinit"); + if (s) + tzqinit = simple_strtoul(s, NULL, 0); + + tp0.cn78xx.tzqinit = tzqinit; + /* Always 8. */ + tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs, + (16 * tclk_psecs)); + tp0.cn78xx.tcke = + divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE), + tclk_psecs) - 1; + tp0.cn78xx.tmrd = + divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1; + tp0.cn78xx.tmod = 25; /* 25 is the max allowed */ + tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256); + } else { + tp0.cn78xx.tzqinit = + divide_roundup(max(512ull * tclk_psecs, 640000ull), + (256 * tclk_psecs)); + tp0.cn78xx.tzqcs = + divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS), + (16 * tclk_psecs)); + tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1; + tp0.cn78xx.tmrd = + divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1; + tp0.cn78xx.tmod = + divide_roundup(max(12ull * tclk_psecs, 15000ull), + tclk_psecs) - 1; + tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256); + } + + s = lookup_env_ull(priv, "ddr_timing_params0"); + if (s) + tp0.u64 = simple_strtoull(s, NULL, 0); + debug("TIMING_PARAMS0 : 0x%016llx\n", + tp0.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64); +} + +static void lmc_timing_params1(struct ddr_priv *priv) +{ + union cvmx_lmcx_timing_params1 tp1; + unsigned int txp, temp_trcd, trfc_dlr; + char *s; + + tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); + + /* .cn70xx. */ + tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1; + + tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1; + + temp_trcd = divide_roundup(trcd, tclk_psecs); + if (temp_trcd > 15) { + debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n", + temp_trcd); + } + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) { + /* + * Let .trcd=0 serve as a flag that the field has + * overflowed. Must use Additive Latency mode as a + * workaround. + */ + temp_trcd = 0; + } + tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf; + tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1; + + tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1; + tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs); + + if (ddr_type == DDR4_DRAM) { + /* Workaround bug 24006. Use Trrd_l. */ + tp1.cn78xx.trrd = + divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2; + } else { + tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2; + } + + /* + * tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec + * tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec + * tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec + * tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec + * tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec + * tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec + */ + txp = (tclk_psecs < 1875) ? 6000 : 7500; + txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp), + tclk_psecs) - 1; + if (txp > 7) { + debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n", + txp); + } + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7) + txp = 7; // max it out + tp1.cn78xx.txp = (txp >> 0) & 7; + tp1.cn78xx.txp_ext = (txp >> 3) & 1; + + tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs, + 4 * tclk_psecs); + tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs, + 4 * tclk_psecs); + tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs); + tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull), + tclk_psecs) - 1; + + if (ddr_type == DDR4_DRAM && is_3ds_dimm) { + /* + * 4 Gb: tRFC_DLR = 90 ns + * 8 Gb: tRFC_DLR = 120 ns + * 16 Gb: tRFC_DLR = 190 ns FIXME? + */ + if (die_capacity == 0x1000) // 4 Gbit + trfc_dlr = 90; + else if (die_capacity == 0x2000) // 8 Gbit + trfc_dlr = 120; + else if (die_capacity == 0x4000) // 16 Gbit + trfc_dlr = 190; + else + trfc_dlr = 0; + + if (trfc_dlr == 0) { + debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n", + node, if_num, die_capacity); + } else { + tp1.cn78xx.trfc_dlr = + divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs); + debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n", + node, if_num, tp1.cn78xx.trfc_dlr); + } + } + + s = lookup_env_ull(priv, "ddr_timing_params1"); + if (s) + tp1.u64 = simple_strtoull(s, NULL, 0); + + debug("TIMING_PARAMS1 : 0x%016llx\n", + tp1.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); +} + +static void lmc_timing_params2(struct ddr_priv *priv) +{ + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_timing_params1 tp1; + union cvmx_lmcx_timing_params2 tp2; + int temp_trrd_l; + + tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); + tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num)); + debug("TIMING_PARAMS2 : 0x%016llx\n", + tp2.u64); + + temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2; + if (temp_trrd_l > 7) + debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n", + temp_trrd_l); + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7) + temp_trrd_l = 7; // max it out + tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7; + tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1; + + // correct for 1600-2400 + tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull), + tclk_psecs) - 1; + tp2.s.t_rw_op_max = 7; + tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull), + tclk_psecs) - 1; + + debug("TIMING_PARAMS2 : 0x%016llx\n", + tp2.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64); + + /* + * Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met + * for Write-to-Read operations to the same Bank Group + */ + if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) { + tp1.cn78xx.twtr = tp2.s.twtr_l - 4; + debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n", + tp1.cn78xx.twtr, tp2.s.twtr_l); + debug("TIMING_PARAMS1 : 0x%016llx\n", + tp1.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); + } + } +} + +static void lmc_modereg_params0(struct ddr_priv *priv) +{ + union cvmx_lmcx_modereg_params0 mp0; + int param; + char *s; + + mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + + if (ddr_type == DDR4_DRAM) { + mp0.s.cwl = 0; /* 1600 (1250ps) */ + if (tclk_psecs < 1250) + mp0.s.cwl = 1; /* 1866 (1072ps) */ + if (tclk_psecs < 1072) + mp0.s.cwl = 2; /* 2133 (938ps) */ + if (tclk_psecs < 938) + mp0.s.cwl = 3; /* 2400 (833ps) */ + if (tclk_psecs < 833) + mp0.s.cwl = 4; /* 2666 (750ps) */ + if (tclk_psecs < 750) + mp0.s.cwl = 5; /* 3200 (625ps) */ + } else { + /* + ** CSR CWL CAS write Latency + ** === === ================================= + ** 0 5 ( tCK(avg) >= 2.5 ns) + ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns) + ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns) + ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns) + ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns) + ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns) + ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns) + ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns) + */ + + mp0.s.cwl = 0; + if (tclk_psecs < 2500) + mp0.s.cwl = 1; + if (tclk_psecs < 1875) + mp0.s.cwl = 2; + if (tclk_psecs < 1500) + mp0.s.cwl = 3; + if (tclk_psecs < 1250) + mp0.s.cwl = 4; + if (tclk_psecs < 1070) + mp0.s.cwl = 5; + if (tclk_psecs < 935) + mp0.s.cwl = 6; + if (tclk_psecs < 833) + mp0.s.cwl = 7; + } + + s = lookup_env(priv, "ddr_cwl"); + if (s) + mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5; + + if (ddr_type == DDR4_DRAM) { + debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]", + mp0.s.cwl + 9 + + ((mp0.s.cwl > 2) ? (mp0.s.cwl - 3) * 2 : 0), mp0.s.cwl); + } else { + debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]", + mp0.s.cwl + 5, mp0.s.cwl); + } + + mp0.s.mprloc = 0; + mp0.s.mpr = 0; + mp0.s.dll = (ddr_type == DDR4_DRAM); /* 0 for DDR3 and 1 for DDR4 */ + mp0.s.al = 0; + mp0.s.wlev = 0; /* Read Only */ + if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM) + mp0.s.tdqs = 0; + else + mp0.s.tdqs = 1; + mp0.s.qoff = 0; + + s = lookup_env(priv, "ddr_cl"); + if (s) { + cl = simple_strtoul(s, NULL, 0); + debug("CAS Latency : %6d\n", + cl); + } + + if (ddr_type == DDR4_DRAM) { + mp0.s.cl = 0x0; + if (cl > 9) + mp0.s.cl = 0x1; + if (cl > 10) + mp0.s.cl = 0x2; + if (cl > 11) + mp0.s.cl = 0x3; + if (cl > 12) + mp0.s.cl = 0x4; + if (cl > 13) + mp0.s.cl = 0x5; + if (cl > 14) + mp0.s.cl = 0x6; + if (cl > 15) + mp0.s.cl = 0x7; + if (cl > 16) + mp0.s.cl = 0x8; + if (cl > 18) + mp0.s.cl = 0x9; + if (cl > 20) + mp0.s.cl = 0xA; + if (cl > 24) + mp0.s.cl = 0xB; + } else { + mp0.s.cl = 0x2; + if (cl > 5) + mp0.s.cl = 0x4; + if (cl > 6) + mp0.s.cl = 0x6; + if (cl > 7) + mp0.s.cl = 0x8; + if (cl > 8) + mp0.s.cl = 0xA; + if (cl > 9) + mp0.s.cl = 0xC; + if (cl > 10) + mp0.s.cl = 0xE; + if (cl > 11) + mp0.s.cl = 0x1; + if (cl > 12) + mp0.s.cl = 0x3; + if (cl > 13) + mp0.s.cl = 0x5; + if (cl > 14) + mp0.s.cl = 0x7; + if (cl > 15) + mp0.s.cl = 0x9; + } + + mp0.s.rbt = 0; /* Read Only. */ + mp0.s.tm = 0; + mp0.s.dllr = 0; + + param = divide_roundup(twr, tclk_psecs); + + if (ddr_type == DDR4_DRAM) { /* DDR4 */ + mp0.s.wrp = 1; + if (param > 12) + mp0.s.wrp = 2; + if (param > 14) + mp0.s.wrp = 3; + if (param > 16) + mp0.s.wrp = 4; + if (param > 18) + mp0.s.wrp = 5; + if (param > 20) + mp0.s.wrp = 6; + if (param > 24) /* RESERVED in DDR4 spec */ + mp0.s.wrp = 7; + } else { /* DDR3 */ + mp0.s.wrp = 1; + if (param > 5) + mp0.s.wrp = 2; + if (param > 6) + mp0.s.wrp = 3; + if (param > 7) + mp0.s.wrp = 4; + if (param > 8) + mp0.s.wrp = 5; + if (param > 10) + mp0.s.wrp = 6; + if (param > 12) + mp0.s.wrp = 7; + } + + mp0.s.ppd = 0; + + s = lookup_env(priv, "ddr_wrp"); + if (s) + mp0.s.wrp = simple_strtoul(s, NULL, 0); + + debug("%-45s : %d, [0x%x]\n", + "Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp); + + s = lookup_env_ull(priv, "ddr_modereg_params0"); + if (s) + mp0.u64 = simple_strtoull(s, NULL, 0); + + debug("MODEREG_PARAMS0 : 0x%016llx\n", + mp0.u64); + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); +} + +static void lmc_modereg_params1(struct ddr_priv *priv) +{ + union cvmx_lmcx_modereg_params1 mp1; + char *s; + int i; + + mp1.u64 = odt_config[odt_idx].modereg_params1.u64; + + /* + * Special request: mismatched DIMM support. Slot 0: 2-Rank, + * Slot 1: 1-Rank + */ + if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */ + mp1.s.rtt_nom_00 = 0; + mp1.s.rtt_nom_01 = 3; /* rttnom_40ohm */ + mp1.s.rtt_nom_10 = 3; /* rttnom_40ohm */ + mp1.s.rtt_nom_11 = 0; + dyn_rtt_nom_mask = 0x6; + } + + s = lookup_env(priv, "ddr_rtt_nom_mask"); + if (s) + dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0); + + /* + * Save the original rtt_nom settings before sweeping through + * settings. + */ + default_rtt_nom[0] = mp1.s.rtt_nom_00; + default_rtt_nom[1] = mp1.s.rtt_nom_01; + default_rtt_nom[2] = mp1.s.rtt_nom_10; + default_rtt_nom[3] = mp1.s.rtt_nom_11; + + ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto; + + for (i = 0; i < 4; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2), + !!(i & 1)); + if (!s) + s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num, + !!(i & 2), !!(i & 1)); + if (s) { + value = simple_strtoul(s, NULL, 0); + mp1.u64 &= ~((u64)0x7 << (i * 12 + 9)); + mp1.u64 |= ((value & 0x7) << (i * 12 + 9)); + default_rtt_nom[i] = value; + ddr_rtt_nom_auto = 0; + } + } + + s = lookup_env(priv, "ddr_rtt_nom"); + if (!s) + s = lookup_env(priv, "ddr%d_rtt_nom", if_num); + if (s) { + u64 value; + + value = simple_strtoul(s, NULL, 0); + + if (dyn_rtt_nom_mask & 1) { + default_rtt_nom[0] = value; + mp1.s.rtt_nom_00 = value; + } + if (dyn_rtt_nom_mask & 2) { + default_rtt_nom[1] = value; + mp1.s.rtt_nom_01 = value; + } + if (dyn_rtt_nom_mask & 4) { + default_rtt_nom[2] = value; + mp1.s.rtt_nom_10 = value; + } + if (dyn_rtt_nom_mask & 8) { + default_rtt_nom[3] = value; + mp1.s.rtt_nom_11 = value; + } + + ddr_rtt_nom_auto = 0; + } + + for (i = 0; i < 4; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1)); + if (!s) + s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num, + !!(i & 2), !!(i & 1)); + if (s) { + value = simple_strtoul(s, NULL, 0); + insrt_wr(&mp1.u64, i, value); + } + } + + // Make sure 78XX pass 1 has valid RTT_WR settings, because + // configuration files may be set-up for later chips, and + // 78XX pass 1 supports no RTT_WR extension bits + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + for (i = 0; i < 4; ++i) { + // if 80 or undefined + if (extr_wr(mp1.u64, i) > 3) { + // FIXME? always insert 120 + insrt_wr(&mp1.u64, i, 1); + debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n", + !!(i & 2), i & 1); + } + } + } + + s = lookup_env(priv, "ddr_dic"); + if (s) { + u64 value = simple_strtoul(s, NULL, 0); + + for (i = 0; i < 4; ++i) { + mp1.u64 &= ~((u64)0x3 << (i * 12 + 7)); + mp1.u64 |= ((value & 0x3) << (i * 12 + 7)); + } + } + + for (i = 0; i < 4; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1)); + if (s) { + value = simple_strtoul(s, NULL, 0); + mp1.u64 &= ~((u64)0x3 << (i * 12 + 7)); + mp1.u64 |= ((value & 0x3) << (i * 12 + 7)); + } + } + + s = lookup_env_ull(priv, "ddr_modereg_params1"); + if (s) + mp1.u64 = simple_strtoull(s, NULL, 0); + + debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], + mp1.s.rtt_nom_11, + mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00); + + debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)], + extr_wr(mp1.u64, 3), + extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0)); + + debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->dic_ohms[mp1.s.dic_11], + imp_val->dic_ohms[mp1.s.dic_10], + imp_val->dic_ohms[mp1.s.dic_01], + imp_val->dic_ohms[mp1.s.dic_00], + mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00); + + debug("MODEREG_PARAMS1 : 0x%016llx\n", + mp1.u64); + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64); +} + +static void lmc_modereg_params2(struct ddr_priv *priv) +{ + char *s; + int i; + + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_modereg_params2 mp2; + + mp2.u64 = odt_config[odt_idx].modereg_params2.u64; + + s = lookup_env(priv, "ddr_rtt_park"); + if (s) { + u64 value = simple_strtoul(s, NULL, 0); + + for (i = 0; i < 4; ++i) { + mp2.u64 &= ~((u64)0x7 << (i * 10 + 0)); + mp2.u64 |= ((value & 0x7) << (i * 10 + 0)); + } + } + + for (i = 0; i < 4; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2), + !!(i & 1)); + if (s) { + value = simple_strtoul(s, NULL, 0); + mp2.u64 &= ~((u64)0x7 << (i * 10 + 0)); + mp2.u64 |= ((value & 0x7) << (i * 10 + 0)); + } + } + + s = lookup_env_ull(priv, "ddr_modereg_params2"); + if (s) + mp2.u64 = simple_strtoull(s, NULL, 0); + + debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp2.s.rtt_park_11], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_10], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_01], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_00], + mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01, + mp2.s.rtt_park_00); + + debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE", + mp2.s.vref_range_11, + mp2.s.vref_range_10, + mp2.s.vref_range_01, mp2.s.vref_range_00); + + debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE", + mp2.s.vref_value_11, + mp2.s.vref_value_10, + mp2.s.vref_value_01, mp2.s.vref_value_00); + + debug("MODEREG_PARAMS2 : 0x%016llx\n", + mp2.u64); + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64); + } +} + +static void lmc_modereg_params3(struct ddr_priv *priv) +{ + char *s; + + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_modereg_params3 mp3; + + mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num)); + /* Disable as workaround to Errata 20547 */ + mp3.s.rd_dbi = 0; + mp3.s.tccd_l = max(divide_roundup(ddr4_tccd_lmin, tclk_psecs), + 5ull) - 4; + + s = lookup_env(priv, "ddr_rd_preamble"); + if (s) + mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0); + + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + int delay = 0; + + if (lranks_per_prank == 4 && ddr_hertz >= 1000000000) + delay = 1; + + mp3.s.xrank_add_tccd_l = delay; + mp3.s.xrank_add_tccd_s = delay; + } + + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64); + debug("MODEREG_PARAMS3 : 0x%016llx\n", + mp3.u64); + } +} + +static void lmc_nxm(struct ddr_priv *priv) +{ + union cvmx_lmcx_nxm lmc_nxm; + int num_bits = row_lsb + row_bits + lranks_bits - 26; + char *s; + + lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num)); + + /* .cn78xx. */ + if (rank_mask & 0x1) + lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits; + if (rank_mask & 0x2) + lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits; + if (rank_mask & 0x4) + lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits; + if (rank_mask & 0x8) + lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits; + + /* Set the mask for non-existent ranks. */ + lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff; + + s = lookup_env_ull(priv, "ddr_nxm"); + if (s) + lmc_nxm.u64 = simple_strtoull(s, NULL, 0); + + debug("LMC_NXM : 0x%016llx\n", + lmc_nxm.u64); + lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64); +} + +static void lmc_wodt_mask(struct ddr_priv *priv) +{ + union cvmx_lmcx_wodt_mask wodt_mask; + char *s; + + wodt_mask.u64 = odt_config[odt_idx].odt_mask; + + s = lookup_env_ull(priv, "ddr_wodt_mask"); + if (s) + wodt_mask.u64 = simple_strtoull(s, NULL, 0); + + debug("WODT_MASK : 0x%016llx\n", + wodt_mask.u64); + lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64); +} + +static void lmc_rodt_mask(struct ddr_priv *priv) +{ + union cvmx_lmcx_rodt_mask rodt_mask; + int rankx; + char *s; + + rodt_mask.u64 = odt_config[odt_idx].rodt_ctl; + + s = lookup_env_ull(priv, "ddr_rodt_mask"); + if (s) + rodt_mask.u64 = simple_strtoull(s, NULL, 0); + + debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64); + lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64); + + dyn_rtt_nom_mask = 0; + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff); + } + if (num_ranks == 4) { + /* + * Normally ODT1 is wired to rank 1. For quad-ranked DIMMs + * ODT1 is wired to the third rank (rank 2). The mask, + * dyn_rtt_nom_mask, is used to indicate for which ranks + * to sweep RTT_NOM during read-leveling. Shift the bit + * from the ODT1 position over to the "ODT2" position so + * that the read-leveling analysis comes out right. + */ + int odt1_bit = dyn_rtt_nom_mask & 2; + + dyn_rtt_nom_mask &= ~2; + dyn_rtt_nom_mask |= odt1_bit << 1; + } + debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask); +} + +static void lmc_comp_ctl2(struct ddr_priv *priv) +{ + union cvmx_lmcx_comp_ctl2 cc2; + char *s; + + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + + cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena; + /* Default 4=34.3 ohm */ + cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl; + /* Default 4=34.3 ohm */ + cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl; + /* Default 4=34.3 ohm */ + cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 4 : c_cfg->ctl_ctl; + + ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto; + s = lookup_env(priv, "ddr_rodt_ctl_auto"); + if (s) + ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0); + + default_rodt_ctl = odt_config[odt_idx].qs_dic; + s = lookup_env(priv, "ddr_rodt_ctl"); + if (!s) + s = lookup_env(priv, "ddr%d_rodt_ctl", if_num); + if (s) { + default_rodt_ctl = simple_strtoul(s, NULL, 0); + ddr_rodt_ctl_auto = 0; + } + + cc2.cn70xx.rodt_ctl = default_rodt_ctl; + + // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms, + // and DCLK speed is 1 GHz or more... + if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm && + ddr_hertz >= 1000000000) { + // lowest for DDR4 is 26 ohms + cc2.s.ck_ctl = ddr4_driver_26_ohm; + debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n", + node, if_num, cc2.s.ck_ctl, + imp_val->drive_strength[cc2.s.ck_ctl]); + } + + // if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms, + // if DCLK speed is 1 GHz or more... + if (ddr_type == DDR4_DRAM && dimm_count == 2 && + (spd_dimm_type == 2 || spd_dimm_type == 6) && + ddr_hertz >= 1000000000) { + // lowest for DDR4 is 26 ohms + cc2.cn78xx.control_ctl = ddr4_driver_26_ohm; + // lowest for DDR4 is 26 ohms + cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm; + debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n", + node, if_num, ddr4_driver_26_ohm, + imp_val->drive_strength[ddr4_driver_26_ohm]); + } + + s = lookup_env(priv, "ddr_ck_ctl"); + if (s) + cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_cmd_ctl"); + if (s) + cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_control_ctl"); + if (s) + cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_dqx_ctl"); + if (s) + cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0); + + debug("%-45s : %d, %d ohms\n", "DQX_CTL ", cc2.cn78xx.dqx_ctl, + imp_val->drive_strength[cc2.cn78xx.dqx_ctl]); + debug("%-45s : %d, %d ohms\n", "CK_CTL ", cc2.cn78xx.ck_ctl, + imp_val->drive_strength[cc2.cn78xx.ck_ctl]); + debug("%-45s : %d, %d ohms\n", "CMD_CTL ", cc2.cn78xx.cmd_ctl, + imp_val->drive_strength[cc2.cn78xx.cmd_ctl]); + debug("%-45s : %d, %d ohms\n", "CONTROL_CTL ", + cc2.cn78xx.control_ctl, + imp_val->drive_strength[cc2.cn78xx.control_ctl]); + debug("Read ODT_CTL : 0x%x (%d ohms)\n", + cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); + + debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64); + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); +} + +static void lmc_phy_ctl(struct ddr_priv *priv) +{ + union cvmx_lmcx_phy_ctl phy_ctl; + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.ts_stagger = 0; + // FIXME: are there others TBD? + phy_ctl.s.dsk_dbg_overwrt_ena = 0; + + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) { + // C0 is TEN, C1 is A17 + phy_ctl.s.c0_sel = 2; + phy_ctl.s.c1_sel = 2; + debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n", + node, if_num, phy_ctl.s.c1_sel); + } + + debug("PHY_CTL : 0x%016llx\n", + phy_ctl.u64); + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); +} + +static void lmc_ext_config(struct ddr_priv *priv) +{ + union cvmx_lmcx_ext_config ext_cfg; + char *s; + + ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + ext_cfg.s.vrefint_seq_deskew = 0; + ext_cfg.s.read_ena_bprch = 1; + ext_cfg.s.read_ena_fprch = 1; + ext_cfg.s.drive_ena_fprch = 1; + ext_cfg.s.drive_ena_bprch = 1; + // make sure this is OFF for all current chips + ext_cfg.s.invert_data = 0; + + s = lookup_env(priv, "ddr_read_fprch"); + if (s) + ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_read_bprch"); + if (s) + ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_drive_fprch"); + if (s) + ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_drive_bprch"); + if (s) + ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0); + + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) { + ext_cfg.s.dimm0_cid = lranks_bits; + ext_cfg.s.dimm1_cid = lranks_bits; + debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n", + node, if_num, ext_cfg.s.dimm0_cid); + } + + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64); + debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64); +} + +static void lmc_ext_config2(struct ddr_priv *priv) +{ + char *s; + + // NOTE: all chips have this register, but not necessarily the + // fields we modify... + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && + !octeon_is_cpuid(OCTEON_CN73XX)) { + union cvmx_lmcx_ext_config2 ext_cfg2; + int value = 1; // default to 1 + + ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num)); + + s = lookup_env(priv, "ddr_ext2_delay_unload"); + if (s) + value = !!simple_strtoul(s, NULL, 0); + + ext_cfg2.s.delay_unload_r0 = value; + ext_cfg2.s.delay_unload_r1 = value; + ext_cfg2.s.delay_unload_r2 = value; + ext_cfg2.s.delay_unload_r3 = value; + + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64); + debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64); + } +} + +static void lmc_dimm01_params_loop(struct ddr_priv *priv) +{ + union cvmx_lmcx_dimmx_params dimm_p; + int dimmx = didx; + char *s; + int rc; + int i; + + dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num)); + + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0; + union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1; + union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl; + + dimm_p.s.rc0 = 0; + dimm_p.s.rc1 = 0; + dimm_p.s.rc2 = 0; + + rc = read_spd(&dimm_config_table[didx], 0, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL); + dimm_p.s.rc3 = (rc >> 4) & 0xf; + dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2; + dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0; + + rc = read_spd(&dimm_config_table[didx], 0, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK); + dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2; + dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0; + + dimm_p.s.rc6 = 0; + dimm_p.s.rc7 = 0; + dimm_p.s.rc8 = 0; + dimm_p.s.rc9 = 0; + + /* + * rc10 DDR4 RDIMM Operating Speed + * === =================================================== + * 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps) + * 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps) + * 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps) + * 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps) + * 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps) + * 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps) + */ + dimm_p.s.rc10 = 0; + if (tclk_psecs < 1250) + dimm_p.s.rc10 = 1; + if (tclk_psecs < 1071) + dimm_p.s.rc10 = 2; + if (tclk_psecs < 938) + dimm_p.s.rc10 = 3; + if (tclk_psecs < 833) + dimm_p.s.rc10 = 4; + if (tclk_psecs < 750) + dimm_p.s.rc10 = 5; + + dimm_p.s.rc11 = 0; + dimm_p.s.rc12 = 0; + /* 0=LRDIMM, 1=RDIMM */ + dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4; + dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ? + (spd_addr_mirror << 3) : 0; + dimm_p.s.rc14 = 0; + dimm_p.s.rc15 = 0; /* 1 nCK latency adder */ + + ddr4_p0.u64 = 0; + + ddr4_p0.s.rc8x = 0; + ddr4_p0.s.rc7x = 0; + ddr4_p0.s.rc6x = 0; + ddr4_p0.s.rc5x = 0; + ddr4_p0.s.rc4x = 0; + + ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs); + + ddr4_p0.s.rc2x = 0; + ddr4_p0.s.rc1x = 0; + + ddr4_p1.u64 = 0; + + ddr4_p1.s.rcbx = 0; + ddr4_p1.s.rcax = 0; + ddr4_p1.s.rc9x = 0; + + ddr4_ctl.u64 = 0; + ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004; + ddr4_ctl.cn70xx.ddr4_dimm1_wmask = + (dimm_count > 1) ? 0x004 : 0x0000; + + /* + * Handle any overrides from envvars here... + */ + s = lookup_env(priv, "ddr_ddr4_params0"); + if (s) + ddr4_p0.u64 = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_ddr4_params1"); + if (s) + ddr4_p1.u64 = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_ddr4_dimm_ctl"); + if (s) + ddr4_ctl.u64 = simple_strtoul(s, NULL, 0); + + for (i = 0; i < 11; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1); + if (s) { + value = simple_strtoul(s, NULL, 0); + if (i < 8) { + ddr4_p0.u64 &= ~((u64)0xff << (i * 8)); + ddr4_p0.u64 |= (value << (i * 8)); + } else { + ddr4_p1.u64 &= + ~((u64)0xff << ((i - 8) * 8)); + ddr4_p1.u64 |= (value << ((i - 8) * 8)); + } + } + } + + /* + * write the final CSR values + */ + lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num), + ddr4_p0.u64); + + lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64); + + lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num), + ddr4_p1.u64); + + debug("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n", + dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax, + ddr4_p1.s.rc9x, ddr4_p0.s.rc8x, + ddr4_p0.s.rc7x, ddr4_p0.s.rc6x, + ddr4_p0.s.rc5x, ddr4_p0.s.rc4x, + ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x); + + } else { + rc = read_spd(&dimm_config_table[didx], 0, 69); + dimm_p.s.rc0 = (rc >> 0) & 0xf; + dimm_p.s.rc1 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 70); + dimm_p.s.rc2 = (rc >> 0) & 0xf; + dimm_p.s.rc3 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 71); + dimm_p.s.rc4 = (rc >> 0) & 0xf; + dimm_p.s.rc5 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 72); + dimm_p.s.rc6 = (rc >> 0) & 0xf; + dimm_p.s.rc7 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 73); + dimm_p.s.rc8 = (rc >> 0) & 0xf; + dimm_p.s.rc9 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 74); + dimm_p.s.rc10 = (rc >> 0) & 0xf; + dimm_p.s.rc11 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 75); + dimm_p.s.rc12 = (rc >> 0) & 0xf; + dimm_p.s.rc13 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 76); + dimm_p.s.rc14 = (rc >> 0) & 0xf; + dimm_p.s.rc15 = (rc >> 4) & 0xf; + + s = ddr_getenv_debug(priv, "ddr_clk_drive"); + if (s) { + if (strcmp(s, "light") == 0) + dimm_p.s.rc5 = 0x0; /* Light Drive */ + if (strcmp(s, "moderate") == 0) + dimm_p.s.rc5 = 0x5; /* Moderate Drive */ + if (strcmp(s, "strong") == 0) + dimm_p.s.rc5 = 0xA; /* Strong Drive */ + printf("Parameter found in environment. ddr_clk_drive = %s\n", + s); + } + + s = ddr_getenv_debug(priv, "ddr_cmd_drive"); + if (s) { + if (strcmp(s, "light") == 0) + dimm_p.s.rc3 = 0x0; /* Light Drive */ + if (strcmp(s, "moderate") == 0) + dimm_p.s.rc3 = 0x5; /* Moderate Drive */ + if (strcmp(s, "strong") == 0) + dimm_p.s.rc3 = 0xA; /* Strong Drive */ + printf("Parameter found in environment. ddr_cmd_drive = %s\n", + s); + } + + s = ddr_getenv_debug(priv, "ddr_ctl_drive"); + if (s) { + if (strcmp(s, "light") == 0) + dimm_p.s.rc4 = 0x0; /* Light Drive */ + if (strcmp(s, "moderate") == 0) + dimm_p.s.rc4 = 0x5; /* Moderate Drive */ + printf("Parameter found in environment. ddr_ctl_drive = %s\n", + s); + } + + /* + * rc10 DDR3 RDIMM Operating Speed + * == ===================================================== + * 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 def + * 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066 + * 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333 + * 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600 + * 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866 + */ + dimm_p.s.rc10 = 0; + if (tclk_psecs < 2500) + dimm_p.s.rc10 = 1; + if (tclk_psecs < 1875) + dimm_p.s.rc10 = 2; + if (tclk_psecs < 1500) + dimm_p.s.rc10 = 3; + if (tclk_psecs < 1250) + dimm_p.s.rc10 = 4; + } + + s = lookup_env(priv, "ddr_dimmx_params", i); + if (s) + dimm_p.u64 = simple_strtoul(s, NULL, 0); + + for (i = 0; i < 16; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_rc%d", i); + if (s) { + value = simple_strtoul(s, NULL, 0); + dimm_p.u64 &= ~((u64)0xf << (i * 4)); + dimm_p.u64 |= (value << (i * 4)); + } + } + + lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64); + + debug("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n", + dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13, + dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10, + dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7, + dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4, + dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0); + + // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers, + // and treat it specially + if (ddr_type == DDR3_DRAM && num_ranks == 4 && + spd_rdimm_registers == 2 && dimmx == 0) { + debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n"); + lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64); + } +} + +static void lmc_dimm01_params(struct ddr_priv *priv) +{ + union cvmx_lmcx_dimm_ctl dimm_ctl; + char *s; + + if (spd_rdimm) { + for (didx = 0; didx < (unsigned int)dimm_count; ++didx) + lmc_dimm01_params_loop(priv); + + if (ddr_type == DDR4_DRAM) { + /* LMC0_DIMM_CTL */ + dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); + dimm_ctl.s.dimm0_wmask = 0xdf3f; + dimm_ctl.s.dimm1_wmask = + (dimm_count > 1) ? 0xdf3f : 0x0000; + dimm_ctl.s.tcws = 0x4e0; + dimm_ctl.s.parity = c_cfg->parity; + + s = lookup_env(priv, "ddr_dimm0_wmask"); + if (s) { + dimm_ctl.s.dimm0_wmask = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dimm1_wmask"); + if (s) { + dimm_ctl.s.dimm1_wmask = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dimm_ctl_parity"); + if (s) + dimm_ctl.s.parity = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_dimm_ctl_tcws"); + if (s) + dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0); + + debug("LMC DIMM_CTL : 0x%016llx\n", + dimm_ctl.u64); + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); + + /* Init RCW */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); + + /* Write RC0D last */ + dimm_ctl.s.dimm0_wmask = 0x2000; + dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? + 0x2000 : 0x0000; + debug("LMC DIMM_CTL : 0x%016llx\n", + dimm_ctl.u64); + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); + + /* + * Don't write any extended registers the second time + */ + lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0); + + /* Init RCW */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); + } else { + /* LMC0_DIMM_CTL */ + dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); + dimm_ctl.s.dimm0_wmask = 0xffff; + // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 + // registers, and treat it specially + if (num_ranks == 4 && spd_rdimm_registers == 2) { + debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n"); + dimm_ctl.s.dimm1_wmask = 0xffff; + } else { + dimm_ctl.s.dimm1_wmask = + (dimm_count > 1) ? 0xffff : 0x0000; + } + dimm_ctl.s.tcws = 0x4e0; + dimm_ctl.s.parity = c_cfg->parity; + + s = lookup_env(priv, "ddr_dimm0_wmask"); + if (s) { + dimm_ctl.s.dimm0_wmask = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dimm1_wmask"); + if (s) { + dimm_ctl.s.dimm1_wmask = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dimm_ctl_parity"); + if (s) + dimm_ctl.s.parity = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_dimm_ctl_tcws"); + if (s) + dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0); + + debug("LMC DIMM_CTL : 0x%016llx\n", + dimm_ctl.u64); + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); + + /* Init RCW */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); + } + + } else { + /* Disable register control writes for unbuffered */ + union cvmx_lmcx_dimm_ctl dimm_ctl; + + dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); + dimm_ctl.s.dimm0_wmask = 0; + dimm_ctl.s.dimm1_wmask = 0; + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); + } +} + +static int lmc_rank_init(struct ddr_priv *priv) +{ + char *s; + + if (enable_by_rank_init) { + by_rank = 3; + saved_rank_mask = rank_mask; + } + +start_by_rank_init: + + if (enable_by_rank_init) { + rank_mask = (1 << by_rank); + if (!(rank_mask & saved_rank_mask)) + goto end_by_rank_init; + if (by_rank == 0) + rank_mask = saved_rank_mask; + + debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n", + by_rank, rank_mask); + } + + /* + * Comments (steps 3 through 5) continue in oct3_ddr3_seq() + */ + union cvmx_lmcx_modereg_params0 mp0; + + if (ddr_memory_preserved(priv)) { + /* + * Contents are being preserved. Take DRAM out of self-refresh + * first. Then init steps can procede normally + */ + /* self-refresh exit */ + oct3_ddr3_seq(priv, rank_mask, if_num, 3); + } + + mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + mp0.s.dllr = 1; /* Set during first init sequence */ + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); + + ddr_init_seq(priv, rank_mask, if_num); + + mp0.s.dllr = 0; /* Clear for normal operation */ + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); + + if (spd_rdimm && ddr_type == DDR4_DRAM && + octeon_is_cpuid(OCTEON_CN7XXX)) { + debug("Running init sequence 1\n"); + change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count); + } + + memset(lanes, 0, sizeof(lanes)); + for (lane = 0; lane < last_lane; lane++) { + // init all lanes to reset value + dac_settings[lane] = 127; + } + + // FIXME: disable internal VREF if deskew is disabled? + if (disable_deskew_training) { + debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n", + node, if_num); + num_samples = 0; + } else if (ddr_type == DDR4_DRAM && + !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + num_samples = DEFAULT_DAC_SAMPLES; + } else { + // if DDR3 or no ability to write DAC values + num_samples = 1; + } + +perform_internal_vref_training: + + total_dac_eval_retries = 0; + dac_eval_exhausted = 0; + + for (sample = 0; sample < num_samples; sample++) { + dac_eval_retries = 0; + + // make offset and internal vref training repeatable + do { + /* + * 6.9.8 LMC Offset Training + * LMC requires input-receiver offset training. + */ + perform_offset_training(priv, rank_mask, if_num); + + /* + * 6.9.9 LMC Internal vref Training + * LMC requires input-reference-voltage training. + */ + perform_internal_vref_training(priv, rank_mask, if_num); + + // read and maybe display the DAC values for a sample + read_dac_dbi_settings(priv, if_num, /*DAC*/ 1, + dac_settings); + if (num_samples == 1 || ddr_verbose(priv)) { + display_dac_dbi_settings(if_num, /*DAC*/ 1, + use_ecc, dac_settings, + "Internal VREF"); + } + + // for DDR4, evaluate the DAC settings and retry + // if any issues + if (ddr_type == DDR4_DRAM) { + if (evaluate_dac_settings + (if_64b, use_ecc, dac_settings)) { + dac_eval_retries += 1; + if (dac_eval_retries > + DAC_RETRIES_LIMIT) { + debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n", + node, if_num); + dac_eval_exhausted += 1; + } else { + debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n", + node, if_num); + total_dac_eval_retries += 1; + // try another sample + continue; + } + } + + // taking multiple samples, otherwise do nothing + if (num_samples > 1) { + // good sample or exhausted retries, + // record it + for (lane = 0; lane < last_lane; + lane++) { + lanes[lane].bytes[sample] = + dac_settings[lane]; + } + } + } + // done if DDR3, or good sample, or exhausted retries + break; + } while (1); + } + + if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) { + debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n", + node, if_num, total_dac_eval_retries, dac_eval_exhausted); + } + + if (num_samples > 1) { + debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n", + node, if_num); + + for (lane = 0; lane < last_lane; lane++) { + dac_settings[lane] = + process_samples_average(&lanes[lane].bytes[0], + num_samples, if_num, lane); + } + display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc, + dac_settings, "Averaged VREF"); + + // finally, write the final DAC values + for (lane = 0; lane < last_lane; lane++) { + load_dac_override(priv, if_num, dac_settings[lane], + lane); + } + } + + // allow override of any byte-lane internal VREF + int overrode_vref_dac = 0; + + for (lane = 0; lane < last_lane; lane++) { + s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane); + if (s) { + dac_settings[lane] = simple_strtoul(s, NULL, 0); + overrode_vref_dac = 1; + // finally, write the new DAC value + load_dac_override(priv, if_num, dac_settings[lane], + lane); + } + } + if (overrode_vref_dac) { + display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc, + dac_settings, "Override VREF"); + } + + // as a second step, after internal VREF training, before starting + // deskew training: + // for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting + // to 127 + if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && + !disable_deskew_training) { + load_dac_override(priv, if_num, 127, /* all */ 0x0A); + debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n", + node, if_num); + } + + /* + * 4.8.8 LMC Deskew Training + * + * LMC requires input-read-data deskew training. + */ + if (!disable_deskew_training) { + deskew_training_errors = + perform_deskew_training(priv, rank_mask, if_num, + spd_rawcard_aorb); + + // All the Deskew lock and saturation retries (may) have + // been done, but we ended up with nibble errors; so, + // as a last ditch effort, try the Internal vref + // Training again... + if (deskew_training_errors) { + if (internal_retries < + DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) { + internal_retries++; + debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n", + node, if_num, internal_retries); + goto perform_internal_vref_training; + } else { + if (restart_if_dsk_incomplete) { + debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n", + node, if_num, internal_retries); + return -EAGAIN; + } + debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n", + node, if_num, internal_retries); + } + } /* if (deskew_training_errors) */ + + // FIXME: treat this as the final DSK print from now on, + // and print if VBL_NORM or above also, save the results + // of the original training in case we want them later + validate_deskew_training(priv, rank_mask, if_num, + &deskew_training_results, 1); + } else { /* if (! disable_deskew_training) */ + debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n", + node, if_num); + validate_deskew_training(priv, rank_mask, if_num, + &deskew_training_results, 1); + } /* if (! disable_deskew_training) */ + + if (enable_by_rank_init) { + read_dac_dbi_settings(priv, if_num, /*dac */ 1, + &rank_dac[by_rank].bytes[0]); + get_deskew_settings(priv, if_num, &rank_dsk[by_rank]); + debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank); + } + +end_by_rank_init: + + if (enable_by_rank_init) { + //debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank); + + by_rank--; + if (by_rank >= 0) + goto start_by_rank_init; + + rank_mask = saved_rank_mask; + ddr_init_seq(priv, rank_mask, if_num); + + process_by_rank_dac(priv, if_num, rank_mask, rank_dac); + process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk); + + // FIXME: set this to prevent later checking!!! + disable_deskew_training = 1; + + debug("\n>>>>> BY_RANK: FINISHED!!\n\n"); + } + + return 0; +} + +static void lmc_config_2(struct ddr_priv *priv) +{ + union cvmx_lmcx_config lmc_config; + int save_ref_zqcs_int; + u64 temp_delay_usecs; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + + /* + * Temporarily select the minimum ZQCS interval and wait + * long enough for a few ZQCS calibrations to occur. This + * should ensure that the calibration circuitry is + * stabilized before read/write leveling occurs. + */ + if (octeon_is_cpuid(OCTEON_CN7XXX)) { + save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int; + /* set smallest interval */ + lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7); + } else { + save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int; + /* set smallest interval */ + lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7); + } + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + + /* + * Compute an appropriate delay based on the current ZQCS + * interval. The delay should be long enough for the + * current ZQCS delay counter to expire plus ten of the + * minimum intarvals to ensure that some calibrations + * occur. + */ + temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs * + 100 * 512 * 128) / (10000 * 10000) + 10 * + ((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000); + + debug("Waiting %lld usecs for ZQCS calibrations to start\n", + temp_delay_usecs); + udelay(temp_delay_usecs); + + if (octeon_is_cpuid(OCTEON_CN7XXX)) { + /* Restore computed interval */ + lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int; + } else { + /* Restore computed interval */ + lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int; + } + + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); +} + +static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data"); +static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data"); +static union cvmx_lmcx_modereg_params1 mp1 __section(".data"); + +static int wl_mask[9] __section(".data"); +static int byte_idx __section(".data"); +static int ecc_ena __section(".data"); +static int wl_roundup __section(".data"); +static int save_mode32b __section(".data"); +static int disable_hwl_validity __section(".data"); +static int default_wl_rtt_nom __section(".data"); +static int wl_pbm_pump __section(".data"); + +static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx) +{ + int wloop = 0; + // retries per sample for HW-related issues with bitmasks or values + int wloop_retries = 0; + int wloop_retries_total = 0; + int wloop_retries_exhausted = 0; +#define WLOOP_RETRIES_DEFAULT 5 + int wl_val_err; + int wl_mask_err_rank = 0; + int wl_val_err_rank = 0; + // array to collect counts of byte-lane values + // assume low-order 3 bits and even, so really only 2-bit values + struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9]; + int extra_bumps, extra_mask; + int rank_nom = 0; + + if (!(rank_mask & (1 << rankx))) + return; + + if (match_wl_rtt_nom) { + if (rankx == 0) + rank_nom = mp1.s.rtt_nom_00; + if (rankx == 1) + rank_nom = mp1.s.rtt_nom_01; + if (rankx == 2) + rank_nom = mp1.s.rtt_nom_10; + if (rankx == 3) + rank_nom = mp1.s.rtt_nom_11; + + debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n", + node, if_num, rankx, rank_nom, + imp_val->rtt_nom_ohms[rank_nom]); + } + + memset(wl_bytes, 0, sizeof(wl_bytes)); + memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra)); + + // restructure the looping so we can keep trying until we get the + // samples we want + while (wloop < wl_loops) { + wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num)); + + wl_ctl.cn78xx.rtt_nom = + (default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7; + + if (match_wl_rtt_nom) { + wl_ctl.cn78xx.rtt_nom = + (rank_nom > 0) ? (rank_nom - 1) : 7; + } + + /* Clear write-level delays */ + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0); + + wl_mask_err = 0; /* Reset error counters */ + wl_val_err = 0; + + for (byte_idx = 0; byte_idx < 9; ++byte_idx) + wl_mask[byte_idx] = 0; /* Reset bitmasks */ + + // do all the byte-lanes at the same time + wl_ctl.cn78xx.lanemask = 0x1ff; + + lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64); + + /* + * Read and write values back in order to update the + * status field. This insures that we read the updated + * values after write-leveling has completed. + */ + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num))); + + /* write-leveling */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 6); + + do { + wl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + } while (wl_rank.cn78xx.status != 3); + + wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { + wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv, + if_num, + byte_idx); + if (wl_mask[byte_idx] == 0) + ++wl_mask_err; + } + + // check validity only if no bitmask errors + if (wl_mask_err == 0) { + if ((spd_dimm_type == 1 || spd_dimm_type == 2) && + dram_width != 16 && if_64b && + !disable_hwl_validity) { + // bypass if [mini|SO]-[RU]DIMM or x16 or + // 32-bit + wl_val_err = + validate_hw_wl_settings(if_num, + &wl_rank, + spd_rdimm, ecc_ena); + wl_val_err_rank += (wl_val_err != 0); + } + } else { + wl_mask_err_rank++; + } + + // before we print, if we had bitmask or validity errors, + // do a retry... + if (wl_mask_err != 0 || wl_val_err != 0) { + if (wloop_retries < WLOOP_RETRIES_DEFAULT) { + wloop_retries++; + wloop_retries_total++; + // this printout is per-retry: only when VBL + // is high enough (DEV?) + // FIXME: do we want to show the bad bitmaps + // or delays here also? + debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n", + node, if_num, rankx, + (wl_mask_err) ? "Bitmask" : "Validity"); + // this takes us back to the top without + // counting a sample + return; + } + + // retries exhausted, do not print at normal VBL + debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n", + node, if_num, rankx, + (wl_mask_err) ? "Bitmask" : "Validity"); + wloop_retries_exhausted++; + } + // no errors or exhausted retries, use this sample + wloop_retries = 0; //reset for next sample + + // when only 1 sample or forced, print the bitmasks then + // current HW WL + if (wl_loops == 1 || wl_print) { + if (wl_print > 1) + display_wl_bm(if_num, rankx, wl_mask); + display_wl(if_num, wl_rank, rankx); + } + + if (wl_roundup) { /* Round up odd bitmask delays */ + for (byte_idx = 0; byte_idx < (8 + ecc_ena); + ++byte_idx) { + if (!(if_bytemask & (1 << byte_idx))) + return; + upd_wl_rank(&wl_rank, byte_idx, + roundup_ddr3_wlevel_bitmask + (wl_mask[byte_idx])); + } + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + display_wl(if_num, wl_rank, rankx); + } + + // OK, we have a decent sample, no bitmask or validity errors + extra_bumps = 0; + extra_mask = 0; + for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { + int ix; + + if (!(if_bytemask & (1 << byte_idx))) + return; + + // increment count of byte-lane value + // only 4 values + ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3; + wl_bytes[byte_idx].bitcnt[ix]++; + wl_bytes_extra[byte_idx].bitcnt[ix]++; + // if perfect... + if (__builtin_popcount(wl_mask[byte_idx]) == 4) { + wl_bytes_extra[byte_idx].bitcnt[ix] += + wl_pbm_pump; + extra_bumps++; + extra_mask |= 1 << byte_idx; + } + } + + if (extra_bumps) { + if (wl_print > 1) { + debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n", + node, if_num, rankx, extra_bumps, + extra_mask); + } + } + + // if we get here, we have taken a decent sample + wloop++; + + } /* while (wloop < wl_loops) */ + + // if we did sample more than once, try to pick a majority vote + if (wl_loops > 1) { + // look for the majority in each byte-lane + for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { + int mx, mc, xc, cc; + int ix, alts; + int maj, xmaj, xmx, xmc, xxc, xcc; + + if (!(if_bytemask & (1 << byte_idx))) + return; + maj = find_wl_majority(&wl_bytes[byte_idx], &mx, + &mc, &xc, &cc); + xmaj = find_wl_majority(&wl_bytes_extra[byte_idx], + &xmx, &xmc, &xxc, &xcc); + if (maj != xmaj) { + if (wl_print) { + debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n", + node, if_num, rankx, + byte_idx, maj, xc, xmaj, xxc); + } + mx = xmx; + mc = xmc; + xc = xxc; + cc = xcc; + } + + // see if there was an alternate + // take out the majority choice + alts = (mc & ~(1 << mx)); + if (alts != 0) { + for (ix = 0; ix < 4; ix++) { + // FIXME: could be done multiple times? + // bad if so + if (alts & (1 << ix)) { + // set the mask + hwl_alts[rankx].hwl_alt_mask |= + (1 << byte_idx); + // record the value + hwl_alts[rankx].hwl_alt_delay[byte_idx] = + ix << 1; + if (wl_print > 1) { + debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n", + node, + if_num, + rankx, + byte_idx, + mx << 1, + xc, + ix << 1, + wl_bytes + [byte_idx].bitcnt + [ix]); + } + } + } + } + + if (cc > 2) { // unlikely, but... + // assume: counts for 3 indices are all 1 + // possiblities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6 + // and the desired?: 2 , 4 , 6, 0 + // we choose the middle, assuming one of the + // outliers is bad + // NOTE: this is an ugly hack at the moment; + // there must be a better way + switch (mc) { + case 0x7: + mx = 1; + break; // was 0/2/4, choose 2 + case 0xb: + mx = 0; + break; // was 0/2/6, choose 0 + case 0xd: + mx = 3; + break; // was 0/4/6, choose 6 + case 0xe: + mx = 2; + break; // was 2/4/6, choose 4 + default: + case 0xf: + mx = 1; + break; // was 0/2/4/6, choose 2? + } + printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n", + node, if_num, rankx, byte_idx, mc, + mx << 1); + } + upd_wl_rank(&wl_rank, byte_idx, mx << 1); + } + + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + display_wl_with_final(if_num, wl_rank, rankx); + + // FIXME: does this help make the output a little easier + // to focus? + if (wl_print > 0) + debug("-----------\n"); + + } /* if (wl_loops > 1) */ + + // maybe print an error summary for the rank + if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) { + debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n", + node, if_num, rankx, wl_mask_err_rank, + wl_val_err_rank, wloop_retries_total, + wloop_retries_exhausted); + } +} + +static void lmc_write_leveling(struct ddr_priv *priv) +{ + union cvmx_lmcx_config cfg; + int rankx; + char *s; + + /* + * 4.8.9 LMC Write Leveling + * + * LMC supports an automatic write leveling like that described in the + * JEDEC DDR3 specifications separately per byte-lane. + * + * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations + * must be completed prior to starting this LMC write-leveling sequence. + * + * There are many possible procedures that will write-level all the + * attached DDR3 DRAM parts. One possibility is for software to simply + * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section + * describes one possible sequence that uses LMC's autowrite-leveling + * capabilities. + * + * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD + * delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this + * point. + * + * Do the remaining steps 2-7 separately for each rank i with attached + * DRAM. + * + * 2. Write LMC(0)_WLEVEL_RANKi = 0. + * + * 3. For x8 parts: + * + * Without changing any other fields in LMC(0)_WLEVEL_CTL, write + * LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached + * DRAM. + * + * For x16 parts: + * + * Without changing any other fields in LMC(0)_WLEVEL_CTL, write + * LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with + * attached DRAM. + * + * 4. Without changing any other fields in LMC(0)_CONFIG, + * + * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling + * + * o write LMC(0)_CONFIG[RANKMASK] = (1 << i) + * + * o write LMC(0)_SEQ_CTL[INIT_START] = 1 + * + * LMC will initiate write-leveling at this point. Assuming + * LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on + * the selected DRAM rank via a DDR3 MR1 write, then sequences + * through + * and accumulates write-leveling results for eight different delay + * settings twice, starting at a delay of zero in this case since + * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each + * setting, covering a total distance of one CK, then disables the + * write-leveling via another DDR3 MR1 write. + * + * After the sequence through 16 delay settings is complete: + * + * o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3 + * + * o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected + * by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write + * leveling result of 1 that followed result of 0 during the + * sequence, except that the LMC always writes + * LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0. + * + * o Software can read the eight write-leveling results from the + * first pass through the delay settings by reading + * LMC(0)_WLEVEL_DBG[BITMASK] (after writing + * LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the writeleveling + * results from the second pass through the eight delay + * settings. They should often be identical to the + * LMC(0)_WLEVEL_DBG[BITMASK] results, though.) + * + * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2. + * + * LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte + * lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point. + * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that + * software wrote in substep 2 above, which is 0. + * + * 6. For x16 parts: + * + * Without changing any other fields in LMC(0)_WLEVEL_CTL, write + * LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with + * attached DRAM. + * + * Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK] + * setting. Skip to substep 7 if this has already been done. + * + * For x8 parts: + * + * Skip this substep. Go to substep 7. + * + * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte + * lanes on all ranks with attached DRAM. + * + * At this point, all byte lanes on rank i with attached DRAM should + * have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has + * the result for each byte lane. + * + * But note that the DDR3 write-leveling sequence will only determine + * the delay modulo the CK cycle time, and cannot determine how many + * additional CK cycles of delay are present. Software must calculate + * the number of CK cycles, or equivalently, the + * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings. + * + * This BYTE*<4:3> calculation is system/board specific. + * + * Many techniques can be used to calculate write-leveling BYTE*<4:3> + * values, including: + * + * o Known values for some byte lanes. + * + * o Relative values for some byte lanes relative to others. + * + * For example, suppose lane X is likely to require a larger + * write-leveling delay than lane Y. A BYTEX<2:0> value that is much + * smaller than the BYTEY<2:0> value may then indicate that the + * required lane X delay wrapped into the next CK, so BYTEX<4:3> + * should be set to BYTEY<4:3>+1. + * + * When ECC DRAM is not present (i.e. when DRAM is not attached to + * the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the + * DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write + * LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0], + * using the final calculated BYTE0 value. + * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0], + * using the final calculated BYTE0 value. + * + * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks. + * + * Let rank i be a rank with attached DRAM. + * + * For all ranks j that do not have attached DRAM, set + * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi. + */ + + rankx = 0; + wl_roundup = 0; + disable_hwl_validity = 0; + + // wl_pbm_pump: weight for write-leveling PBMs... + // 0 causes original behavior + // 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms + // 4 would allow a minority of 1 pbm to outscore a majority of 4 + // non-pbms + wl_pbm_pump = 4; // FIXME: is 4 too much? + + if (wl_loops) { + debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node, + if_num); + } else { + /* Force software write-leveling to run */ + wl_mask_err = 1; + debug("N%d.LMC%d: Forcing software Write-Leveling\n", node, + if_num); + } + + default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ? + rttnom_20ohm : ddr4_rttnom_40ohm; + + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + ecc_ena = cfg.s.ecc_ena; + save_mode32b = cfg.cn78xx.mode32b; + cfg.cn78xx.mode32b = (!if_64b); + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); + debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); + + s = lookup_env(priv, "ddr_wlevel_roundup"); + if (s) + wl_roundup = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wlevel_printall"); + if (s) + wl_print = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wlevel_pbm_bump"); + if (s) + wl_pbm_pump = strtoul(s, NULL, 0); + + // default to disable when RL sequential delay check is disabled + disable_hwl_validity = disable_sequential_delay_check; + s = lookup_env(priv, "ddr_disable_hwl_validity"); + if (s) + disable_hwl_validity = !!strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wl_rtt_nom"); + if (s) + default_wl_rtt_nom = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_match_wl_rtt_nom"); + if (s) + match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0); + + if (match_wl_rtt_nom) + mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); + + // For DDR3, we do not touch WLEVEL_CTL fields OR_DIS or BITMASK + // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here + if (ddr_type == DDR4_DRAM) { + int default_or_dis = 1; + int default_bitmask = 0xff; + + // when x4, use only the lower nibble + if (dram_width == 4) { + default_bitmask = 0x0f; + if (wl_print) { + debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n", + node, if_num, default_bitmask); + } + } + + wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num)); + wl_ctl.s.or_dis = default_or_dis; + wl_ctl.s.bitmask = default_bitmask; + + // allow overrides + s = lookup_env(priv, "ddr_wlevel_ctl_or_dis"); + if (s) + wl_ctl.s.or_dis = !!strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wlevel_ctl_bitmask"); + if (s) + wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0); + + // print only if not defaults + if (wl_ctl.s.or_dis != default_or_dis || + wl_ctl.s.bitmask != default_bitmask) { + debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n", + node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask); + } + + // always write + lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64); + } + + // Start the hardware write-leveling loop per rank + for (rankx = 0; rankx < dimm_count * 4; rankx++) + lmc_write_leveling_loop(priv, rankx); + + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + cfg.cn78xx.mode32b = save_mode32b; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); + debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); + + // At the end of HW Write Leveling, check on some DESKEW things... + if (!disable_deskew_training) { + struct deskew_counts dsk_counts; + int retry_count = 0; + + debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n", + node, if_num); + + do { + validate_deskew_training(priv, rank_mask, if_num, + &dsk_counts, 1); + + // only RAWCARD A or B will not benefit from + // retraining if there's only saturation + // or any rawcard if there is a nibble error + if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) || + (dsk_counts.nibrng_errs != 0 || + dsk_counts.nibunl_errs != 0)) { + retry_count++; + debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n", + node, if_num, retry_count); + perform_deskew_training(priv, rank_mask, if_num, + spd_rawcard_aorb); + } else { + break; + } + } while (retry_count < 5); + } +} + +static void lmc_workaround(struct ddr_priv *priv) +{ + /* Workaround Trcd overflow by using Additive latency. */ + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + union cvmx_lmcx_modereg_params0 mp0; + union cvmx_lmcx_timing_params1 tp1; + union cvmx_lmcx_control ctrl; + int rankx; + + tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); + mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + + if (tp1.cn78xx.trcd == 0) { + debug("Workaround Trcd overflow by using Additive latency.\n"); + /* Hard code this to 12 and enable additive latency */ + tp1.cn78xx.trcd = 12; + mp0.s.al = 2; /* CL-2 */ + ctrl.s.pocas = 1; + + debug("MODEREG_PARAMS0 : 0x%016llx\n", + mp0.u64); + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), + mp0.u64); + debug("TIMING_PARAMS1 : 0x%016llx\n", + tp1.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); + + debug("LMC_CONTROL : 0x%016llx\n", + ctrl.u64); + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + /* MR1 */ + ddr4_mrw(priv, if_num, rankx, -1, 1, 0); + } + } + } + + // this is here just for output, to allow check of the Deskew + // settings one last time... + if (!disable_deskew_training) { + struct deskew_counts dsk_counts; + + debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n", + node, if_num); + validate_deskew_training(priv, rank_mask, if_num, &dsk_counts, + 3); + } + + /* + * Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x) + * + * When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND + * LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set + * LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1. + */ + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || + octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) { + union cvmx_lmcx_dll_ctl3 dll_ctl3; + union cvmx_lmcx_phy_ctl2 phy_ctl2; + union cvmx_lmcx_ext_config ext_cfg; + int increased_dsk_adj = 0; + int byte; + + phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num)); + ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + for (byte = 0; byte < 8; ++byte) { + if (!(if_bytemask & (1 << byte))) + continue; + increased_dsk_adj |= + (((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4); + } + + if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) { + ext_cfg.s.drive_ena_bprch = 1; + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64); + debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n", + if_num); + } + } +} + +// Software Write-Leveling block + +#define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32 +#define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17 +// full window is valid for 0x00 to 0x4A +// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1 +#define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT) +#define VREF_FINAL (VREF_LIMIT - 1) + +enum sw_wl_status { + WL_ESTIMATED = 0, /* HW/SW wleveling failed. Reslt estimated */ + WL_HARDWARE = 1, /* H/W wleveling succeeded */ + WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous setting */ + WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal setting */ +}; + +static u64 rank_addr __section(".data"); +static int vref_val __section(".data"); +static int final_vref_val __section(".data"); +static int final_vref_range __section(".data"); +static int start_vref_val __section(".data"); +static int computed_final_vref_val __section(".data"); +static char best_vref_val_count __section(".data"); +static char vref_val_count __section(".data"); +static char best_vref_val_start __section(".data"); +static char vref_val_start __section(".data"); +static int bytes_failed __section(".data"); +static enum sw_wl_status byte_test_status[9] __section(".data"); +static enum sw_wl_status sw_wl_rank_status __section(".data"); +static int sw_wl_failed __section(".data"); +static int sw_wl_hw __section(".data"); +static int measured_vref_flag __section(".data"); + +static void ddr4_vref_loop(struct ddr_priv *priv, int rankx) +{ + char *s; + + if (vref_val < VREF_FINAL) { + int vrange, vvalue; + + if (vref_val < VREF_RANGE2_LIMIT) { + vrange = 1; + vvalue = vref_val; + } else { + vrange = 0; + vvalue = vref_val - VREF_RANGE2_LIMIT; + } + + set_vref(priv, if_num, rankx, vrange, vvalue); + } else { /* if (vref_val < VREF_FINAL) */ + /* Print the final vref value first. */ + + /* Always print the computed first if its valid */ + if (computed_final_vref_val >= 0) { + debug("N%d.LMC%d.R%d: vref Computed Summary : %2d (0x%02x)\n", + node, if_num, rankx, + computed_final_vref_val, computed_final_vref_val); + } + + if (!measured_vref_flag) { // setup to use the computed + best_vref_val_count = 1; + final_vref_val = computed_final_vref_val; + } else { // setup to use the measured + if (best_vref_val_count > 0) { + best_vref_val_count = + max(best_vref_val_count, (char)2); + final_vref_val = best_vref_val_start + + divide_nint(best_vref_val_count - 1, 2); + + if (final_vref_val < VREF_RANGE2_LIMIT) { + final_vref_range = 1; + } else { + final_vref_range = 0; + final_vref_val -= VREF_RANGE2_LIMIT; + } + + int vvlo = best_vref_val_start; + int vrlo; + int vvhi = best_vref_val_start + + best_vref_val_count - 1; + int vrhi; + + if (vvlo < VREF_RANGE2_LIMIT) { + vrlo = 2; + } else { + vrlo = 1; + vvlo -= VREF_RANGE2_LIMIT; + } + + if (vvhi < VREF_RANGE2_LIMIT) { + vrhi = 2; + } else { + vrhi = 1; + vvhi -= VREF_RANGE2_LIMIT; + } + debug("N%d.LMC%d.R%d: vref Training Summary : 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n", + node, if_num, rankx, vvlo, vrlo, + final_vref_val, + final_vref_range + 1, vvhi, vrhi, + best_vref_val_count - 1); + + } else { + /* + * If nothing passed use the default vref + * value for this rank + */ + union cvmx_lmcx_modereg_params2 mp2; + + mp2.u64 = + lmc_rd(priv, + CVMX_LMCX_MODEREG_PARAMS2(if_num)); + final_vref_val = (mp2.u64 >> + (rankx * 10 + 3)) & 0x3f; + final_vref_range = (mp2.u64 >> + (rankx * 10 + 9)) & 0x01; + + debug("N%d.LMC%d.R%d: vref Using Default : %2d <----- %2d (0x%02x) -----> %2d, range%1d\n", + node, if_num, rankx, final_vref_val, + final_vref_val, final_vref_val, + final_vref_val, final_vref_range + 1); + } + } + + // allow override + s = lookup_env(priv, "ddr%d_vref_val_%1d%1d", + if_num, !!(rankx & 2), !!(rankx & 1)); + if (s) + final_vref_val = strtoul(s, NULL, 0); + + set_vref(priv, if_num, rankx, final_vref_range, final_vref_val); + } +} + +#define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors + +static int errors __section(".data"); +static int byte_delay[9] __section(".data"); +static u64 bytemask __section(".data"); +static int bytes_todo __section(".data"); +static int no_errors_count __section(".data"); +static u64 bad_bits[2] __section(".data"); +static u64 sum_dram_dclk __section(".data"); +static u64 sum_dram_ops __section(".data"); +static u64 start_dram_dclk __section(".data"); +static u64 stop_dram_dclk __section(".data"); +static u64 start_dram_ops __section(".data"); +static u64 stop_dram_ops __section(".data"); + +static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx) +{ + int delay; + int b; + + // write the current set of WL delays + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64); + wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)); + + // do the test + if (sw_wl_hw) { + errors = run_best_hw_patterns(priv, if_num, rank_addr, + DBTRAIN_TEST, bad_bits); + errors &= bytes_todo; // keep only the ones we are still doing + } else { + start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); + start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num)); + errors = test_dram_byte64(priv, if_num, rank_addr, bytemask, + bad_bits); + + stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); + stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num)); + sum_dram_dclk += stop_dram_dclk - start_dram_dclk; + sum_dram_ops += stop_dram_ops - start_dram_ops; + } + + debug("WL pass1: test_dram_byte returned 0x%x\n", errors); + + // remember, errors will not be returned for byte-lanes that have + // maxxed out... + if (errors == 0) { + no_errors_count++; // bump + // bypass check/update completely + if (no_errors_count > 1) + return; // to end of do-while + } else { + no_errors_count = 0; // reset + } + + // check errors by byte + for (b = 0; b < 9; ++b) { + if (!(bytes_todo & (1 << b))) + continue; + + delay = byte_delay[b]; + // yes, an error in this byte lane + if (errors & (1 << b)) { + debug(" byte %d delay %2d Errors\n", b, delay); + // since this byte had an error, we move to the next + // delay value, unless done with it + delay += 8; // incr by 8 to do delay high-order bits + if (delay < 32) { + upd_wl_rank(&wl_rank, b, delay); + debug(" byte %d delay %2d New\n", + b, delay); + byte_delay[b] = delay; + } else { + // reached max delay, maybe really done with + // this byte + // consider an alt only for computed VREF and + if (!measured_vref_flag && + (hwl_alts[rankx].hwl_alt_mask & (1 << b))) { + // if an alt exists... + // just orig low-3 bits + int bad_delay = delay & 0x6; + + // yes, use it + delay = hwl_alts[rankx].hwl_alt_delay[b]; + // clear that flag + hwl_alts[rankx].hwl_alt_mask &= + ~(1 << b); + upd_wl_rank(&wl_rank, b, delay); + byte_delay[b] = delay; + debug(" byte %d delay %2d ALTERNATE\n", + b, delay); + debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n", + node, if_num, + rankx, b, bad_delay, delay); + + } else { + unsigned int bits_bad; + + if (b < 8) { + // test no longer, remove from + // byte mask + bytemask &= + ~(0xffULL << (8 * b)); + bits_bad = (unsigned int) + ((bad_bits[0] >> + (8 * b)) & 0xffUL); + } else { + bits_bad = (unsigned int) + (bad_bits[1] & 0xffUL); + } + + // remove from bytes to do + bytes_todo &= ~(1 << b); + // make sure this is set for this case + byte_test_status[b] = WL_ESTIMATED; + debug(" byte %d delay %2d Exhausted\n", + b, delay); + if (!measured_vref_flag) { + // this is too noisy when doing + // measured VREF + debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n", + node, if_num, rankx, + b, bits_bad, delay); + } + } + } + } else { + // no error, stay with current delay, but keep testing + // it... + debug(" byte %d delay %2d Passed\n", b, delay); + byte_test_status[b] = WL_HARDWARE; // change status + } + } /* for (b = 0; b < 9; ++b) */ +} + +static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx) +{ + int save_byte8 = wl_rank.s.byte8; + + byte_test_status[8] = WL_HARDWARE; /* H/W delay value */ + + if (save_byte8 != wl_rank.s.byte3 && + save_byte8 != wl_rank.s.byte4) { + int test_byte8 = save_byte8; + int test_byte8_error; + int byte8_error = 0x1f; + int adder; + int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4, + 2); + + for (adder = 0; adder <= 32; adder += 8) { + test_byte8_error = abs((adder + save_byte8) - + avg_bytes); + if (test_byte8_error < byte8_error) { + byte8_error = test_byte8_error; + test_byte8 = save_byte8 + adder; + } + } + + // only do the check if we are not using measured VREF + if (!measured_vref_flag) { + /* Use only even settings, rounding down... */ + test_byte8 &= ~1; + + // do validity check on the calculated ECC delay value + // this depends on the DIMM type + if (spd_rdimm) { // RDIMM + // but not mini-RDIMM + if (spd_dimm_type != 5) { + // it can be > byte4, but should never + // be > byte3 + if (test_byte8 > wl_rank.s.byte3) { + /* say it is still estimated */ + byte_test_status[8] = + WL_ESTIMATED; + } + } + } else { // UDIMM + if (test_byte8 < wl_rank.s.byte3 || + test_byte8 > wl_rank.s.byte4) { + // should never be outside the + // byte 3-4 range + /* say it is still estimated */ + byte_test_status[8] = WL_ESTIMATED; + } + } + /* + * Report whenever the calculation appears bad. + * This happens if some of the original values were off, + * or unexpected geometry from DIMM type, or custom + * circuitry (NIC225E, I am looking at you!). + * We will trust the calculated value, and depend on + * later testing to catch any instances when that + * value is truly bad. + */ + // ESTIMATED means there may be an issue + if (byte_test_status[8] == WL_ESTIMATED) { + debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n", + node, if_num, rankx, + (spd_rdimm ? 'R' : 'U'), wl_rank.s.byte4, + test_byte8, wl_rank.s.byte3); + byte_test_status[8] = WL_HARDWARE; + } + } + /* Use only even settings */ + wl_rank.s.byte8 = test_byte8 & ~1; + } + + if (wl_rank.s.byte8 != save_byte8) { + /* Change the status if s/w adjusted the delay */ + byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */ + } +} + +static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv, + int rankx) +{ + int errors; + int byte_delay[8]; + int byte_passed[8]; + u64 bytemask; + u64 bitmask; + int wl_offset; + int bytes_todo; + int sw_wl_offset = 1; + int delay; + int b; + + for (b = 0; b < 8; ++b) + byte_passed[b] = 0; + + bytes_todo = if_bytemask; + + for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) { + debug("Starting wl_offset for-loop: %d\n", wl_offset); + + bytemask = 0; + + for (b = 0; b < 8; ++b) { + byte_delay[b] = 0; + // this does not contain fully passed bytes + if (!(bytes_todo & (1 << b))) + continue; + + // reset across passes if not fully passed + byte_passed[b] = 0; + upd_wl_rank(&wl_rank, b, 0); // all delays start at 0 + bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff; + // set the bytes bits in the bytemask + bytemask |= bitmask << (8 * b); + } /* for (b = 0; b < 8; ++b) */ + + // start a pass if there is any byte lane to test + while (bytemask != 0) { + debug("Starting bytemask while-loop: 0x%llx\n", + bytemask); + + // write this set of WL delays + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + wl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + // do the test + if (sw_wl_hw) { + errors = run_best_hw_patterns(priv, if_num, + rank_addr, + DBTRAIN_TEST, + NULL) & 0xff; + } else { + errors = test_dram_byte64(priv, if_num, + rank_addr, bytemask, + NULL); + } + + debug("test_dram_byte returned 0x%x\n", errors); + + // check errors by byte + for (b = 0; b < 8; ++b) { + if (!(bytes_todo & (1 << b))) + continue; + + delay = byte_delay[b]; + if (errors & (1 << b)) { // yes, an error + debug(" byte %d delay %2d Errors\n", + b, delay); + byte_passed[b] = 0; + } else { // no error + byte_passed[b] += 1; + // Look for consecutive working settings + if (byte_passed[b] == (1 + wl_offset)) { + debug(" byte %d delay %2d FULLY Passed\n", + b, delay); + if (wl_offset == 1) { + byte_test_status[b] = + WL_SOFTWARE; + } else if (wl_offset == 0) { + byte_test_status[b] = + WL_SOFTWARE1; + } + + // test no longer, remove + // from byte mask this pass + bytemask &= ~(0xffULL << + (8 * b)); + // remove completely from + // concern + bytes_todo &= ~(1 << b); + // on to the next byte, bypass + // delay updating!! + continue; + } else { + debug(" byte %d delay %2d Passed\n", + b, delay); + } + } + + // error or no, here we move to the next delay + // value for this byte, unless done all delays + // only a byte that has "fully passed" will + // bypass around this, + delay += 2; + if (delay < 32) { + upd_wl_rank(&wl_rank, b, delay); + debug(" byte %d delay %2d New\n", + b, delay); + byte_delay[b] = delay; + } else { + // reached max delay, done with this + // byte + debug(" byte %d delay %2d Exhausted\n", + b, delay); + // test no longer, remove from byte + // mask this pass + bytemask &= ~(0xffULL << (8 * b)); + } + } /* for (b = 0; b < 8; ++b) */ + debug("End of for-loop: bytemask 0x%llx\n", bytemask); + } /* while (bytemask != 0) */ + } + + for (b = 0; b < 8; ++b) { + // any bytes left in bytes_todo did not pass + if (bytes_todo & (1 << b)) { + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank; + + /* + * Last resort. Use Rlevel settings to estimate + * Wlevel if software write-leveling fails + */ + debug("Using RLEVEL as WLEVEL estimate for byte %d\n", + b); + lmc_rlevel_rank.u64 = + lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b); + } + } /* for (b = 0; b < 8; ++b) */ +} + +static int lmc_sw_write_leveling(struct ddr_priv *priv) +{ + /* Try to determine/optimize write-level delays experimentally. */ + union cvmx_lmcx_wlevel_rankx wl_rank_hw_res; + union cvmx_lmcx_config cfg; + int rankx; + int byte; + char *s; + int i; + + int active_rank; + int sw_wl_enable = 1; /* FIX... Should be customizable. */ + int interfaces; + + static const char * const wl_status_strings[] = { + "(e)", + " ", + " ", + "(1)" + }; + + // FIXME: make HW-assist the default now? + int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT; + int dram_connection = c_cfg->dram_connection; + + s = lookup_env(priv, "ddr_sw_wlevel_hw"); + if (s) + sw_wl_hw_default = !!strtoul(s, NULL, 0); + if (!if_64b) // must use SW algo if 32-bit mode + sw_wl_hw_default = 0; + + // can never use hw-assist + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) + sw_wl_hw_default = 0; + + s = lookup_env(priv, "ddr_software_wlevel"); + if (s) + sw_wl_enable = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr%d_dram_connection", if_num); + if (s) + dram_connection = !!strtoul(s, NULL, 0); + + cvmx_rng_enable(); + + /* + * Get the measured_vref setting from the config, check for an + * override... + */ + /* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */ + // NOTE: measured VREF can only be done for DDR4 + if (ddr_type == DDR4_DRAM) { + measured_vref_flag = c_cfg->measured_vref; + s = lookup_env(priv, "ddr_measured_vref"); + if (s) + measured_vref_flag = !!strtoul(s, NULL, 0); + } else { + measured_vref_flag = 0; // OFF for DDR3 + } + + /* + * Ensure disabled ECC for DRAM tests using the SW algo, else leave + * it untouched + */ + if (!sw_wl_hw_default) { + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + cfg.cn78xx.ecc_ena = 0; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); + } + + /* + * We need to track absolute rank number, as well as how many + * active ranks we have. Two single rank DIMMs show up as + * ranks 0 and 2, but only 2 ranks are active. + */ + active_rank = 0; + + interfaces = __builtin_popcount(if_mask); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + final_vref_range = 0; + start_vref_val = 0; + computed_final_vref_val = -1; + sw_wl_rank_status = WL_HARDWARE; + sw_wl_failed = 0; + sw_wl_hw = sw_wl_hw_default; + + if (!sw_wl_enable) + break; + + if (!(rank_mask & (1 << rankx))) + continue; + + debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n", + node, if_num, rankx, + (sw_wl_hw) ? "with H/W assist" : + "with S/W algorithm"); + + if (ddr_type == DDR4_DRAM && num_ranks != 4) { + // always compute when we can... + computed_final_vref_val = + compute_vref_val(priv, if_num, rankx, dimm_count, + num_ranks, imp_val, + is_stacked_die, dram_connection); + + // but only use it if allowed + if (!measured_vref_flag) { + // skip all the measured vref processing, + // just the final setting + start_vref_val = VREF_FINAL; + } + } + + /* Save off the h/w wl results */ + wl_rank_hw_res.u64 = lmc_rd(priv, + CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + vref_val_count = 0; + vref_val_start = 0; + best_vref_val_count = 0; + best_vref_val_start = 0; + + /* Loop one extra time using the Final vref value. */ + for (vref_val = start_vref_val; vref_val < VREF_LIMIT; + ++vref_val) { + if (ddr_type == DDR4_DRAM) + ddr4_vref_loop(priv, rankx); + + /* Restore the saved value */ + wl_rank.u64 = wl_rank_hw_res.u64; + + for (byte = 0; byte < 9; ++byte) + byte_test_status[byte] = WL_ESTIMATED; + + if (wl_mask_err == 0) { + /* + * Determine address of DRAM to test for + * pass 1 of software write leveling. + */ + rank_addr = active_rank * + (1ull << (pbank_lsb - bunk_enable + + (interfaces / 2))); + + /* + * Adjust address for boot bus hole in memory + * map. + */ + if (rank_addr > 0x10000000) + rank_addr += 0x10000000; + + debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n", + node, if_num, rankx, active_rank, + rank_addr); + + // start parallel write-leveling block for + // delay high-order bits + errors = 0; + no_errors_count = 0; + sum_dram_dclk = 0; + sum_dram_ops = 0; + + if (if_64b) { + bytes_todo = (sw_wl_hw) ? + if_bytemask : 0xFF; + bytemask = ~0ULL; + } else { + // 32-bit, must be using SW algo, + // only data bytes + bytes_todo = 0x0f; + bytemask = 0x00000000ffffffffULL; + } + + for (byte = 0; byte < 9; ++byte) { + if (!(bytes_todo & (1 << byte))) { + byte_delay[byte] = 0; + } else { + byte_delay[byte] = + get_wl_rank(&wl_rank, byte); + } + } /* for (byte = 0; byte < 9; ++byte) */ + + do { + lmc_sw_write_leveling_loop(priv, rankx); + } while (no_errors_count < + WL_MIN_NO_ERRORS_COUNT); + + if (!sw_wl_hw) { + u64 percent_x10; + + if (sum_dram_dclk == 0) + sum_dram_dclk = 1; + percent_x10 = sum_dram_ops * 1000 / + sum_dram_dclk; + debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n", + node, if_num, rankx, sum_dram_ops, + sum_dram_dclk, percent_x10 / 10, + percent_x10 % 10); + } + if (errors) { + debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n", + vref_val, vref_val, errors); + } + // end parallel write-leveling block for + // delay high-order bits + + // if we used HW-assist, we did the ECC byte + // when approp. + if (sw_wl_hw) { + if (wl_print) { + debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n", + node, if_num, rankx); + } + goto no_ecc_estimate; + } + + if ((if_bytemask & 0xff) == 0xff) { + if (use_ecc) { + sw_write_lvl_use_ecc(priv, + rankx); + } else { + /* H/W delay value */ + byte_test_status[8] = + WL_HARDWARE; + /* ECC is not used */ + wl_rank.s.byte8 = + wl_rank.s.byte0; + } + } else { + if (use_ecc) { + /* Estimate the ECC byte dly */ + // add hi-order to b4 + wl_rank.s.byte4 |= + (wl_rank.s.byte3 & + 0x38); + if ((wl_rank.s.byte4 & 0x06) < + (wl_rank.s.byte3 & 0x06)) { + // must be next clock + wl_rank.s.byte4 += 8; + } + } else { + /* ECC is not used */ + wl_rank.s.byte4 = + wl_rank.s.byte0; + } + + /* + * Change the status if s/w adjusted + * the delay + */ + /* Estimated delay */ + byte_test_status[4] = WL_SOFTWARE; + } /* if ((if_bytemask & 0xff) == 0xff) */ + } /* if (wl_mask_err == 0) */ + +no_ecc_estimate: + + bytes_failed = 0; + for (byte = 0; byte < 9; ++byte) { + /* Don't accumulate errors for untested bytes */ + if (!(if_bytemask & (1 << byte))) + continue; + bytes_failed += + (byte_test_status[byte] == WL_ESTIMATED); + } + + /* vref training loop is only used for DDR4 */ + if (ddr_type != DDR4_DRAM) + break; + + if (bytes_failed == 0) { + if (vref_val_count == 0) + vref_val_start = vref_val; + + ++vref_val_count; + if (vref_val_count > best_vref_val_count) { + best_vref_val_count = vref_val_count; + best_vref_val_start = vref_val_start; + debug("N%d.LMC%d.R%d: vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n", + node, if_num, rankx, vref_val, + best_vref_val_start, + best_vref_val_start + + best_vref_val_count - 1); + } + } else { + vref_val_count = 0; + debug("N%d.LMC%d.R%d: vref Training (%2d) : failed\n", + node, if_num, rankx, vref_val); + } + } + + /* + * Determine address of DRAM to test for software write + * leveling. + */ + rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + + (interfaces / 2))); + /* Adjust address for boot bus hole in memory map. */ + if (rank_addr > 0x10000000) + rank_addr += 0x10000000; + + debug("Rank Address: 0x%llx\n", rank_addr); + + if (bytes_failed) { + // FIXME? the big hammer, did not even try SW WL pass2, + // assume only chip reset will help + debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n", + node, if_num, rankx); + sw_wl_failed = 1; + } else { /* if (bytes_failed) */ + // SW WL pass 1 was OK, write the settings + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + wl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + // do validity check on the delay values by running + // the test 1 more time... + // FIXME: we really need to check the ECC byte setting + // here as well, so we need to enable ECC for this test! + // if there are any errors, claim SW WL failure + u64 datamask = (if_64b) ? 0xffffffffffffffffULL : + 0x00000000ffffffffULL; + int errors; + + // do the test + if (sw_wl_hw) { + errors = run_best_hw_patterns(priv, if_num, + rank_addr, + DBTRAIN_TEST, + NULL) & 0xff; + } else { + errors = test_dram_byte64(priv, if_num, + rank_addr, datamask, + NULL); + } + + if (errors) { + debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n", + node, if_num, rankx, errors); + sw_wl_failed = 1; + } + } /* if (bytes_failed) */ + + // FIXME? dump the WL settings, so we get more of a clue + // as to what happened where + debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n", + node, if_num, rankx, wl_rank.s.status, wl_rank.u64, + wl_rank.s.byte8, wl_status_strings[byte_test_status[8]], + wl_rank.s.byte7, wl_status_strings[byte_test_status[7]], + wl_rank.s.byte6, wl_status_strings[byte_test_status[6]], + wl_rank.s.byte5, wl_status_strings[byte_test_status[5]], + wl_rank.s.byte4, wl_status_strings[byte_test_status[4]], + wl_rank.s.byte3, wl_status_strings[byte_test_status[3]], + wl_rank.s.byte2, wl_status_strings[byte_test_status[2]], + wl_rank.s.byte1, wl_status_strings[byte_test_status[1]], + wl_rank.s.byte0, wl_status_strings[byte_test_status[0]], + (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)"); + + // finally, check for fatal conditions: either chip reset + // right here, or return error flag + if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) || + sw_wl_failed) { + if (!ddr_disable_chip_reset) { // do chip RESET + printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n", + node, if_num, rankx); + mdelay(500); + do_reset(NULL, 0, 0, NULL); + } else { + // return error flag so LMC init can be retried. + debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n", + node, if_num, rankx); + return -EAGAIN; // 0 indicates restart possible. + } + } + active_rank++; + } + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + int parameter_set = 0; + u64 value; + + if (!(rank_mask & (1 << rankx))) + continue; + + wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + for (i = 0; i < 9; ++i) { + s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d", + if_num, rankx, i); + if (s) { + parameter_set |= 1; + value = strtoul(s, NULL, 0); + + upd_wl_rank(&wl_rank, i, value); + } + } + + s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx); + if (s) { + parameter_set |= 1; + value = strtoull(s, NULL, 0); + wl_rank.u64 = value; + } + + if (parameter_set) { + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + wl_rank.u64 = + lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)); + display_wl(if_num, wl_rank, rankx); + } + // if there are unused entries to be filled + if ((rank_mask & 0x0F) != 0x0F) { + if (rankx < 3) { + debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n", + node, if_num, rankx); + + // if rank 0, write ranks 1 and 2 here if empty + if (rankx == 0) { + // check that rank 1 is empty + if (!(rank_mask & (1 << 1))) { + debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 1); + lmc_wr(priv, + CVMX_LMCX_WLEVEL_RANKX(1, + if_num), + wl_rank.u64); + } + + // check that rank 2 is empty + if (!(rank_mask & (1 << 2))) { + debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 2); + lmc_wr(priv, + CVMX_LMCX_WLEVEL_RANKX(2, + if_num), + wl_rank.u64); + } + } + + // if rank 0, 1 or 2, write rank 3 here if empty + // check that rank 3 is empty + if (!(rank_mask & (1 << 3))) { + debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 3); + lmc_wr(priv, + CVMX_LMCX_WLEVEL_RANKX(3, + if_num), + wl_rank.u64); + } + } + } + } + + /* Enable 32-bit mode if required. */ + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + cfg.cn78xx.mode32b = (!if_64b); + debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); + + /* Restore the ECC configuration */ + if (!sw_wl_hw_default) + cfg.cn78xx.ecc_ena = use_ecc; + + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); + + return 0; +} + +static void lmc_dll(struct ddr_priv *priv) +{ + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + int setting[9]; + int i; + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + for (i = 0; i < 9; ++i) { + SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i)); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + setting[i] = GET_DDR_DLL_CTL3(dll90_setting); + debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num, + GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64, + setting[i]); + } + + debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + node, if_num, "DLL90 Setting 8:0", + setting[8], setting[7], setting[6], setting[5], setting[4], + setting[3], setting[2], setting[1], setting[0]); + + process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset", + c_cfg->dll_write_offset, + "ddr%d_dll_write_offset_byte%d", 1); + process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset", + c_cfg->dll_read_offset, + "ddr%d_dll_read_offset_byte%d", 2); +} + +#define SLOT_CTL_INCR(csr, chip, field, incr) \ + csr.chip.field = (csr.chip.field < (64 - incr)) ? \ + (csr.chip.field + incr) : 63 + +#define INCR(csr, chip, field, incr) \ + csr.chip.field = (csr.chip.field < (64 - incr)) ? \ + (csr.chip.field + incr) : 63 + +static void lmc_workaround_2(struct ddr_priv *priv) +{ + /* Workaround Errata 21063 */ + if (octeon_is_cpuid(OCTEON_CN78XX) || + octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) { + union cvmx_lmcx_slot_ctl0 slot_ctl0; + union cvmx_lmcx_slot_ctl1 slot_ctl1; + union cvmx_lmcx_slot_ctl2 slot_ctl2; + union cvmx_lmcx_ext_config ext_cfg; + + slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num)); + slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num)); + slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num)); + + ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + + /* When ext_cfg.s.read_ena_bprch is set add 1 */ + if (ext_cfg.s.read_ena_bprch) { + SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1); + SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1); + SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1); + SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1); + } + + /* Always add 2 */ + SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2); + SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2); + + lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64); + lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64); + lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64); + } + + /* Workaround Errata 21216 */ + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) || + octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) { + union cvmx_lmcx_slot_ctl1 slot_ctl1; + union cvmx_lmcx_slot_ctl2 slot_ctl2; + + slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num)); + slot_ctl1.cn78xx.w2w_xrank_init = + max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init); + lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64); + + slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num)); + slot_ctl2.cn78xx.w2w_xdimm_init = + max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init); + lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64); + } +} + +static void lmc_final(struct ddr_priv *priv) +{ + /* + * 4.8.11 Final LMC Initialization + * + * Early LMC initialization, LMC write-leveling, and LMC read-leveling + * must be completed prior to starting this final LMC initialization. + * + * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, + * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected + * readleveling and write-leveling settings. Software should not write + * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2 + * values until after the final read-leveling and write-leveling + * settings are written. + * + * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and + * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs + * select the minimum gaps between read operations and write operations + * of various types. + * + * Software must not reduce the values in these CSR fields below the + * values previously selected by the LMC hardware (during write-leveling + * and read-leveling steps above). + * + * All sections in this chapter may be used to derive proper settings + * for these registers. + * + * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed + * properly. This should be done prior to the first read. + */ + + /* Clear any residual ECC errors */ + int num_tads = 1; + int tad; + int num_mcis = 1; + int mci; + + if (octeon_is_cpuid(OCTEON_CN78XX)) { + num_tads = 8; + num_mcis = 4; + } else if (octeon_is_cpuid(OCTEON_CN70XX)) { + num_tads = 1; + num_mcis = 1; + } else if (octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + num_tads = 4; + num_mcis = 3; + } + + lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL); + lmc_rd(priv, CVMX_LMCX_INT(if_num)); + + for (tad = 0; tad < num_tads; tad++) { + l2c_wr(priv, CVMX_L2C_TADX_INT(tad), + l2c_rd(priv, CVMX_L2C_TADX_INT(tad))); + debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad, + l2c_rd(priv, CVMX_L2C_TADX_INT(tad))); + } + + for (mci = 0; mci < num_mcis; mci++) { + l2c_wr(priv, CVMX_L2C_MCIX_INT(mci), + l2c_rd(priv, CVMX_L2C_MCIX_INT(mci))); + debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci, + l2c_rd(priv, CVMX_L2C_MCIX_INT(mci))); + } + + debug("%-45s : 0x%08llx\n", "LMC_INT", + lmc_rd(priv, CVMX_LMCX_INT(if_num))); +} + +static void lmc_scrambling(struct ddr_priv *priv) +{ + // Make sure scrambling is disabled during init... + union cvmx_lmcx_control ctrl; + union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0; + union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1; + union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2; + union cvmx_lmcx_ns_ctl lmc_ns_ctl; + int use_scramble = 0; // default OFF + char *s; + + ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num)); + lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num)); + lmc_scramble_cfg2.u64 = 0; // quiet compiler + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + lmc_scramble_cfg2.u64 = + lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num)); + } + lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num)); + + s = lookup_env_ull(priv, "ddr_use_scramble"); + if (s) + use_scramble = simple_strtoull(s, NULL, 0); + + /* Generate random values if scrambling is needed */ + if (use_scramble) { + lmc_scramble_cfg0.u64 = cvmx_rng_get_random64(); + lmc_scramble_cfg1.u64 = cvmx_rng_get_random64(); + lmc_scramble_cfg2.u64 = cvmx_rng_get_random64(); + lmc_ns_ctl.s.ns_scramble_dis = 0; + lmc_ns_ctl.s.adr_offset = 0; + ctrl.s.scramble_ena = 1; + } + + s = lookup_env_ull(priv, "ddr_scramble_cfg0"); + if (s) { + lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0); + ctrl.s.scramble_ena = 1; + } + debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0", + lmc_scramble_cfg0.u64); + + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64); + + s = lookup_env_ull(priv, "ddr_scramble_cfg1"); + if (s) { + lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0); + ctrl.s.scramble_ena = 1; + } + debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1", + lmc_scramble_cfg1.u64); + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64); + + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + s = lookup_env_ull(priv, "ddr_scramble_cfg2"); + if (s) { + lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0); + ctrl.s.scramble_ena = 1; + } + debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2", + lmc_scramble_cfg1.u64); + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), + lmc_scramble_cfg2.u64); + } + + s = lookup_env_ull(priv, "ddr_ns_ctl"); + if (s) + lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0); + debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64); + lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64); + + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); +} + +struct rl_score { + u64 setting; + int score; +}; + +static union cvmx_lmcx_rlevel_rankx rl_rank __section(".data"); +static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data"); +static unsigned char rodt_ctl __section(".data"); + +static int rl_rodt_err __section(".data"); +static unsigned char rtt_nom __section(".data"); +static unsigned char rtt_idx __section(".data"); +static char min_rtt_nom_idx __section(".data"); +static char max_rtt_nom_idx __section(".data"); +static char min_rodt_ctl __section(".data"); +static char max_rodt_ctl __section(".data"); +static int rl_dbg_loops __section(".data"); +static unsigned char save_ddr2t __section(".data"); +static int rl_samples __section(".data"); +static char rl_compute __section(".data"); +static char saved_ddr__ptune __section(".data"); +static char saved_ddr__ntune __section(".data"); +static char rl_comp_offs __section(".data"); +static char saved_int_zqcs_dis __section(".data"); +static int max_adj_rl_del_inc __section(".data"); +static int print_nom_ohms __section(".data"); +static int rl_print __section(".data"); + +#ifdef ENABLE_HARDCODED_RLEVEL +static char part_number[21] __section(".data"); +#endif /* ENABLE_HARDCODED_RLEVEL */ + +struct perfect_counts { + u16 count[9][32]; // 8+ECC by 64 values + u32 mask[9]; // 8+ECC, bitmask of perfect delays +}; + +static struct perfect_counts rank_perf[4] __section(".data"); +static struct perfect_counts rodt_perfect_counts __section(".data"); +static int pbm_lowsum_limit __section(".data"); +// FIXME: PBM skip for RODT 240 and 34 +static u32 pbm_rodt_skip __section(".data"); + +// control rank majority processing +static int disable_rank_majority __section(".data"); + +// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE +// for DDR3 +static int enable_rldelay_bump __section(".data"); +static int rldelay_bump_incr __section(".data"); +static int disable_rlv_bump_this_byte __section(".data"); +static u64 value_mask __section(".data"); + +static struct rlevel_byte_data rl_byte[9] __section(".data"); +static int sample_loops __section(".data"); +static int max_samples __section(".data"); +static int rl_rank_errors __section(".data"); +static int rl_mask_err __section(".data"); +static int rl_nonseq_err __section(".data"); +static struct rlevel_bitmask rl_mask[9] __section(".data"); +static int rl_best_rank_score __section(".data"); + +static int rodt_row_skip_mask __section(".data"); + +static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score + rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]) +{ + union cvmx_lmcx_comp_ctl2 cc2; + const int rl_separate_ab = 1; + int i; + + rl_best_rank_score = DEFAULT_BEST_RANK_SCORE; + rl_rodt_err = 0; + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + cc2.cn78xx.rodt_ctl = rodt_ctl; + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + udelay(1); /* Give it a little time to take affect */ + if (rl_print > 1) { + debug("Read ODT_CTL : 0x%x (%d ohms)\n", + cc2.cn78xx.rodt_ctl, + imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); + } + + memset(rl_byte, 0, sizeof(rl_byte)); + memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts)); + + // when iter RODT is the target RODT, take more samples... + max_samples = rl_samples; + if (rodt_ctl == default_rodt_ctl) + max_samples += rl_samples + 1; + + for (sample_loops = 0; sample_loops < max_samples; sample_loops++) { + int redoing_nonseq_errs = 0; + + rl_mask_err = 0; + + if (!(rl_separate_ab && spd_rdimm && + ddr_type == DDR4_DRAM)) { + /* Clear read-level delays */ + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); + + /* read-leveling */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); + + do { + rl_rank.u64 = + lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + } while (rl_rank.cn78xx.status != 3); + } + + rl_rank.u64 = + lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); + + // start bitmask interpretation block + + memset(rl_mask, 0, sizeof(rl_mask)); + + if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) { + union cvmx_lmcx_rlevel_rankx rl_rank_aside; + union cvmx_lmcx_modereg_params0 mp0; + + /* A-side */ + mp0.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */ + lmc_wr(priv, + CVMX_LMCX_MODEREG_PARAMS0(if_num), + mp0.u64); + + /* Clear read-level delays */ + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); + + /* read-leveling */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); + + do { + rl_rank.u64 = + lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + } while (rl_rank.cn78xx.status != 3); + + rl_rank.u64 = + lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + + rl_rank_aside.u64 = rl_rank.u64; + + rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0); + rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1); + rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2); + rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3); + rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8); + /* A-side complete */ + + /* B-side */ + mp0.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */ + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), + mp0.u64); + + /* Clear read-level delays */ + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); + + /* read-leveling */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); + + do { + rl_rank.u64 = + lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + } while (rl_rank.cn78xx.status != 3); + + rl_rank.u64 = + lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + + rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4); + rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5); + rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6); + rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7); + /* B-side complete */ + + upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0); + upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1); + upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2); + upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3); + /* ECC A-side */ + upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8); + + mp0.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */ + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), + mp0.u64); + } + + /* + * Evaluate the quality of the read-leveling delays from the + * bitmasks. Also save off a software computed read-leveling + * mask that may be used later to qualify the delay results + * from Octeon. + */ + for (i = 0; i < (8 + ecc_ena); ++i) { + int bmerr; + + if (!(if_bytemask & (1 << i))) + continue; + if (!(rl_separate_ab && spd_rdimm && + ddr_type == DDR4_DRAM)) { + rl_mask[i].bm = + lmc_ddr3_rl_dbg_read(priv, if_num, i); + } + bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i], + ddr_type); + rl_mask[i].errs = bmerr; + rl_mask_err += bmerr; + // count only the "perfect" bitmasks + if (ddr_type == DDR4_DRAM && !bmerr) { + int delay; + // FIXME: for now, simple filtering: + // do NOT count PBMs for RODTs in skip mask + if ((1U << rodt_ctl) & pbm_rodt_skip) + continue; + // FIXME: could optimize this a bit? + delay = get_rl_rank(&rl_rank, i); + rank_perf[rankx].count[i][delay] += 1; + rank_perf[rankx].mask[i] |= + (1ULL << delay); + rodt_perfect_counts.count[i][delay] += 1; + rodt_perfect_counts.mask[i] |= (1ULL << delay); + } + } + + /* Set delays for unused bytes to match byte 0. */ + for (i = 0; i < 9; ++i) { + if (if_bytemask & (1 << i)) + continue; + upd_rl_rank(&rl_rank, i, rl_rank.s.byte0); + } + + /* + * Save a copy of the byte delays in physical + * order for sequential evaluation. + */ + unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank); + + redo_nonseq_errs: + + rl_nonseq_err = 0; + if (!disable_sequential_delay_check) { + for (i = 0; i < 9; ++i) + rl_byte[i].sqerrs = 0; + + if ((if_bytemask & 0xff) == 0xff) { + /* + * Evaluate delay sequence across the whole + * range of bytes for standard dimms. + */ + /* 1=RDIMM, 5=Mini-RDIMM */ + if (spd_dimm_type == 1 || spd_dimm_type == 5) { + int reg_adj_del = abs(rl_byte[4].delay - + rl_byte[5].delay); + + /* + * Registered dimm topology routes + * from the center. + */ + rl_nonseq_err += + nonseq_del(rl_byte, 0, + 3 + ecc_ena, + max_adj_rl_del_inc); + rl_nonseq_err += + nonseq_del(rl_byte, 5, + 7 + ecc_ena, + max_adj_rl_del_inc); + // byte 5 sqerrs never gets cleared + // for RDIMMs + rl_byte[5].sqerrs = 0; + if (reg_adj_del > 1) { + /* + * Assess proximity of bytes on + * opposite sides of register + */ + rl_nonseq_err += (reg_adj_del - + 1) * + RLEVEL_ADJACENT_DELAY_ERROR; + // update byte 5 error + rl_byte[5].sqerrs += + (reg_adj_del - 1) * + RLEVEL_ADJACENT_DELAY_ERROR; + } + } + + /* 2=UDIMM, 6=Mini-UDIMM */ + if (spd_dimm_type == 2 || spd_dimm_type == 6) { + /* + * Unbuffered dimm topology routes + * from end to end. + */ + rl_nonseq_err += nonseq_del(rl_byte, 0, + 7 + ecc_ena, + max_adj_rl_del_inc); + } + } else { + rl_nonseq_err += nonseq_del(rl_byte, 0, + 3 + ecc_ena, + max_adj_rl_del_inc); + } + } /* if (! disable_sequential_delay_check) */ + + rl_rank_errors = rl_mask_err + rl_nonseq_err; + + // print original sample here only if we are not really + // averaging or picking best + // also do not print if we were redoing the NONSEQ score + // for using COMPUTED + if (!redoing_nonseq_errs && rl_samples < 2) { + if (rl_print > 1) { + display_rl_bm(if_num, rankx, rl_mask, ecc_ena); + display_rl_bm_scores(if_num, rankx, rl_mask, + ecc_ena); + display_rl_seq_scores(if_num, rankx, rl_byte, + ecc_ena); + } + display_rl_with_score(if_num, rl_rank, rankx, + rl_rank_errors); + } + + if (rl_compute) { + if (!redoing_nonseq_errs) { + /* Recompute the delays based on the bitmask */ + for (i = 0; i < (8 + ecc_ena); ++i) { + if (!(if_bytemask & (1 << i))) + continue; + + upd_rl_rank(&rl_rank, i, + compute_ddr3_rlevel_delay( + rl_mask[i].mstart, + rl_mask[i].width, + rl_ctl)); + } + + /* + * Override the copy of byte delays with the + * computed results. + */ + unpack_rlevel_settings(if_bytemask, ecc_ena, + rl_byte, rl_rank); + + redoing_nonseq_errs = 1; + goto redo_nonseq_errs; + + } else { + /* + * now print this if already printed the + * original sample + */ + if (rl_samples < 2 || rl_print) { + display_rl_with_computed(if_num, + rl_rank, rankx, + rl_rank_errors); + } + } + } /* if (rl_compute) */ + + // end bitmask interpretation block + + // if it is a better (lower) score, then keep it + if (rl_rank_errors < rl_best_rank_score) { + rl_best_rank_score = rl_rank_errors; + + // save the new best delays and best errors + for (i = 0; i < (8 + ecc_ena); ++i) { + rl_byte[i].best = rl_byte[i].delay; + rl_byte[i].bestsq = rl_byte[i].sqerrs; + // save bitmasks and their scores as well + // xlate UNPACKED index to PACKED index to + // get from rl_mask + rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm; + rl_byte[i].bmerrs = + rl_mask[XUP(i, !!ecc_ena)].errs; + } + } + + rl_rodt_err += rl_rank_errors; + } + + /* We recorded the best score across the averaging loops */ + rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score; + + /* + * Restore the delays from the best fields that go with the best + * score + */ + for (i = 0; i < 9; ++i) { + rl_byte[i].delay = rl_byte[i].best; + rl_byte[i].sqerrs = rl_byte[i].bestsq; + } + + rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); + + pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank); + + if (rl_samples > 1) { + // restore the "best" bitmasks and their scores for printing + for (i = 0; i < 9; ++i) { + if ((if_bytemask & (1 << i)) == 0) + continue; + // xlate PACKED index to UNPACKED index to get from + // rl_byte + rl_mask[i].bm = rl_byte[XPU(i, !!ecc_ena)].bm; + rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs; + } + + // maybe print bitmasks/scores here + if (rl_print > 1) { + display_rl_bm(if_num, rankx, rl_mask, ecc_ena); + display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena); + display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena); + + display_rl_with_rodt(if_num, rl_rank, rankx, + rl_score[rtt_nom][rodt_ctl][rankx].score, + print_nom_ohms, + imp_val->rodt_ohms[rodt_ctl], + WITH_RODT_BESTSCORE); + + debug("-----------\n"); + } + } + + rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64; + + // print out the PBMs for the current RODT + if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity? + // FIXME: change verbosity level after debug complete... + + for (i = 0; i < 9; i++) { + u64 temp_mask; + int num_values; + + // FIXME: PBM skip for RODTs in mask + if ((1U << rodt_ctl) & pbm_rodt_skip) + continue; + + temp_mask = rodt_perfect_counts.mask[i]; + num_values = __builtin_popcountll(temp_mask); + i = __builtin_ffsll(temp_mask) - 1; + + debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ", + node, if_num, rankx, + imp_val->rodt_ohms[rodt_ctl], + i, temp_mask >> i, num_values); + + while (temp_mask != 0) { + i = __builtin_ffsll(temp_mask) - 1; + debug("%2d(%2d) ", i, + rodt_perfect_counts.count[i][i]); + temp_mask &= ~(1UL << i); + } /* while (temp_mask != 0) */ + debug("\n"); + } + } +} + +static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score + rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]) +{ + /* Start with an arbitrarily high score */ + int best_rank_score = DEFAULT_BEST_RANK_SCORE; + int best_rank_rtt_nom = 0; + int best_rank_ctl = 0; + int best_rank_ohms = 0; + int best_rankx = 0; + int dimm_rank_mask; + int max_rank_score; + union cvmx_lmcx_rlevel_rankx saved_rl_rank; + int next_ohms; + int orankx; + int next_score = 0; + int best_byte, new_byte, temp_byte, orig_best_byte; + int rank_best_bytes[9]; + int byte_sh; + int avg_byte; + int avg_diff; + int i; + + if (!(rank_mask & (1 << rankx))) + return; + + // some of the rank-related loops below need to operate only on + // the ranks of a single DIMM, + // so create a mask for their use here + if (num_ranks == 4) { + dimm_rank_mask = rank_mask; // should be 1111 + } else { + dimm_rank_mask = rank_mask & 3; // should be 01 or 11 + if (rankx >= 2) { + // doing a rank on the second DIMM, should be + // 0100 or 1100 + dimm_rank_mask <<= 2; + } + } + debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n", + dimm_rank_mask, rank_mask, rankx); + + // this is the start of the BEST ROW SCORE LOOP + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n", + node, if_num, rankx, rtt_nom, + imp_val->rtt_nom_ohms[rtt_nom]); + + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; + --rodt_ctl) { + next_ohms = imp_val->rodt_ohms[rodt_ctl]; + + // skip RODT rows in mask, but *NOT* rows with too + // high a score; + // we will not use the skipped ones for printing or + // evaluating, but we need to allow all the + // non-skipped ones to be candidates for "best" + if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) { + debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n", + node, if_num, rankx, rodt_ctl, + next_ohms, next_score); + continue; + } + + // this is ROFFIX-0528 + for (orankx = 0; orankx < dimm_count * 4; orankx++) { + // stay on the same DIMM + if (!(dimm_rank_mask & (1 << orankx))) + continue; + + next_score = rl_score[rtt_nom][rodt_ctl][orankx].score; + + // always skip a higher score + if (next_score > best_rank_score) + continue; + + // if scores are equal + if (next_score == best_rank_score) { + // always skip lower ohms + if (next_ohms < best_rank_ohms) + continue; + + // if same ohms + if (next_ohms == best_rank_ohms) { + // always skip the other rank(s) + if (orankx != rankx) + continue; + } + // else next_ohms are greater, + // always choose it + } + // else next_score is less than current best, + // so always choose it + debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n", + node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score, + best_rank_score, best_rank_ohms); + best_rank_score = next_score; + best_rank_rtt_nom = rtt_nom; + //best_rank_nom_ohms = rtt_nom_ohms; + best_rank_ctl = rodt_ctl; + best_rank_ohms = next_ohms; + best_rankx = orankx; + rl_rank.u64 = + rl_score[rtt_nom][rodt_ctl][orankx].setting; + } + } + } + + // this is the end of the BEST ROW SCORE LOOP + + // DANGER, Will Robinson!! Abort now if we did not find a best + // score at all... + if (best_rank_score == DEFAULT_BEST_RANK_SCORE) { + printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n", + node, if_num, rankx); + mdelay(500); + do_reset(NULL, 0, 0, NULL); + } + + // FIXME: relative now, but still arbitrary... + max_rank_score = best_rank_score; + if (ddr_type == DDR4_DRAM) { + // halve the range if 2 DIMMs unless they are single rank... + max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ? + dimm_count : 1)); + } else { + // Since DDR3 typically has a wider score range, + // keep more of them always + max_rank_score += MAX_RANK_SCORE_LIMIT; + } + + if (!ecc_ena) { + /* ECC is not used */ + rl_rank.s.byte8 = rl_rank.s.byte0; + } + + // at the end, write the best row settings to the current rank + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64); + rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); + + saved_rl_rank.u64 = rl_rank.u64; + + // this is the start of the PRINT LOOP + int pass; + + // for pass==0, print current rank, pass==1 print other rank(s) + // this is done because we want to show each ranks RODT values + // together, not interlaced + // keep separates for ranks - pass=0 target rank, pass=1 other + // rank on DIMM + int mask_skipped[2] = {0, 0}; + int score_skipped[2] = {0, 0}; + int selected_rows[2] = {0, 0}; + int zero_scores[2] = {0, 0}; + for (pass = 0; pass < 2; pass++) { + for (orankx = 0; orankx < dimm_count * 4; orankx++) { + // stay on the same DIMM + if (!(dimm_rank_mask & (1 << orankx))) + continue; + + if ((pass == 0 && orankx != rankx) || + (pass != 0 && orankx == rankx)) + continue; + + for (rtt_idx = min_rtt_nom_idx; + rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + if (dyn_rtt_nom_mask == 0) { + print_nom_ohms = -1; + } else { + print_nom_ohms = + imp_val->rtt_nom_ohms[rtt_nom]; + } + + // cycle through all the RODT values... + for (rodt_ctl = max_rodt_ctl; + rodt_ctl >= min_rodt_ctl; --rodt_ctl) { + union cvmx_lmcx_rlevel_rankx + temp_rl_rank; + int temp_score = + rl_score[rtt_nom][rodt_ctl][orankx].score; + int skip_row; + + temp_rl_rank.u64 = + rl_score[rtt_nom][rodt_ctl][orankx].setting; + + // skip RODT rows in mask, or rows + // with too high a score; + // we will not use them for printing + // or evaluating... + if ((1 << rodt_ctl) & + rodt_row_skip_mask) { + skip_row = WITH_RODT_SKIPPING; + ++mask_skipped[pass]; + } else if (temp_score > + max_rank_score) { + skip_row = WITH_RODT_SKIPPING; + ++score_skipped[pass]; + } else { + skip_row = WITH_RODT_BLANK; + ++selected_rows[pass]; + if (temp_score == 0) + ++zero_scores[pass]; + } + + // identify and print the BEST ROW + // when it comes up + if (skip_row == WITH_RODT_BLANK && + best_rankx == orankx && + best_rank_rtt_nom == rtt_nom && + best_rank_ctl == rodt_ctl) + skip_row = WITH_RODT_BESTROW; + + if (rl_print) { + display_rl_with_rodt(if_num, + temp_rl_rank, orankx, temp_score, + print_nom_ohms, + imp_val->rodt_ohms[rodt_ctl], + skip_row); + } + } + } + } + } + debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n", + node, if_num, rankx, selected_rows[0], selected_rows[1], + zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1], + score_skipped[0], score_skipped[1]); + // this is the end of the PRINT LOOP + + // now evaluate which bytes need adjusting + // collect the new byte values; first init with current best for + // neighbor use + for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) { + rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) & + RLEVEL_BYTE_MSK; + } + + // this is the start of the BEST BYTE LOOP + + for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) { + int sum = 0, count = 0; + int count_less = 0, count_same = 0, count_more = 0; + int count_byte; // save the value we counted around + // for rank majority use + int rank_less = 0, rank_same = 0, rank_more = 0; + int neighbor; + int neigh_byte; + + best_byte = rank_best_bytes[i]; + orig_best_byte = rank_best_bytes[i]; + + // this is the start of the BEST BYTE AVERAGING LOOP + + // validate the initial "best" byte by looking at the + // average of the unskipped byte-column entries + // we want to do this before we go further, so we can + // try to start with a better initial value + // this is the so-called "BESTBUY" patch set + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; + ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; + --rodt_ctl) { + union cvmx_lmcx_rlevel_rankx temp_rl_rank; + int temp_score; + + // average over all the ranks + for (orankx = 0; orankx < dimm_count * 4; + orankx++) { + // stay on the same DIMM + if (!(dimm_rank_mask & (1 << orankx))) + continue; + + temp_score = + rl_score[rtt_nom][rodt_ctl][orankx].score; + // skip RODT rows in mask, or rows with + // too high a score; + // we will not use them for printing or + // evaluating... + + if (!((1 << rodt_ctl) & + rodt_row_skip_mask) && + temp_score <= max_rank_score) { + temp_rl_rank.u64 = + rl_score[rtt_nom][rodt_ctl][orankx].setting; + temp_byte = + (int)(temp_rl_rank.u64 >> byte_sh) & + RLEVEL_BYTE_MSK; + sum += temp_byte; + count++; + } + } + } + } + + // this is the end of the BEST BYTE AVERAGING LOOP + + // FIXME: validate count and sum?? + avg_byte = (int)divide_nint(sum, count); + avg_diff = best_byte - avg_byte; + new_byte = best_byte; + if (avg_diff != 0) { + // bump best up/dn by 1, not necessarily all the + // way to avg + new_byte = best_byte + ((avg_diff > 0) ? -1 : 1); + } + + if (rl_print) { + debug("N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n", + node, if_num, rankx, + i, best_byte, avg_diff, avg_byte, new_byte); + } + best_byte = new_byte; + count_byte = new_byte; // save the value we will count around + + // At this point best_byte is either: + // 1. the original byte-column value from the best scoring + // RODT row, OR + // 2. that value bumped toward the average of all the + // byte-column values + // + // best_byte will not change from here on... + + // this is the start of the BEST BYTE COUNTING LOOP + + // NOTE: we do this next loop separately from above, because + // we count relative to "best_byte" + // which may have been modified by the above averaging + // operation... + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; + ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; + --rodt_ctl) { + union cvmx_lmcx_rlevel_rankx temp_rl_rank; + int temp_score; + + for (orankx = 0; orankx < dimm_count * 4; + orankx++) { // count over all the ranks + // stay on the same DIMM + if (!(dimm_rank_mask & (1 << orankx))) + continue; + + temp_score = + rl_score[rtt_nom][rodt_ctl][orankx].score; + // skip RODT rows in mask, or rows + // with too high a score; + // we will not use them for printing + // or evaluating... + if (((1 << rodt_ctl) & + rodt_row_skip_mask) || + temp_score > max_rank_score) + continue; + + temp_rl_rank.u64 = + rl_score[rtt_nom][rodt_ctl][orankx].setting; + temp_byte = (temp_rl_rank.u64 >> + byte_sh) & RLEVEL_BYTE_MSK; + + if (temp_byte == 0) + ; // do not count it if illegal + else if (temp_byte == best_byte) + count_same++; + else if (temp_byte == best_byte - 1) + count_less++; + else if (temp_byte == best_byte + 1) + count_more++; + // else do not count anything more + // than 1 away from the best + + // no rank counting if disabled + if (disable_rank_majority) + continue; + + // FIXME? count is relative to + // best_byte; should it be rank-based? + // rank counts only on main rank + if (orankx != rankx) + continue; + else if (temp_byte == best_byte) + rank_same++; + else if (temp_byte == best_byte - 1) + rank_less++; + else if (temp_byte == best_byte + 1) + rank_more++; + } + } + } + + if (rl_print) { + debug("N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n", + node, if_num, rankx, + i, orig_best_byte, best_byte, + count_more, count_same, count_less, + rank_more, rank_same, rank_less); + } + + // this is the end of the BEST BYTE COUNTING LOOP + + // choose the new byte value + // we need to check that there is no gap greater than 2 + // between adjacent bytes (adjacency depends on DIMM type) + // use the neighbor value to help decide + // initially, the rank_best_bytes[] will contain values from + // the chosen lowest score rank + new_byte = 0; + + // neighbor is index-1 unless we are index 0 or index 8 (ECC) + neighbor = (i == 8) ? 3 : ((i == 0) ? 1 : i - 1); + neigh_byte = rank_best_bytes[neighbor]; + + // can go up or down or stay the same, so look at a numeric + // average to help + new_byte = (int)divide_nint(((count_more * (best_byte + 1)) + + (count_same * (best_byte + 0)) + + (count_less * (best_byte - 1))), + max(1, (count_more + count_same + + count_less))); + + // use neighbor to help choose with average + if (i > 0 && (abs(neigh_byte - new_byte) > 2) && + !disable_sequential_delay_check) { + // but not for byte 0 + int avg_pick = new_byte; + + if ((new_byte - best_byte) != 0) { + // back to best, average did not get better + new_byte = best_byte; + } else { + // avg was the same, still too far, now move + // it towards the neighbor + new_byte += (neigh_byte > new_byte) ? 1 : -1; + } + + if (rl_print) { + debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n", + node, if_num, rankx, + i, neighbor, neigh_byte, avg_pick, + new_byte); + } + } else { + // NOTE: + // For now, we let the neighbor processing above trump + // the new simple majority processing here. + // This is mostly because we have seen no smoking gun + // for a neighbor bad choice (yet?). + // Also note that we will ALWAYS be using byte 0 + // majority, because of the if clause above. + + // majority is dependent on the counts, which are + // relative to best_byte, so start there + int maj_byte = best_byte; + int rank_maj; + int rank_sum; + + if (count_more > count_same && + count_more > count_less) { + maj_byte++; + } else if (count_less > count_same && + count_less > count_more) { + maj_byte--; + } + + if (maj_byte != new_byte) { + // print only when majority choice is + // different from average + if (rl_print) { + debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n", + node, if_num, rankx, i, maj_byte, + new_byte); + } + new_byte = maj_byte; + } else { + if (rl_print) { + debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n", + node, if_num, rankx, i, new_byte); + } + } + + if (!disable_rank_majority) { + // rank majority is dependent on the rank + // counts, which are relative to best_byte, + // so start there, and adjust according to the + // rank counts majority + rank_maj = best_byte; + if (rank_more > rank_same && + rank_more > rank_less) { + rank_maj++; + } else if (rank_less > rank_same && + rank_less > rank_more) { + rank_maj--; + } + rank_sum = rank_more + rank_same + rank_less; + + // now, let rank majority possibly rule over + // the current new_byte however we got it + if (rank_maj != new_byte) { // only if different + // Here is where we decide whether to + // completely apply RANK_MAJORITY or not + // ignore if less than + if (rank_maj < new_byte) { + if (rl_print) { + debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n", + node, if_num, + rankx, i, + rank_maj, + new_byte); + } + } else { + // For the moment, we do it + // ONLY when running 2-slot + // configs + // OR when rank_sum is big + // enough + if (dimm_count > 1 || + rank_sum > 2) { + // print only when rank + // majority choice is + // selected + if (rl_print) { + debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n", + node, + if_num, + rankx, + i, + rank_maj, + new_byte); + } + new_byte = rank_maj; + } else { + // FIXME: print some + // info when we could + // have chosen RANKMAJ + // but did not + if (rl_print) { + debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n", + node, + if_num, + rankx, + i, + rank_maj, + new_byte, + best_byte, + rank_sum); + } + } + } + } + } /* if (!disable_rank_majority) */ + } + // one last check: + // if new_byte is still count_byte, BUT there was no count + // for that value, DO SOMETHING!!! + // FIXME: go back to original best byte from the best row + if (new_byte == count_byte && count_same == 0) { + new_byte = orig_best_byte; + if (rl_print) { + debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n", + node, if_num, rankx, i, new_byte); + } + } + // Look at counts for "perfect" bitmasks (PBMs) if we had + // any for this byte-lane. + // Remember, we only counted for DDR4, so zero means none + // or DDR3, and we bypass this... + value_mask = rank_perf[rankx].mask[i]; + disable_rlv_bump_this_byte = 0; + + if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) { + int i, delay_count, delay_max = 0, del_val = 0; + int num_values = __builtin_popcountll(value_mask); + int sum_counts = 0; + u64 temp_mask = value_mask; + + disable_rlv_bump_this_byte = 1; + i = __builtin_ffsll(temp_mask) - 1; + if (rl_print) + debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ", + node, if_num, rankx, i, value_mask >> i, + num_values); + + while (temp_mask != 0) { + i = __builtin_ffsll(temp_mask) - 1; + delay_count = rank_perf[rankx].count[i][i]; + sum_counts += delay_count; + if (rl_print) + debug("%2d(%2d) ", i, delay_count); + if (delay_count >= delay_max) { + delay_max = delay_count; + del_val = i; + } + temp_mask &= ~(1UL << i); + } /* while (temp_mask != 0) */ + + // if sum_counts is small, just use NEW_BYTE + if (sum_counts < pbm_lowsum_limit) { + if (rl_print) + debug(": LOWSUM (%2d), choose ORIG ", + sum_counts); + del_val = new_byte; + delay_max = rank_perf[rankx].count[i][del_val]; + } + + // finish printing here... + if (rl_print) { + debug(": USING %2d (%2d) D%d\n", del_val, + delay_max, disable_rlv_bump_this_byte); + } + + new_byte = del_val; // override with best PBM choice + + } else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) { + // if (value_mask != 0) { + int i, delay_count, del_val; + int num_values = __builtin_popcountll(value_mask); + int sum_counts = 0; + u64 temp_mask = value_mask; + + i = __builtin_ffsll(temp_mask) - 1; + if (rl_print) + debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ", + node, if_num, rankx, i, value_mask >> i, + num_values); + while (temp_mask != 0) { + i = __builtin_ffsll(temp_mask) - 1; + delay_count = rank_perf[rankx].count[i][i]; + sum_counts += delay_count; + if (rl_print) + debug("%2d(%2d) ", i, delay_count); + temp_mask &= ~(1UL << i); + } /* while (temp_mask != 0) */ + + del_val = __builtin_ffsll(value_mask) - 1; + delay_count = + rank_perf[rankx].count[i][del_val]; + + // overkill, normally only 1-4 bits + i = (value_mask >> del_val) & 0x1F; + + // if sum_counts is small, treat as special and use + // NEW_BYTE + if (sum_counts < pbm_lowsum_limit) { + if (rl_print) + debug(": LOWSUM (%2d), choose ORIG", + sum_counts); + i = 99; // SPECIAL case... + } + + switch (i) { + case 0x01 /* 00001b */: + // allow BUMP + break; + + case 0x13 /* 10011b */: + case 0x0B /* 01011b */: + case 0x03 /* 00011b */: + del_val += 1; // take the second + disable_rlv_bump_this_byte = 1; // allow no BUMP + break; + + case 0x0D /* 01101b */: + case 0x05 /* 00101b */: + // test count of lowest and all + if (delay_count >= 5 || sum_counts <= 5) + del_val += 1; // take the hole + else + del_val += 2; // take the next set + disable_rlv_bump_this_byte = 1; // allow no BUMP + break; + + case 0x0F /* 01111b */: + case 0x17 /* 10111b */: + case 0x07 /* 00111b */: + del_val += 1; // take the second + if (delay_count < 5) { // lowest count is small + int second = + rank_perf[rankx].count[i][del_val]; + int third = + rank_perf[rankx].count[i][del_val + 1]; + // test if middle is more than 1 OR + // top is more than 1; + // this means if they are BOTH 1, + // then we keep the second... + if (second > 1 || third > 1) { + // if middle is small OR top + // is large + if (second < 5 || + third > 1) { + // take the top + del_val += 1; + if (rl_print) + debug(": TOP7 "); + } + } + } + disable_rlv_bump_this_byte = 1; // allow no BUMP + break; + + default: // all others... + if (rl_print) + debug(": ABNORMAL, choose ORIG"); + + case 99: // special + // FIXME: choose original choice? + del_val = new_byte; + disable_rlv_bump_this_byte = 1; // allow no BUMP + break; + } + delay_count = + rank_perf[rankx].count[i][del_val]; + + // finish printing here... + if (rl_print) + debug(": USING %2d (%2d) D%d\n", del_val, + delay_count, disable_rlv_bump_this_byte); + new_byte = del_val; // override with best PBM choice + } else { + if (ddr_type == DDR4_DRAM) { // only report when DDR4 + // FIXME: remove or increase VBL for this + // output... + if (rl_print) + debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n", + node, if_num, rankx, i, + new_byte); + // prevent ODD bump, rely on original + disable_rlv_bump_this_byte = 1; + } + } /* if (value_mask != 0) */ + + // optionally bump the delay value + if (enable_rldelay_bump && !disable_rlv_bump_this_byte) { + if ((new_byte & enable_rldelay_bump) == + enable_rldelay_bump) { + int bump_value = new_byte + rldelay_bump_incr; + + if (rl_print) { + debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n", + node, if_num, rankx, i, + new_byte, bump_value, + (value_mask & + (1 << bump_value)) ? + "PBM" : "NOPBM"); + } + new_byte = bump_value; + } + } + + // last checks for count-related purposes + if (new_byte == best_byte && count_more > 0 && + count_less == 0) { + // we really should take best_byte + 1 + if (rl_print) { + debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n", + node, if_num, rankx, i, + new_byte, best_byte + 1); + new_byte = best_byte + 1; + } + } else if ((new_byte < best_byte) && (count_same > 0)) { + // we really should take best_byte + if (rl_print) { + debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n", + node, if_num, rankx, i, + new_byte, best_byte); + new_byte = best_byte; + } + } else if (new_byte > best_byte) { + if ((new_byte == (best_byte + 1)) && + count_more == 0 && count_less > 0) { + // we really should take best_byte + if (rl_print) { + debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n", + node, if_num, rankx, i, + new_byte, best_byte); + new_byte = best_byte; + } + } else if ((new_byte >= (best_byte + 2)) && + ((count_more > 0) || (count_same > 0))) { + if (rl_print) { + debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n", + node, if_num, rankx, i, + new_byte, best_byte + 1); + new_byte = best_byte + 1; + } + } + } + + if (rl_print) { + debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n", + node, if_num, rankx, i, orig_best_byte, + best_byte, count_more, count_same, count_less, + new_byte); + } + + // update the byte with the new value (NOTE: orig value in + // the CSR may not be current "best") + upd_rl_rank(&rl_rank, i, new_byte); + + // save new best for neighbor use + rank_best_bytes[i] = new_byte; + } /* for (i = 0; i < 8+ecc_ena; i++) */ + + ////////////////// this is the end of the BEST BYTE LOOP + + if (saved_rl_rank.u64 != rl_rank.u64) { + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), + rl_rank.u64); + rl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); + debug("Adjusting Read-Leveling per-RANK settings.\n"); + } else { + debug("Not Adjusting Read-Leveling per-RANK settings.\n"); + } + display_rl_with_final(if_num, rl_rank, rankx); + + // FIXME: does this help make the output a little easier to focus? + if (rl_print > 0) + debug("-----------\n"); + +#define RLEVEL_RANKX_EXTRAS_INCR 0 + // if there are unused entries to be filled + if ((rank_mask & 0x0f) != 0x0f) { + // copy the current rank + union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank; + + if (rankx < 3) { +#if RLEVEL_RANKX_EXTRAS_INCR > 0 + int byte, delay; + + // modify the copy in prep for writing to empty slot(s) + for (byte = 0; byte < 9; byte++) { + delay = get_rl_rank(&temp_rl_rank, byte) + + RLEVEL_RANKX_EXTRAS_INCR; + if (delay > RLEVEL_BYTE_MSK) + delay = RLEVEL_BYTE_MSK; + upd_rl_rank(&temp_rl_rank, byte, delay); + } +#endif + + // if rank 0, write rank 1 and rank 2 here if empty + if (rankx == 0) { + // check that rank 1 is empty + if (!(rank_mask & (1 << 1))) { + debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 1); + lmc_wr(priv, + CVMX_LMCX_RLEVEL_RANKX(1, + if_num), + temp_rl_rank.u64); + } + + // check that rank 2 is empty + if (!(rank_mask & (1 << 2))) { + debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 2); + lmc_wr(priv, + CVMX_LMCX_RLEVEL_RANKX(2, + if_num), + temp_rl_rank.u64); + } + } + + // if ranks 0, 1 or 2, write rank 3 here if empty + // check that rank 3 is empty + if (!(rank_mask & (1 << 3))) { + debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 3); + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num), + temp_rl_rank.u64); + } + } + } +} + +static void lmc_read_leveling(struct ddr_priv *priv) +{ + struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]; + union cvmx_lmcx_control ctl; + union cvmx_lmcx_config cfg; + int rankx; + char *s; + int i; + + /* + * 4.8.10 LMC Read Leveling + * + * LMC supports an automatic read-leveling separately per byte-lane + * using the DDR3 multipurpose register predefined pattern for system + * calibration defined in the JEDEC DDR3 specifications. + * + * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations + * must be completed prior to starting this LMC read-leveling sequence. + * + * Software could simply write the desired read-leveling values into + * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses + * LMC's autoread-leveling capabilities. + * + * When LMC does the read-leveling sequence for a rank, it first enables + * the DDR3 multipurpose register predefined pattern for system + * calibration on the selected DRAM rank via a DDR3 MR3 write, then + * executes 64 RD operations at different internal delay settings, then + * disables the predefined pattern via another DDR3 MR3 write + * operation. LMC determines the pass or fail of each of the 64 settings + * independently for each byte lane, then writes appropriate + * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank. + * + * After read-leveling for a rank, software can read the 64 pass/fail + * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK]. + * Software can observe all pass/fail results for all byte lanes in a + * rank via separate read-leveling sequences on the rank with different + * LMC(0)_RLEVEL_CTL[BYTE] values. + * + * The 64 pass/fail results will typically have failures for the low + * delays, followed by a run of some passing settings, followed by more + * failures in the remaining high delays. LMC sets + * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings. + * First, LMC selects the longest run of successes in the 64 results. + * (In the unlikely event that there is more than one longest run, LMC + * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and + * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes, + * LMC selects the last passing setting in the run minus + * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting + * in the run (rounding earlier when necessary). We expect the + * read-leveling sequence to produce good results with the reset values + * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2. + * + * The read-leveling sequence has the following steps: + * + * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings. + * Do the remaining substeps 2-4 separately for each rank i with + * attached DRAM. + * + * 2. Without changing any other fields in LMC(0)_CONFIG, + * + * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling + * + * o write LMC(0)_CONFIG[RANKMASK] = (1 << i) + * + * o write LMC(0)_SEQ_CTL[INIT_START] = 1 + * + * This initiates the previously-described read-leveling. + * + * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2 + * + * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte + * lanes at this point. + * + * If ECC DRAM is not present (i.e. when DRAM is not attached to the + * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and + * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] = + * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] = + * LMC(0)_RLEVEL_RANK*[BYTE0]. + * + * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to + * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by + * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify + * LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all + * BITMASKs can be observed. + * + * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks. + * + * Let rank i be a rank with attached DRAM. + * + * For all ranks j that do not have attached DRAM, set + * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi. + * + * This read-leveling sequence can help select the proper CN70XX ODT + * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated + * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is + * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk] + * (for a used byte lane k) can indicate that the CN70XX ODT value is + * bad. It is possible to simultaneously optimize both + * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by + * performing this read-leveling sequence for several + * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the + * best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks. + */ + + rl_rodt_err = 0; + rl_dbg_loops = 1; + saved_int_zqcs_dis = 0; + max_adj_rl_del_inc = 0; + rl_print = RLEVEL_PRINTALL_DEFAULT; + +#ifdef ENABLE_HARDCODED_RLEVEL + part_number[21] = {0}; +#endif /* ENABLE_HARDCODED_RLEVEL */ + + pbm_lowsum_limit = 5; // FIXME: is this a good default? + // FIXME: PBM skip for RODT 240 and 34 + pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) | + (1U << ddr4_rodt_ctl_34_ohm); + + disable_rank_majority = 0; // control rank majority processing + + // default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE + // for DDR3 + rldelay_bump_incr = 0; + disable_rlv_bump_this_byte = 0; + + enable_rldelay_bump = (ddr_type == DDR4_DRAM) ? + ((octeon_is_cpuid(OCTEON_CN73XX)) ? 1 : 3) : 0; + + s = lookup_env(priv, "ddr_disable_rank_majority"); + if (s) + disable_rank_majority = !!simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_pbm_lowsum_limit"); + if (s) + pbm_lowsum_limit = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_pbm_rodt_skip"); + if (s) + pbm_rodt_skip = simple_strtoul(s, NULL, 0); + memset(rank_perf, 0, sizeof(rank_perf)); + + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + save_ddr2t = ctl.cn78xx.ddr2t; + + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + ecc_ena = cfg.cn78xx.ecc_ena; + + s = lookup_env(priv, "ddr_rlevel_2t"); + if (s) + ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0); + + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + + debug("LMC%d: Performing Read-Leveling\n", if_num); + + rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); + + rl_samples = c_cfg->rlevel_average_loops; + if (rl_samples == 0) { + rl_samples = RLEVEL_SAMPLES_DEFAULT; + // up the samples for these cases + if (dimm_count == 1 || num_ranks == 1) + rl_samples = rl_samples * 2 + 1; + } + + rl_compute = c_cfg->rlevel_compute; + rl_ctl.cn78xx.offset_en = c_cfg->offset_en; + rl_ctl.cn78xx.offset = spd_rdimm + ? c_cfg->offset_rdimm + : c_cfg->offset_udimm; + + int value = 1; // should ALWAYS be set + + s = lookup_env(priv, "ddr_rlevel_delay_unload"); + if (s) + value = !!simple_strtoul(s, NULL, 0); + rl_ctl.cn78xx.delay_unload_0 = value; + rl_ctl.cn78xx.delay_unload_1 = value; + rl_ctl.cn78xx.delay_unload_2 = value; + rl_ctl.cn78xx.delay_unload_3 = value; + + // use OR_DIS=1 to try for better results + rl_ctl.cn78xx.or_dis = 1; + + /* + * If we will be switching to 32bit mode level based on only + * four bits because there are only 4 ECC bits. + */ + rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F; + + // allow overrides + s = lookup_env(priv, "ddr_rlevel_ctl_or_dis"); + if (s) + rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_ctl_bitmask"); + if (s) + rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0); + + rl_comp_offs = spd_rdimm + ? c_cfg->rlevel_comp_offset_rdimm + : c_cfg->rlevel_comp_offset_udimm; + s = lookup_env(priv, "ddr_rlevel_comp_offset"); + if (s) + rl_comp_offs = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_offset"); + if (s) + rl_ctl.cn78xx.offset = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_offset_en"); + if (s) + rl_ctl.cn78xx.offset_en = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_ctl"); + if (s) + rl_ctl.u64 = simple_strtoul(s, NULL, 0); + + lmc_wr(priv, + CVMX_LMCX_RLEVEL_CTL(if_num), + rl_ctl.u64); + + // do this here so we can look at final RLEVEL_CTL[offset] setting... + s = lookup_env(priv, "ddr_enable_rldelay_bump"); + if (s) { + // also use as mask bits + enable_rldelay_bump = strtoul(s, NULL, 0); + } + + if (enable_rldelay_bump != 0) + rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? -1 : 1; + + s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num); + if (s) + rl_dbg_loops = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rtt_nom_auto"); + if (s) + ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_average"); + if (s) + rl_samples = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_compute"); + if (s) + rl_compute = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_printall"); + if (s) + rl_print = simple_strtoul(s, NULL, 0); + + debug("RLEVEL_CTL : 0x%016llx\n", + rl_ctl.u64); + debug("RLEVEL_OFFSET : %6d\n", + rl_ctl.cn78xx.offset); + debug("RLEVEL_OFFSET_EN : %6d\n", + rl_ctl.cn78xx.offset_en); + + /* + * The purpose for the indexed table is to sort the settings + * by the ohm value to simplify the testing when incrementing + * through the settings. (index => ohms) 1=120, 2=60, 3=40, + * 4=30, 5=20 + */ + min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ? + 1 : c_cfg->min_rtt_nom_idx; + max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ? + 5 : c_cfg->max_rtt_nom_idx; + + min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl; + max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl; + + s = lookup_env(priv, "ddr_min_rodt_ctl"); + if (s) + min_rodt_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_max_rodt_ctl"); + if (s) + max_rodt_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_min_rtt_nom_idx"); + if (s) + min_rtt_nom_idx = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_max_rtt_nom_idx"); + if (s) + max_rtt_nom_idx = simple_strtoul(s, NULL, 0); + +#ifdef ENABLE_HARDCODED_RLEVEL + if (c_cfg->rl_tbl) { + /* Check for hard-coded read-leveling settings */ + get_dimm_part_number(part_number, &dimm_config_table[0], + 0, ddr_type); + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + rl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + + i = 0; + while (c_cfg->rl_tbl[i].part) { + debug("DIMM part number:\"%s\", SPD: \"%s\"\n", + c_cfg->rl_tbl[i].part, part_number); + if ((strcmp(part_number, + c_cfg->rl_tbl[i].part) == 0) && + (abs(c_cfg->rl_tbl[i].speed - + 2 * ddr_hertz / (1000 * 1000)) < 10)) { + debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n", + part_number); + rl_rank.u64 = + c_cfg->rl_tbl[i].rl_rank[if_num][rankx]; + lmc_wr(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num), + rl_rank.u64); + rl_rank.u64 = + lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + display_rl(if_num, rl_rank, rankx); + /* Disable h/w read-leveling */ + rl_dbg_loops = 0; + break; + } + ++i; + } + } + } +#endif /* ENABLE_HARDCODED_RLEVEL */ + + max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment; + s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment"); + if (s) + max_adj_rl_del_inc = strtoul(s, NULL, 0); + + while (rl_dbg_loops--) { + union cvmx_lmcx_modereg_params1 mp1; + union cvmx_lmcx_comp_ctl2 cc2; + + /* Initialize the error scoreboard */ + memset(rl_score, 0, sizeof(rl_score)); + + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + saved_ddr__ptune = cc2.cn78xx.ddr__ptune; + saved_ddr__ntune = cc2.cn78xx.ddr__ntune; + + /* Disable dynamic compensation settings */ + if (rl_comp_offs != 0) { + cc2.cn78xx.ptune = saved_ddr__ptune; + cc2.cn78xx.ntune = saved_ddr__ntune; + + /* + * Round up the ptune calculation to bias the odd + * cases toward ptune + */ + cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2); + cc2.cn78xx.ntune -= rl_comp_offs / 2; + + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + saved_int_zqcs_dis = ctl.s.int_zqcs_dis; + /* Disable ZQCS while in bypass. */ + ctl.s.int_zqcs_dis = 1; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + + cc2.cn78xx.byp = 1; /* Enable bypass mode */ + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + /* Read again */ + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", + cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune); + } + + mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; + ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + /* + * When the read ODT mask is zero the dyn_rtt_nom_mask + * is zero than RTT_NOM will not be changing during + * read-leveling. Since the value is fixed we only need + * to test it once. + */ + if (dyn_rtt_nom_mask == 0) { + // flag not to print NOM ohms + print_nom_ohms = -1; + } else { + if (dyn_rtt_nom_mask & 1) + mp1.s.rtt_nom_00 = rtt_nom; + if (dyn_rtt_nom_mask & 2) + mp1.s.rtt_nom_01 = rtt_nom; + if (dyn_rtt_nom_mask & 4) + mp1.s.rtt_nom_10 = rtt_nom; + if (dyn_rtt_nom_mask & 8) + mp1.s.rtt_nom_11 = rtt_nom; + // FIXME? rank 0 ohms always? + print_nom_ohms = + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00]; + } + + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), + mp1.u64); + + if (print_nom_ohms >= 0 && rl_print > 1) { + debug("\n"); + debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], + mp1.s.rtt_nom_11, + mp1.s.rtt_nom_10, + mp1.s.rtt_nom_01, + mp1.s.rtt_nom_00); + } + + ddr_init_seq(priv, rank_mask, if_num); + + // Try RANK outside RODT to rearrange the output... + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + for (rodt_ctl = max_rodt_ctl; + rodt_ctl >= min_rodt_ctl; --rodt_ctl) + rodt_loop(priv, rankx, rl_score); + } + } + + /* Re-enable dynamic compensation settings. */ + if (rl_comp_offs != 0) { + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + + cc2.cn78xx.ptune = 0; + cc2.cn78xx.ntune = 0; + cc2.cn78xx.byp = 0; /* Disable bypass mode */ + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + /* Read once */ + lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + + /* Read again */ + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", + cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune); + + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + /* Restore original setting */ + ctl.s.int_zqcs_dis = saved_int_zqcs_dis; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + } + + int override_compensation = 0; + + s = lookup_env(priv, "ddr__ptune"); + if (s) + saved_ddr__ptune = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr__ntune"); + if (s) { + saved_ddr__ntune = strtoul(s, NULL, 0); + override_compensation = 1; + } + + if (override_compensation) { + cc2.cn78xx.ptune = saved_ddr__ptune; + cc2.cn78xx.ntune = saved_ddr__ntune; + + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + saved_int_zqcs_dis = ctl.s.int_zqcs_dis; + /* Disable ZQCS while in bypass. */ + ctl.s.int_zqcs_dis = 1; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + + cc2.cn78xx.byp = 1; /* Enable bypass mode */ + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + /* Read again */ + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + + debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", + cc2.cn78xx.ptune, cc2.cn78xx.ntune); + } + + /* Evaluation block */ + /* Still at initial value? */ + int best_rodt_score = DEFAULT_BEST_RANK_SCORE; + int auto_rodt_ctl = 0; + int auto_rtt_nom = 0; + int rodt_score; + + rodt_row_skip_mask = 0; + + // just add specific RODT rows to the skip mask for DDR4 + // at this time... + if (ddr_type == DDR4_DRAM) { + // skip RODT row 34 ohms for all DDR4 types + rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm); + // skip RODT row 40 ohms for all DDR4 types + rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm); + // For now, do not skip RODT row 40 or 48 ohm when + // ddr_hertz is above 1075 MHz + if (ddr_hertz > 1075000000) { + // noskip RODT row 40 ohms + rodt_row_skip_mask &= + ~(1 << ddr4_rodt_ctl_40_ohm); + // noskip RODT row 48 ohms + rodt_row_skip_mask &= + ~(1 << ddr4_rodt_ctl_48_ohm); + } + // For now, do not skip RODT row 48 ohm for 2Rx4 + // stacked die DIMMs + if (is_stacked_die && num_ranks == 2 && + dram_width == 4) { + // noskip RODT row 48 ohms + rodt_row_skip_mask &= + ~(1 << ddr4_rodt_ctl_48_ohm); + } + // for now, leave all rows eligible when we have + // mini-DIMMs... + if (spd_dimm_type == 5 || spd_dimm_type == 6) + rodt_row_skip_mask = 0; + // for now, leave all rows eligible when we have + // a 2-slot 1-rank config + if (dimm_count == 2 && num_ranks == 1) + rodt_row_skip_mask = 0; + + debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n"); + for (rtt_idx = min_rtt_nom_idx; + rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + for (rodt_ctl = max_rodt_ctl; + rodt_ctl >= min_rodt_ctl; --rodt_ctl) { + rodt_score = 0; + for (rankx = 0; rankx < dimm_count * 4; + rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n", + rtt_nom, rodt_ctl, rankx, + rl_score[rtt_nom][rodt_ctl][rankx].score); + rodt_score += + rl_score[rtt_nom][rodt_ctl][rankx].score; + } + // FIXME: do we need to skip RODT rows + // here, like we do below in the + // by-RANK settings? + + /* + * When using automatic ODT settings use + * the ODT settings associated with the + * best score for all of the tested ODT + * combinations. + */ + + if (rodt_score < best_rodt_score || + (rodt_score == best_rodt_score && + (imp_val->rodt_ohms[rodt_ctl] > + imp_val->rodt_ohms[auto_rodt_ctl]))) { + debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n", + rodt_ctl, + imp_val->rodt_ohms[rodt_ctl], + rodt_score, + best_rodt_score); + best_rodt_score = rodt_score; + auto_rodt_ctl = rodt_ctl; + auto_rtt_nom = rtt_nom; + } + } + } + + mp1.u64 = lmc_rd(priv, + CVMX_LMCX_MODEREG_PARAMS1(if_num)); + + if (ddr_rtt_nom_auto) { + /* Store the automatically set RTT_NOM value */ + if (dyn_rtt_nom_mask & 1) + mp1.s.rtt_nom_00 = auto_rtt_nom; + if (dyn_rtt_nom_mask & 2) + mp1.s.rtt_nom_01 = auto_rtt_nom; + if (dyn_rtt_nom_mask & 4) + mp1.s.rtt_nom_10 = auto_rtt_nom; + if (dyn_rtt_nom_mask & 8) + mp1.s.rtt_nom_11 = auto_rtt_nom; + } else { + /* + * restore the manual settings to the register + */ + mp1.s.rtt_nom_00 = default_rtt_nom[0]; + mp1.s.rtt_nom_01 = default_rtt_nom[1]; + mp1.s.rtt_nom_10 = default_rtt_nom[2]; + mp1.s.rtt_nom_11 = default_rtt_nom[3]; + } + + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), + mp1.u64); + debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], + mp1.s.rtt_nom_11, + mp1.s.rtt_nom_10, + mp1.s.rtt_nom_01, + mp1.s.rtt_nom_00); + + debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)], + extr_wr(mp1.u64, 3), + extr_wr(mp1.u64, 2), + extr_wr(mp1.u64, 1), + extr_wr(mp1.u64, 0)); + + debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->dic_ohms[mp1.s.dic_11], + imp_val->dic_ohms[mp1.s.dic_10], + imp_val->dic_ohms[mp1.s.dic_01], + imp_val->dic_ohms[mp1.s.dic_00], + mp1.s.dic_11, + mp1.s.dic_10, + mp1.s.dic_01, + mp1.s.dic_00); + + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_modereg_params2 mp2; + /* + * We must read the CSR, and not depend on + * odt_config[odt_idx].odt_mask2, since we could + * have overridden values with envvars. + * NOTE: this corrects the printout, since the + * CSR is not written with the old values... + */ + mp2.u64 = lmc_rd(priv, + CVMX_LMCX_MODEREG_PARAMS2(if_num)); + + debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp2.s.rtt_park_11], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_10], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_01], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_00], + mp2.s.rtt_park_11, + mp2.s.rtt_park_10, + mp2.s.rtt_park_01, + mp2.s.rtt_park_00); + + debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", + "VREF_RANGE", + mp2.s.vref_range_11, + mp2.s.vref_range_10, + mp2.s.vref_range_01, + mp2.s.vref_range_00); + + debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", + "VREF_VALUE", + mp2.s.vref_value_11, + mp2.s.vref_value_10, + mp2.s.vref_value_01, + mp2.s.vref_value_00); + } + + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + if (ddr_rodt_ctl_auto) { + cc2.cn78xx.rodt_ctl = auto_rodt_ctl; + } else { + // back to the original setting + cc2.cn78xx.rodt_ctl = default_rodt_ctl; + } + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + debug("Read ODT_CTL : 0x%x (%d ohms)\n", + cc2.cn78xx.rodt_ctl, + imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); + + /* + * Use the delays associated with the best score for + * each individual rank + */ + debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n"); + + // this is the the RANK MAJOR LOOP + for (rankx = 0; rankx < dimm_count * 4; rankx++) + rank_major_loop(priv, rankx, rl_score); + } /* Evaluation block */ + } /* while(rl_dbg_loops--) */ + + ctl.cn78xx.ddr2t = save_ddr2t; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + /* Display final 2T value */ + debug("DDR2T : %6d\n", + ctl.cn78xx.ddr2t); + + ddr_init_seq(priv, rank_mask, if_num); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + u64 value; + int parameter_set = 0; + + if (!(rank_mask & (1 << rankx))) + continue; + + rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + + for (i = 0; i < 9; ++i) { + s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d", + if_num, rankx, i); + if (s) { + parameter_set |= 1; + value = simple_strtoul(s, NULL, 0); + + upd_rl_rank(&rl_rank, i, value); + } + } + + s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx); + if (s) { + parameter_set |= 1; + value = simple_strtoull(s, NULL, 0); + rl_rank.u64 = value; + } + + if (parameter_set) { + lmc_wr(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), + rl_rank.u64); + rl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + display_rl(if_num, rl_rank, rankx); + } + } +} + +int init_octeon3_ddr3_interface(struct ddr_priv *priv, + struct ddr_conf *_ddr_conf, u32 _ddr_hertz, + u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num, + u32 _if_mask) +{ + union cvmx_lmcx_control ctrl; + int ret; + char *s; + int i; + + if_num = _if_num; + ddr_hertz = _ddr_hertz; + ddr_conf = _ddr_conf; + if_mask = _if_mask; + odt_1rank_config = ddr_conf->odt_1rank_config; + odt_2rank_config = ddr_conf->odt_2rank_config; + odt_4rank_config = ddr_conf->odt_4rank_config; + dimm_config_table = ddr_conf->dimm_config_table; + c_cfg = &ddr_conf->custom_lmc_config; + + /* + * Compute clock rates to the nearest picosecond. + */ + tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */ + eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */ + + dimm_count = 0; + /* Accumulate and report all the errors before giving up */ + fatal_error = 0; + + /* Flag that indicates safe DDR settings should be used */ + safe_ddr_flag = 0; + if_64b = 1; /* Octeon II Default: 64bit interface width */ + mem_size_mbytes = 0; + bank_bits = 0; + column_bits_start = 1; + use_ecc = 1; + min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0; + spd_package = 0; + spd_rawcard = 0; + spd_rawcard_aorb = 0; + spd_rdimm_registers = 0; + is_stacked_die = 0; + is_3ds_dimm = 0; // 3DS + lranks_per_prank = 1; // 3DS: logical ranks per package rank + lranks_bits = 0; // 3DS: logical ranks bits + die_capacity = 0; // in Mbits; only used for 3DS + + wl_mask_err = 0; + dyn_rtt_nom_mask = 0; + ddr_disable_chip_reset = 1; + match_wl_rtt_nom = 0; + + internal_retries = 0; + + disable_deskew_training = 0; + restart_if_dsk_incomplete = 0; + last_lane = ((if_64b) ? 8 : 4) + use_ecc; + + disable_sequential_delay_check = 0; + wl_print = WLEVEL_PRINTALL_DEFAULT; + + enable_by_rank_init = 1; // FIXME: default by-rank ON + saved_rank_mask = 0; + + node = 0; + + memset(hwl_alts, 0, sizeof(hwl_alts)); + + /* + * Initialize these to shut up the compiler. They are configured + * and used only for DDR4 + */ + ddr4_trrd_lmin = 6000; + ddr4_tccd_lmin = 6000; + + debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n", + node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid()); + + if (dimm_config_table[0].spd_addrs[0] == 0 && + !dimm_config_table[0].spd_ptrs[0]) { + printf("ERROR: No dimms specified in the dimm_config_table.\n"); + return -1; + } + + // allow some overrides to be done + + // this one controls several things related to DIMM geometry: HWL and RL + disable_sequential_delay_check = c_cfg->disable_sequential_delay_check; + s = lookup_env(priv, "ddr_disable_sequential_delay_check"); + if (s) + disable_sequential_delay_check = strtoul(s, NULL, 0); + + // this one controls whether chip RESET is done, or LMC init restarted + // from step 6.9.6 + s = lookup_env(priv, "ddr_disable_chip_reset"); + if (s) + ddr_disable_chip_reset = !!strtoul(s, NULL, 0); + + // this one controls whether Deskew Training is performed + s = lookup_env(priv, "ddr_disable_deskew_training"); + if (s) + disable_deskew_training = !!strtoul(s, NULL, 0); + + if (ddr_verbose(priv)) { + printf("DDR SPD Table:"); + for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) { + if (dimm_config_table[didx].spd_addrs[0] == 0) + break; + + printf(" --ddr%dspd=0x%02x", if_num, + dimm_config_table[didx].spd_addrs[0]); + if (dimm_config_table[didx].spd_addrs[1] != 0) + printf(",0x%02x", + dimm_config_table[didx].spd_addrs[1]); + } + printf("\n"); + } + + /* + * Walk the DRAM Socket Configuration Table to see what is installed. + */ + for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) { + /* Check for lower DIMM socket populated */ + if (validate_dimm(priv, &dimm_config_table[didx], 0)) { + if (ddr_verbose(priv)) + report_dimm(&dimm_config_table[didx], 0, + dimm_count, if_num); + ++dimm_count; + } else { + break; + } /* Finished when there is no lower DIMM */ + } + + initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz, + ddr_ref_hertz, if_num, if_mask); + + if (!odt_1rank_config) + odt_1rank_config = disable_odt_config; + if (!odt_2rank_config) + odt_2rank_config = disable_odt_config; + if (!odt_4rank_config) + odt_4rank_config = disable_odt_config; + + s = env_get("ddr_safe"); + if (s) { + safe_ddr_flag = !!simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_safe = %d\n", + safe_ddr_flag); + } + + if (dimm_count == 0) { + printf("ERROR: DIMM 0 not detected.\n"); + return (-1); + } + + if (c_cfg->mode32b) + if_64b = 0; + + s = lookup_env(priv, "if_64b"); + if (s) + if_64b = !!simple_strtoul(s, NULL, 0); + + if (if_64b == 1) { + if (octeon_is_cpuid(OCTEON_CN70XX)) { + printf("64-bit interface width is not supported for this Octeon model\n"); + ++fatal_error; + } + } + + /* ddr_type only indicates DDR4 or DDR3 */ + ddr_type = (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3; + debug("DRAM Device Type: DDR%d\n", ddr_type); + + if (ddr_type == DDR4_DRAM) { + int spd_module_type; + int asymmetric; + const char *signal_load[4] = { "", "MLS", "3DS", "RSV" }; + + imp_val = &ddr4_impedence_val; + + spd_addr = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_ADDRESSING_ROW_COL_BITS); + spd_org = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MODULE_ORGANIZATION); + spd_banks = + 0xFF & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_DENSITY_BANKS); + + bank_bits = + (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3); + /* Controller can only address 4 bits. */ + bank_bits = min((int)bank_bits, 4); + + spd_package = + 0XFF & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_PACKAGE_TYPE); + if (spd_package & 0x80) { // non-monolithic device + is_stacked_die = ((spd_package & 0x73) == 0x11); + debug("DDR4: Package Type 0x%02x (%s), %d die\n", + spd_package, signal_load[(spd_package & 3)], + ((spd_package >> 4) & 7) + 1); + is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS? + if (is_3ds_dimm) { // is it 3DS? + lranks_per_prank = ((spd_package >> 4) & 7) + 1; + // FIXME: should make sure it is only 2H or 4H + // or 8H? + lranks_bits = lranks_per_prank >> 1; + if (lranks_bits == 4) + lranks_bits = 3; + } + } else if (spd_package != 0) { + // FIXME: print non-zero monolithic device definition + debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n", + ((spd_package >> 4) & 7) + 1, (spd_package & 3)); + } + + asymmetric = (spd_org >> 6) & 1; + if (asymmetric) { + int spd_secondary_pkg = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_SECONDARY_PACKAGE_TYPE); + debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n", + spd_secondary_pkg); + } else { + u64 bus_width = + 8 << (0x07 & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH)); + u64 ddr_width = 4 << ((spd_org >> 0) & 0x7); + u64 module_cap; + int shift = (spd_banks & 0x0F); + + die_capacity = (shift < 8) ? (256UL << shift) : + ((12UL << (shift & 1)) << 10); + debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n", + (die_capacity > 512) ? (die_capacity >> 10) : + die_capacity, (die_capacity > 512) ? 'G' : 'M'); + module_cap = ((u64)die_capacity << 20) / 8UL * + bus_width / ddr_width * + (1UL + ((spd_org >> 3) & 0x7)); + + // is it 3DS? + if (is_3ds_dimm) { + module_cap *= (u64)(((spd_package >> 4) & 7) + + 1); + } + debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n", + module_cap >> 30); + } + + spd_rawcard = + 0xFF & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_REFERENCE_RAW_CARD); + debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard); + + spd_module_type = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_KEY_BYTE_MODULE_TYPE); + if (spd_module_type & 0x80) { // HYBRID module + debug("DDR4: HYBRID module, type %s\n", + ((spd_module_type & 0x70) == + 0x10) ? "NVDIMM" : "UNKNOWN"); + } + spd_thermal_sensor = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MODULE_THERMAL_SENSOR); + spd_dimm_type = spd_module_type & 0x0F; + spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || + (spd_dimm_type == 8); + if (spd_rdimm) { + u16 spd_mfgr_id, spd_register_rev, spd_mod_attr; + static const u16 manu_ids[4] = { + 0xb380, 0x3286, 0x9780, 0xb304 + }; + static const char *manu_names[4] = { + "XXX", "XXXXXXX", "XX", "XXXXX" + }; + int mc; + + spd_mfgr_id = + (0xFFU & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) | + ((0xFFU & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB)) + << 8); + spd_register_rev = + 0xFFU & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_REGISTER_REVISION_NUMBER); + for (mc = 0; mc < 4; mc++) + if (manu_ids[mc] == spd_mfgr_id) + break; + + debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n", + (mc >= 4) ? "UNKNOWN" : manu_names[mc], + spd_register_rev); + + // RAWCARD A or B must be bit 7=0 and bits 4-0 + // either 00000(A) or 00001(B) + spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1); + // RDIMM Module Attributes + spd_mod_attr = + 0xFFU & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE); + spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1); + debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n", + spd_mod_attr, (spd_mod_attr >> 4) + 1, + ((1 << ((spd_mod_attr >> 2) & 3)) >> 1), + spd_rdimm_registers); + } + dimm_type_name = ddr4_dimm_types[spd_dimm_type]; + } else { /* if (ddr_type == DDR4_DRAM) */ + const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" }; + + imp_val = &ddr3_impedence_val; + + spd_addr = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_ADDRESSING_ROW_COL_BITS); + spd_org = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MODULE_ORGANIZATION); + spd_banks = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_DENSITY_BANKS) & 0xff; + + bank_bits = 3 + ((spd_banks >> 4) & 0x7); + /* Controller can only address 3 bits. */ + bank_bits = min((int)bank_bits, 3); + spd_dimm_type = + 0x0f & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_KEY_BYTE_MODULE_TYPE); + spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || + (spd_dimm_type == 9); + + spd_package = + 0xFF & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_SDRAM_DEVICE_TYPE); + if (spd_package & 0x80) { // non-standard device + debug("DDR3: Device Type 0x%02x (%s), %d die\n", + spd_package, signal_load[(spd_package & 3)], + ((1 << ((spd_package >> 4) & 7)) >> 1)); + } else if (spd_package != 0) { + // FIXME: print non-zero monolithic device definition + debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n", + ((1 << (spd_package >> 4) & 7) >> 1), + (spd_package & 3)); + } + + spd_rawcard = + 0xFF & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_REFERENCE_RAW_CARD); + debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard); + spd_thermal_sensor = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MODULE_THERMAL_SENSOR); + + if (spd_rdimm) { + int spd_mfgr_id, spd_register_rev, spd_mod_attr; + + spd_mfgr_id = + (0xFFU & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) | + ((0xFFU & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB)) + << 8); + spd_register_rev = + 0xFFU & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_REGISTER_REVISION_NUMBER); + debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n", + spd_mfgr_id, spd_register_rev); + // Module Attributes + spd_mod_attr = + 0xFFU & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_ADDRESS_MAPPING); + spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1); + debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n", + spd_mod_attr, + ((1 << ((spd_mod_attr >> 2) & 3)) >> 1), + spd_rdimm_registers); + } + dimm_type_name = ddr3_dimm_types[spd_dimm_type]; + } + + if (spd_thermal_sensor & 0x80) { + debug("DDR%d: SPD: Thermal Sensor PRESENT\n", + (ddr_type == DDR4_DRAM) ? 4 : 3); + } + + debug("spd_addr : %#06x\n", spd_addr); + debug("spd_org : %#06x\n", spd_org); + debug("spd_banks : %#06x\n", spd_banks); + + row_bits = 12 + ((spd_addr >> 3) & 0x7); + col_bits = 9 + ((spd_addr >> 0) & 0x7); + + num_ranks = 1 + ((spd_org >> 3) & 0x7); + dram_width = 4 << ((spd_org >> 0) & 0x7); + num_banks = 1 << bank_bits; + + s = lookup_env(priv, "ddr_num_ranks"); + if (s) + num_ranks = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_enable_by_rank_init"); + if (s) + enable_by_rank_init = !!simple_strtoul(s, NULL, 0); + + // FIXME: for now, we can only handle a DDR4 2rank-1slot config + // FIXME: also, by-rank init does not work correctly if 32-bit mode... + if (enable_by_rank_init && (ddr_type != DDR4_DRAM || + dimm_count != 1 || if_64b != 1 || + num_ranks != 2)) + enable_by_rank_init = 0; + + if (enable_by_rank_init) { + struct dimm_odt_config *odt_config; + union cvmx_lmcx_modereg_params1 mp1; + union cvmx_lmcx_modereg_params2 modereg_params2; + int by_rank_rodt, by_rank_wr, by_rank_park; + + // Do ODT settings changes which work best for 2R-1S configs + debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n"); + + // setup for modifying config table values - 2 ranks and 1 DIMM + odt_config = + (struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0]; + + // original was 80, first try was 60 + by_rank_rodt = ddr4_rodt_ctl_48_ohm; + s = lookup_env(priv, "ddr_by_rank_rodt"); + if (s) + by_rank_rodt = strtoul(s, NULL, 0); + + odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt; + + // this is for MODEREG_PARAMS1 fields + // fetch the original settings + mp1.u64 = odt_config->modereg_params1.u64; + + by_rank_wr = ddr4_rttwr_80ohm; // originals were 240 + s = lookup_env(priv, "ddr_by_rank_wr"); + if (s) + by_rank_wr = simple_strtoul(s, NULL, 0); + + // change specific settings here... + insrt_wr(&mp1.u64, /*rank */ 00, by_rank_wr); + insrt_wr(&mp1.u64, /*rank */ 01, by_rank_wr); + + // save final settings + odt_config->modereg_params1.u64 = mp1.u64; + + // this is for MODEREG_PARAMS2 fields + // fetch the original settings + modereg_params2.u64 = odt_config->modereg_params2.u64; + + by_rank_park = ddr4_rttpark_none; // originals were 120 + s = lookup_env(priv, "ddr_by_rank_park"); + if (s) + by_rank_park = simple_strtoul(s, NULL, 0); + + // change specific settings here... + modereg_params2.s.rtt_park_00 = by_rank_park; + modereg_params2.s.rtt_park_01 = by_rank_park; + + // save final settings + odt_config->modereg_params2.u64 = modereg_params2.u64; + } + + /* + * FIX + * Check that values are within some theoretical limits. + * col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) = + * 14 - 3 - 4 = 7 + * col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) = + * 18 - 2 - 3 = 13 + */ + if (col_bits > 13 || col_bits < 7) { + printf("Unsupported number of Col Bits: %d\n", col_bits); + ++fatal_error; + } + + /* + * FIX + * Check that values are within some theoretical limits. + * row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits = + * 26 - 18 - 1 = 7 + * row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits = + * 33 - 14 - 1 = 18 + */ + if (row_bits > 18 || row_bits < 7) { + printf("Unsupported number of Row Bits: %d\n", row_bits); + ++fatal_error; + } + + s = lookup_env(priv, "ddr_rdimm_ena"); + if (s) + spd_rdimm = !!simple_strtoul(s, NULL, 0); + + wl_loops = WLEVEL_LOOPS_DEFAULT; + // accept generic or interface-specific override + s = lookup_env(priv, "ddr_wlevel_loops"); + if (!s) + s = lookup_env(priv, "ddr%d_wlevel_loops", if_num); + + if (s) + wl_loops = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_ranks"); + if (s) + num_ranks = simple_strtoul(s, NULL, 0); + + bunk_enable = (num_ranks > 1); + + if (octeon_is_cpuid(OCTEON_CN7XXX)) + column_bits_start = 3; + else + printf("ERROR: Unsupported Octeon model: 0x%x\n", + read_c0_prid()); + + row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b); + debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n", + row_lsb); + + pbank_lsb = row_lsb + row_bits + bunk_enable; + debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb); + + if (lranks_per_prank > 1) { + pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable; + debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n", + row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb); + } + + mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20); + if (num_ranks == 4) { + /* + * Quad rank dimm capacity is equivalent to two dual-rank + * dimms. + */ + mem_size_mbytes *= 2; + } + + /* + * Mask with 1 bits set for for each active rank, allowing 2 bits + * per dimm. This makes later calculations simpler, as a variety + * of CSRs use this layout. This init needs to be updated for dual + * configs (ie non-identical DIMMs). + * + * Bit 0 = dimm0, rank 0 + * Bit 1 = dimm0, rank 1 + * Bit 2 = dimm1, rank 0 + * Bit 3 = dimm1, rank 1 + * ... + */ + rank_mask = 0x1; + if (num_ranks > 1) + rank_mask = 0x3; + if (num_ranks > 2) + rank_mask = 0xf; + + for (i = 1; i < dimm_count; i++) + rank_mask |= ((rank_mask & 0x3) << (2 * i)); + + /* + * If we are booting from RAM, the DRAM controller is + * already set up. Just return the memory size + */ + if (priv->flags & FLAG_RAM_RESIDENT) { + debug("Ram Boot: Skipping LMC config\n"); + return mem_size_mbytes; + } + + if (ddr_type == DDR4_DRAM) { + spd_ecc = + !!(read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8); + } else { + spd_ecc = + !!(read_spd + (&dimm_config_table[0], 0, + DDR3_SPD_MEMORY_BUS_WIDTH) & 8); + } + + char rank_spec[8]; + + printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package); + debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n", + dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "", + rank_spec, + (spd_ecc) ? "ECC" : "non-ECC", row_bits, col_bits, bank_bits); + + if (ddr_type == DDR4_DRAM) { + spd_cas_latency = + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0); + spd_cas_latency |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8); + spd_cas_latency |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16); + spd_cas_latency |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24); + } else { + spd_cas_latency = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_CAS_LATENCIES_LSB); + spd_cas_latency |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_CAS_LATENCIES_MSB)) << 8); + } + debug("spd_cas_latency : %#06x\n", spd_cas_latency); + + if (ddr_type == DDR4_DRAM) { + /* + * No other values for DDR4 MTB and FTB are specified at the + * current time so don't bother reading them. Can't speculate + * how new values will be represented. + */ + int spdmtb = 125; + int spdftb = 1; + + taamin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], + 0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN); + + ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN); + + ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX); + + ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN); + + ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN); + + ddr4_trasmin = spdmtb * + (((read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) + + (read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff)); + + ddr4_trcmin = spdmtb * + ((((read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) << + 8) + (read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) & + 0xff)) + + spdftb * (signed char)read_spd(&dimm_config_table[0], + 0, + DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN); + + ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) << + 8) + (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff)); + + ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) << + 8) + (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff)); + + ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) << + 8) + (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff)); + + ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) << + 8) + (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff)); + + ddr4_trrd_smin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN); + + ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN); + + ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN); + + debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb); + debug("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdftb); + + debug("%-45s : %6d ps (%ld MT/s)\n", + "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin, + pretty_psecs_to_mts(ddr4_tckavgmin)); + debug("%-45s : %6d ps\n", + "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax); + debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)", + taamin); + debug("%-45s : %6d ps\n", + "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin); + debug("%-45s : %6d ps\n", + "Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin); + debug("%-45s : %6d ps\n", + "Minimum Active to Precharge Delay (tRASmin)", + ddr4_trasmin); + debug("%-45s : %6d ps\n", + "Minimum Active to Active/Refr. Delay (tRCmin)", + ddr4_trcmin); + debug("%-45s : %6d ps\n", + "Minimum Refresh Recovery Delay (tRFC1min)", + ddr4_trfc1min); + debug("%-45s : %6d ps\n", + "Minimum Refresh Recovery Delay (tRFC2min)", + ddr4_trfc2min); + debug("%-45s : %6d ps\n", + "Minimum Refresh Recovery Delay (tRFC4min)", + ddr4_trfc4min); + debug("%-45s : %6d ps\n", + "Minimum Four Activate Window Time (tFAWmin)", + ddr4_tfawmin); + debug("%-45s : %6d ps\n", + "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin); + debug("%-45s : %6d ps\n", + "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_trrd_lmin); + debug("%-45s : %6d ps\n", + "Minimum CAS to CAS Delay Time (tCCD_Lmin)", + ddr4_tccd_lmin); + +#define DDR4_TWR 15000 +#define DDR4_TWTR_S 2500 + + tckmin = ddr4_tckavgmin; + twr = DDR4_TWR; + trcd = ddr4_trdcmin; + trrd = ddr4_trrd_smin; + trp = ddr4_trpmin; + tras = ddr4_trasmin; + trc = ddr4_trcmin; + trfc = ddr4_trfc1min; + twtr = DDR4_TWTR_S; + tfaw = ddr4_tfawmin; + + if (spd_rdimm) { + spd_addr_mirror = read_spd(&dimm_config_table[0], 0, + DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) & + 0x1; + } else { + spd_addr_mirror = read_spd(&dimm_config_table[0], 0, + DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1; + } + debug("spd_addr_mirror : %#06x\n", spd_addr_mirror); + } else { + spd_mtb_dividend = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND); + spd_mtb_divisor = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR); + spd_tck_min = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN); + spd_taa_min = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_CAS_LATENCY_TAAMIN); + + spd_twr = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN); + spd_trcd = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN); + spd_trrd = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN); + spd_trp = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN); + spd_tras = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN); + spd_tras |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8); + spd_trc = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN); + spd_trc |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4); + spd_trfc = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN); + spd_trfc |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) << + 8); + spd_twtr = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN); + spd_trtp = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN); + spd_tfaw = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN); + spd_tfaw |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8); + spd_addr_mirror = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_ADDRESS_MAPPING) & 0x1; + /* Only address mirror unbuffered dimms. */ + spd_addr_mirror = spd_addr_mirror && !spd_rdimm; + ftb_dividend = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4; + ftb_divisor = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf; + /* Make sure that it is not 0 */ + ftb_divisor = (ftb_divisor == 0) ? 1 : ftb_divisor; + + debug("spd_twr : %#06x\n", spd_twr); + debug("spd_trcd : %#06x\n", spd_trcd); + debug("spd_trrd : %#06x\n", spd_trrd); + debug("spd_trp : %#06x\n", spd_trp); + debug("spd_tras : %#06x\n", spd_tras); + debug("spd_trc : %#06x\n", spd_trc); + debug("spd_trfc : %#06x\n", spd_trfc); + debug("spd_twtr : %#06x\n", spd_twtr); + debug("spd_trtp : %#06x\n", spd_trtp); + debug("spd_tfaw : %#06x\n", spd_tfaw); + debug("spd_addr_mirror : %#06x\n", spd_addr_mirror); + + mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor; + taamin = mtb_psec * spd_taa_min; + taamin += ftb_dividend * + (signed char)read_spd(&dimm_config_table[0], + 0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) / + ftb_divisor; + tckmin = mtb_psec * spd_tck_min; + tckmin += ftb_dividend * + (signed char)read_spd(&dimm_config_table[0], + 0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) / + ftb_divisor; + + twr = spd_twr * mtb_psec; + trcd = spd_trcd * mtb_psec; + trrd = spd_trrd * mtb_psec; + trp = spd_trp * mtb_psec; + tras = spd_tras * mtb_psec; + trc = spd_trc * mtb_psec; + trfc = spd_trfc * mtb_psec; + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) { + // default to this - because it works... + int new_trfc = 260000; + + s = env_get("ddr_trfc"); + if (s) { + new_trfc = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_trfc = %d\n", + new_trfc); + if (new_trfc < 160000 || new_trfc > 260000) { + // back to default if out of range + new_trfc = 260000; + } + } + debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n", + node, if_num, trfc, new_trfc); + trfc = new_trfc; + } + + twtr = spd_twtr * mtb_psec; + trtp = spd_trtp * mtb_psec; + tfaw = spd_tfaw * mtb_psec; + + debug("Medium Timebase (MTB) : %6d ps\n", + mtb_psec); + debug("Minimum Cycle Time (tckmin) : %6d ps (%ld MT/s)\n", + tckmin, pretty_psecs_to_mts(tckmin)); + debug("Minimum CAS Latency Time (taamin) : %6d ps\n", + taamin); + debug("Write Recovery Time (tWR) : %6d ps\n", + twr); + debug("Minimum RAS to CAS delay (tRCD) : %6d ps\n", + trcd); + debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n", + trrd); + debug("Minimum Row Precharge Delay (tRP) : %6d ps\n", + trp); + debug("Minimum Active to Precharge (tRAS) : %6d ps\n", + tras); + debug("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n", + trc); + debug("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n", + trfc); + debug("Internal write to read command delay (tWTR) : %6d ps\n", + twtr); + debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n", + trtp); + debug("Minimum Four Activate Window Delay (tFAW) : %6d ps\n", + tfaw); + } + + /* + * When the cycle time is within 1 psec of the minimum accept it + * as a slight rounding error and adjust it to exactly the minimum + * cycle time. This avoids an unnecessary warning. + */ + if (abs(tclk_psecs - tckmin) < 2) + tclk_psecs = tckmin; + + if (tclk_psecs < (u64)tckmin) { + printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n", + tclk_psecs, (ulong)tckmin); + } + + debug("DDR Clock Rate (tCLK) : %6ld ps\n", + tclk_psecs); + debug("Core Clock Rate (eCLK) : %6ld ps\n", + eclk_psecs); + + s = env_get("ddr_use_ecc"); + if (s) { + use_ecc = !!simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_use_ecc = %d\n", + use_ecc); + } + use_ecc = use_ecc && spd_ecc; + + if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff) + : (use_ecc ? 0x01f : 0x0f); + + debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n", + if_64b ? 64 : 32, use_ecc ? "+ECC" : "", if_bytemask); + + debug("\n------ Board Custom Configuration Settings ------\n"); + debug("%-45s : %d\n", "MIN_RTT_NOM_IDX ", c_cfg->min_rtt_nom_idx); + debug("%-45s : %d\n", "MAX_RTT_NOM_IDX ", c_cfg->max_rtt_nom_idx); + debug("%-45s : %d\n", "MIN_RODT_CTL ", c_cfg->min_rodt_ctl); + debug("%-45s : %d\n", "MAX_RODT_CTL ", c_cfg->max_rodt_ctl); + debug("%-45s : %d\n", "MIN_CAS_LATENCY ", c_cfg->min_cas_latency); + debug("%-45s : %d\n", "OFFSET_EN ", c_cfg->offset_en); + debug("%-45s : %d\n", "OFFSET_UDIMM ", c_cfg->offset_udimm); + debug("%-45s : %d\n", "OFFSET_RDIMM ", c_cfg->offset_rdimm); + debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", c_cfg->ddr_rtt_nom_auto); + debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto); + if (spd_rdimm) + debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET", + c_cfg->rlevel_comp_offset_rdimm); + else + debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET", + c_cfg->rlevel_comp_offset_udimm); + debug("%-45s : %d\n", "RLEVEL_COMPUTE ", c_cfg->rlevel_compute); + debug("%-45s : %d\n", "DDR2T_UDIMM ", c_cfg->ddr2t_udimm); + debug("%-45s : %d\n", "DDR2T_RDIMM ", c_cfg->ddr2t_rdimm); + debug("%-45s : %d\n", "FPRCH2 ", c_cfg->fprch2); + debug("%-45s : %d\n", "PTUNE_OFFSET ", c_cfg->ptune_offset); + debug("%-45s : %d\n", "NTUNE_OFFSET ", c_cfg->ntune_offset); + debug("-------------------------------------------------\n"); + + cl = divide_roundup(taamin, tclk_psecs); + + debug("Desired CAS Latency : %6d\n", cl); + + min_cas_latency = c_cfg->min_cas_latency; + + s = lookup_env(priv, "ddr_min_cas_latency"); + if (s) + min_cas_latency = simple_strtoul(s, NULL, 0); + + debug("CAS Latencies supported in DIMM :"); + base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4; + for (i = 0; i < 32; ++i) { + if ((spd_cas_latency >> i) & 1) { + debug(" %d", i + base_cl); + max_cas_latency = i + base_cl; + if (min_cas_latency == 0) + min_cas_latency = i + base_cl; + } + } + debug("\n"); + + /* + * Use relaxed timing when running slower than the minimum + * supported speed. Adjust timing to match the smallest supported + * CAS Latency. + */ + if (min_cas_latency > cl) { + ulong adjusted_tclk = taamin / min_cas_latency; + + cl = min_cas_latency; + debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n", + tclk_psecs, adjusted_tclk); + tclk_psecs = adjusted_tclk; + } + + s = env_get("ddr_cas_latency"); + if (s) { + override_cas_latency = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_cas_latency = %d\n", + override_cas_latency); + } + + /* Make sure that the selected cas latency is legal */ + for (i = (cl - base_cl); i < 32; ++i) { + if ((spd_cas_latency >> i) & 1) { + cl = i + base_cl; + break; + } + } + + if (max_cas_latency < cl) + cl = max_cas_latency; + + if (override_cas_latency != 0) + cl = override_cas_latency; + + debug("CAS Latency : %6d\n", cl); + + if ((cl * tckmin) > 20000) { + debug("(CLactual * tckmin) = %d exceeds 20 ns\n", + (cl * tckmin)); + } + + if (tclk_psecs < (ulong)tckmin) { + printf("WARNING!!!!!!: DDR3 Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin:%ld)!!!!!!!!\n", + tclk_psecs, (ulong)tckmin); + } + + if (num_banks != 4 && num_banks != 8 && num_banks != 16) { + printf("Unsupported number of banks %d. Must be 4 or 8.\n", + num_banks); + ++fatal_error; + } + + if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) { + printf("Unsupported number of ranks: %d\n", num_ranks); + ++fatal_error; + } + + if (octeon_is_cpuid(OCTEON_CN78XX) || + octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + if (dram_width != 8 && dram_width != 16 && dram_width != 4) { + printf("Unsupported SDRAM Width, %d. Must be 4, 8 or 16.\n", + dram_width); + ++fatal_error; + } + } else if (dram_width != 8 && dram_width != 16) { + printf("Unsupported SDRAM Width, %d. Must be 8 or 16.\n", + dram_width); + ++fatal_error; + } + + /* + ** Bail out here if things are not copasetic. + */ + if (fatal_error) + return (-1); + + /* + * 4.8.4 LMC RESET Initialization + * + * The purpose of this step is to assert/deassert the RESET# pin at the + * DDR3/DDR4 parts. + * + * This LMC RESET step is done for all enabled LMCs. + */ + perform_lmc_reset(priv, node, if_num); + + // Make sure scrambling is disabled during init... + ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + ctrl.s.scramble_ena = 0; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); + + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0); + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0); + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0); + + odt_idx = min(dimm_count - 1, 3); + + switch (num_ranks) { + case 1: + odt_config = odt_1rank_config; + break; + case 2: + odt_config = odt_2rank_config; + break; + case 4: + odt_config = odt_4rank_config; + break; + default: + odt_config = disable_odt_config; + printf("Unsupported number of ranks: %d\n", num_ranks); + ++fatal_error; + } + + /* + * 4.8.5 Early LMC Initialization + * + * All of DDR PLL, LMC CK, and LMC DRESET initializations must be + * completed prior to starting this LMC initialization sequence. + * + * Perform the following five substeps for early LMC initialization: + * + * 1. Software must ensure there are no pending DRAM transactions. + * + * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0, + * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0, + * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM, + * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2, + * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with + * appropriate values. All sections in this chapter can be used to + * derive proper register settings. + */ + + /* LMC(0)_CONFIG */ + lmc_config(priv); + + /* LMC(0)_CONTROL */ + lmc_control(priv); + + /* LMC(0)_TIMING_PARAMS0 */ + lmc_timing_params0(priv); + + /* LMC(0)_TIMING_PARAMS1 */ + lmc_timing_params1(priv); + + /* LMC(0)_TIMING_PARAMS2 */ + lmc_timing_params2(priv); + + /* LMC(0)_MODEREG_PARAMS0 */ + lmc_modereg_params0(priv); + + /* LMC(0)_MODEREG_PARAMS1 */ + lmc_modereg_params1(priv); + + /* LMC(0)_MODEREG_PARAMS2 */ + lmc_modereg_params2(priv); + + /* LMC(0)_MODEREG_PARAMS3 */ + lmc_modereg_params3(priv); + + /* LMC(0)_NXM */ + lmc_nxm(priv); + + /* LMC(0)_WODT_MASK */ + lmc_wodt_mask(priv); + + /* LMC(0)_RODT_MASK */ + lmc_rodt_mask(priv); + + /* LMC(0)_COMP_CTL2 */ + lmc_comp_ctl2(priv); + + /* LMC(0)_PHY_CTL */ + lmc_phy_ctl(priv); + + /* LMC(0)_EXT_CONFIG */ + lmc_ext_config(priv); + + /* LMC(0)_EXT_CONFIG2 */ + lmc_ext_config2(priv); + + /* LMC(0)_DIMM0/1_PARAMS */ + lmc_dimm01_params(priv); + + ret = lmc_rank_init(priv); + if (ret < 0) + return 0; /* 0 indicates problem */ + + lmc_config_2(priv); + + lmc_write_leveling(priv); + + lmc_read_leveling(priv); + + lmc_workaround(priv); + + ret = lmc_sw_write_leveling(priv); + if (ret < 0) + return 0; /* 0 indicates problem */ + + // this sometimes causes stack overflow crashes.. + // display only for DDR4 RDIMMs. + if (ddr_type == DDR4_DRAM && spd_rdimm) { + int i; + + for (i = 0; i < 3; i += 2) // just pages 0 and 2 for now.. + display_mpr_page(priv, rank_mask, if_num, i); + } + + lmc_dll(priv); + + lmc_workaround_2(priv); + + lmc_final(priv); + + lmc_scrambling(priv); + + return mem_size_mbytes; +} + +///// HW-assist byte DLL offset tuning ////// + +static int cvmx_dram_get_num_lmc(struct ddr_priv *priv) +{ + union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2; + + if (octeon_is_cpuid(OCTEON_CN70XX)) + return 1; + + if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) { + // sample LMC1 + lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1)); + if (lmcx_dll_ctl2.cn78xx.intf_en) + return 2; + else + return 1; + } + + // for CN78XX, LMCs are always active in pairs, and always LMC0/1 + // so, we sample LMC2 to see if 2 and 3 are active + lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2)); + if (lmcx_dll_ctl2.cn78xx.intf_en) + return 4; + else + return 2; +} + +// got to do these here, even though already defined in BDK + +// all DDR3, and DDR4 x16 today, use only 3 bank bits; +// DDR4 x4 and x8 always have 4 bank bits +// NOTE: this will change in the future, when DDR4 x16 devices can +// come with 16 banks!! FIXME!! +static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc) +{ + union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2; + union cvmx_lmcx_config lmcx_config; + union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl; + int bank_width; + + // can always read this + lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc)); + + if (lmcx_dll_ctl2.cn78xx.dreset) // check LMCn + return 0; + + lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc)); + lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc)); + + bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) && + (lmcx_config.s.bg2_enable)) ? 4 : 3; + + return bank_width; +} + +#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1)) +#define ADDRESS_HOLE 0x10000000ULL + +static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address, + int *node, int *lmc, int *dimm, + int *prank, int *lrank, int *bank, + int *row, int *col) +{ + int bank_lsb, xbits; + union cvmx_l2c_ctl l2c_ctl; + union cvmx_lmcx_config lmcx_config; + union cvmx_lmcx_control lmcx_control; + union cvmx_lmcx_ext_config ext_config; + int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18; + int bank_width; + int dimm_lsb; + int dimm_width; + int prank_lsb, lrank_lsb; + int prank_width, lrank_width; + int row_lsb; + int row_width; + int col_hi_lsb; + int col_hi_width; + int col_hi; + + if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) + bitno = 18; + + *node = EXTRACT(address, 40, 2); /* Address bits [41:40] */ + + address &= (1ULL << 40) - 1; // lop off any node bits or above + if (address >= ADDRESS_HOLE) // adjust down if at HOLE or above + address -= ADDRESS_HOLE; + + /* Determine the LMC controllers */ + l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL); + + /* xbits depends on number of LMCs */ + xbits = cvmx_dram_get_num_lmc(priv) >> 1; // 4->2, 2->1, 1->0 + bank_lsb = 7 + xbits; + + /* LMC number is probably aliased */ + if (l2c_ctl.s.disidxalias) { + *lmc = EXTRACT(address, 7, xbits); + } else { + *lmc = EXTRACT(address, 7, xbits) ^ + EXTRACT(address, bitno, xbits) ^ + EXTRACT(address, 12, xbits); + } + + /* Figure out the bank field width */ + lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc)); + ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc)); + bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc); + + /* Extract additional info from the LMC_CONFIG CSR */ + dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits; + dimm_width = 40 - dimm_lsb; + prank_lsb = dimm_lsb - lmcx_config.s.rank_ena; + prank_width = dimm_lsb - prank_lsb; + lrank_lsb = prank_lsb - ext_config.s.dimm0_cid; + lrank_width = prank_lsb - lrank_lsb; + row_lsb = 14 + lmcx_config.s.row_lsb + xbits; + row_width = lrank_lsb - row_lsb; + col_hi_lsb = bank_lsb + bank_width; + col_hi_width = row_lsb - col_hi_lsb; + + /* Extract the parts of the address */ + *dimm = EXTRACT(address, dimm_lsb, dimm_width); + *prank = EXTRACT(address, prank_lsb, prank_width); + *lrank = EXTRACT(address, lrank_lsb, lrank_width); + *row = EXTRACT(address, row_lsb, row_width); + + /* bank calculation may be aliased... */ + lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc)); + if (lmcx_control.s.xor_bank) { + *bank = EXTRACT(address, bank_lsb, bank_width) ^ + EXTRACT(address, 12 + xbits, bank_width); + } else { + *bank = EXTRACT(address, bank_lsb, bank_width); + } + + /* LMC number already extracted */ + col_hi = EXTRACT(address, col_hi_lsb, col_hi_width); + *col = EXTRACT(address, 3, 4) | (col_hi << 4); + /* Bus byte is address bits [2:0]. Unused here */ +} + +// end of added workarounds + +// NOTE: "mode" argument: +// DBTRAIN_TEST: for testing using GP patterns, includes ECC +// DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns) +// DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC +// NOTE: trust the caller to specify the correct/supported mode +// +static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p, + int mode, u64 *xor_data) +{ + u64 p1; + u64 k; + int errors = 0; + + u64 mpr_data0, mpr_data1; + u64 bad_bits[2] = { 0, 0 }; + + int node_address, lmc, dimm; + int prank, lrank; + int bank, row, col; + int save_or_dis; + int byte; + int ba_loop, ba_bits; + + union cvmx_lmcx_rlevel_ctl rlevel_ctl; + union cvmx_lmcx_dbtrain_ctl dbtrain_ctl; + union cvmx_lmcx_phy_ctl phy_ctl; + + int biter_errs; + + // FIXME: K iterations set to 4 for now. + // FIXME: decrement to increase interations. + // FIXME: must be no less than 22 to stay above an LMC hash field. + int kshift = 27; + + const char *s; + int node = 0; + + // allow override default setting for kshift + s = env_get("ddr_tune_set_kshift"); + if (s) { + int temp = simple_strtoul(s, NULL, 0); + + if (temp < 22 || temp > 28) { + debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n", + node, if_num, temp, kshift); + } else { + debug("N%d.LMC%d: overriding kshift (%d) to %d\n", + node, if_num, kshift, temp); + kshift = temp; + } + } + + /* + * 1) Make sure that RLEVEL_CTL[OR_DIS] = 0. + */ + rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); + save_or_dis = rlevel_ctl.s.or_dis; + /* or_dis must be disabled for this sequence */ + rlevel_ctl.s.or_dis = 0; + lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64); + + /* + * NOTE: this step done in the calling routine(s)... + * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern + * of choice. + * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower + * (rising edge) 64 bits of data. + * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper + * (falling edge) 64 bits of data. + * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower + * (rising edge <7:0>) and upper (falling edge <15:8>) ECC data. + */ + + // final address must include LMC and node + p |= (if_num << 7); /* Map address into proper interface */ + p |= (u64)node << CVMX_NODE_MEM_SHIFT; // map to node + + /* + * Add base offset to both test regions to not clobber u-boot stuff + * when running from L2 for NAND boot. + */ + p += 0x20000000; // offset to 512MB, ie above THE HOLE!!! + p |= 1ull << 63; // needed for OCTEON + + errors = 0; + + cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm, + &prank, &lrank, &bank, &row, &col); + debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n", + __func__, p, node_address, lmc, dimm, prank, lrank, bank, + row, col); + + // only check once per call, and ignore if no match... + if ((int)node != node_address) { + printf("ERROR: Node address mismatch\n"); + return 0; + } + if (lmc != if_num) { + printf("ERROR: LMC address mismatch\n"); + return 0; + } + + /* + * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as + * it’s a one-shot operation). This is to get into the habit of + * resetting PHY’s SILO to the original 0 location. + */ + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.phy_reset = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + /* + * Walk through a range of addresses avoiding bits that alias + * interfaces on the CN88XX. + */ + + // FIXME: want to try to keep the K increment from affecting the + // LMC via hash, so keep it above bit 21 we also want to keep k + // less than the base offset of bit 29 (512MB) + + for (k = 0; k < (1UL << 29); k += (1UL << kshift)) { + // FIXME: the sequence will interate over 1/2 cacheline + // FIXME: for each unit specified in "read_cmd_count", + // FIXME: so, we setup each sequence to do the max cachelines + // it can + + p1 = p + k; + + cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc, + &dimm, &prank, &lrank, &bank, + &row, &col); + + /* + * 2) Setup the fields of the CSR DBTRAIN_CTL as follows: + * a. COL, ROW, BA, BG, PRANK points to the starting point + * of the address. + * You can just set them to all 0. + * b. RW_TRAIN – set this to 1. + * c. TCCD_L – set this to 0. + * d. READ_CMD_COUNT – instruct the sequence to the how many + * writes/reads. + * It is 5 bits field, so set to 31 of maximum # of r/w. + */ + dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num)); + dbtrain_ctl.s.column_a = col; + dbtrain_ctl.s.row_a = row; + dbtrain_ctl.s.bg = (bank >> 2) & 3; + dbtrain_ctl.s.prank = (dimm * 2) + prank; // FIXME? + dbtrain_ctl.s.lrank = lrank; // FIXME? + dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI); + dbtrain_ctl.s.write_ena = 1; + dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + // max count on chips that support it + dbtrain_ctl.s.cmd_count_ext = 3; + } else { + // max count pass 1.x + dbtrain_ctl.s.cmd_count_ext = 0; + } + + dbtrain_ctl.s.rw_train = 1; + dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI); + // LFSR should only be on when chip supports it... + dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0; + + biter_errs = 0; + + // for each address, iterate over the 4 "banks" in the BA + for (ba_loop = 0, ba_bits = bank & 3; + ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) { + dbtrain_ctl.s.ba = ba_bits; + lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num), + dbtrain_ctl.u64); + + /* + * We will use the RW_TRAINING sequence (14) for + * this task. + * + * 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14, + * SEQ_CTL[INIT_START] = 1). + * 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion. + */ + oct3_ddr3_seq(priv, prank, if_num, 14); + + /* + * 6) Read MPR_DATA0 and MPR_DATA1 for results. + * a. MPR_DATA0[MPR_DATA<63:0>] – comparison results + * for DQ63:DQ0. (1 means MATCH, 0 means FAIL). + * b. MPR_DATA1[MPR_DATA<7:0>] – comparison results + * for ECC bit7:0. + */ + mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num)); + mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num)); + + /* + * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically + * clears this as it’s a one-shot operation). + * This is to get into the habit of resetting PHY’s + * SILO to the original 0 location. + */ + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.phy_reset = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + // bypass any error checking or updating when DBI mode + if (mode == DBTRAIN_DBI) + continue; + + // data bytes + if (~mpr_data0) { + for (byte = 0; byte < 8; byte++) { + if ((~mpr_data0 >> (8 * byte)) & 0xffUL) + biter_errs |= (1 << byte); + } + // accumulate bad bits + bad_bits[0] |= ~mpr_data0; + } + + // include ECC byte errors + if (~mpr_data1 & 0xffUL) { + biter_errs |= (1 << 8); + bad_bits[1] |= ~mpr_data1 & 0xffUL; + } + } + + errors |= biter_errs; + } /* end for (k=...) */ + + rlevel_ctl.s.or_dis = save_or_dis; + lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64); + + // send the bad bits back... + if (mode != DBTRAIN_DBI && xor_data) { + xor_data[0] = bad_bits[0]; + xor_data[1] = bad_bits[1]; + } + + return errors; +} + +// setup default for byte test pattern array +// take these from the HRM section 6.9.13 +static const u64 byte_pattern_0[] = { + 0xFFAAFFFFFF55FFFFULL, // GP0 + 0x55555555AAAAAAAAULL, // GP1 + 0xAA55AAAAULL, // GP2 +}; + +static const u64 byte_pattern_1[] = { + 0xFBF7EFDFBF7FFEFDULL, // GP0 + 0x0F1E3C78F0E1C387ULL, // GP1 + 0xF0E1BF7FULL, // GP2 +}; + +// this is from Andrew via LFSR with PRBS=0xFFFFAAAA +static const u64 byte_pattern_2[] = { + 0xEE55AADDEE55AADDULL, // GP0 + 0x55AADDEE55AADDEEULL, // GP1 + 0x55EEULL, // GP2 +}; + +// this is from Mike via LFSR with PRBS=0x4A519909 +static const u64 byte_pattern_3[] = { + 0x0088CCEE0088CCEEULL, // GP0 + 0xBB552211BB552211ULL, // GP1 + 0xBB00ULL, // GP2 +}; + +static const u64 *byte_patterns[4] = { + byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3 +}; + +static const u32 lfsr_patterns[4] = { + 0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL +}; + +#define NUM_BYTE_PATTERNS 4 + +#define DEFAULT_BYTE_BURSTS 32 // compromise between time and rigor + +static void setup_hw_pattern(struct ddr_priv *priv, int lmc, + const u64 *pattern_p) +{ + /* + * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern + * of choice. + * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower + * (rising edge) 64 bits of data. + * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper + * (falling edge) 64 bits of data. + * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower + * (rising edge <7:0>) and upper + * (falling edge <15:8>) ECC data. + */ + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]); + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]); + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]); +} + +static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data) +{ + union cvmx_lmcx_char_ctl char_ctl; + u32 prbs; + const char *s; + + s = env_get("ddr_lfsr_prbs"); + if (s) + prbs = simple_strtoul(s, NULL, 0); + else + prbs = data; + + /* + * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1 + * here data comes from the LFSR generating a PRBS pattern + * CHAR_CTL.EN = 0 + * CHAR_CTL.SEL = 0; // for PRBS + * CHAR_CTL.DR = 1; + * CHAR_CTL.PRBS = setup for whatever type of PRBS to send + * CHAR_CTL.SKEW_ON = 1; + */ + char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc)); + char_ctl.s.en = 0; + char_ctl.s.sel = 0; + char_ctl.s.dr = 1; + char_ctl.s.prbs = prbs; + char_ctl.s.skew_on = 1; + lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64); +} + +static int choose_best_hw_patterns(int lmc, int mode) +{ + int new_mode = mode; + const char *s; + + switch (mode) { + case DBTRAIN_TEST: // always choose LFSR if chip supports it + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) { + int lfsr_enable = 1; + + s = env_get("ddr_allow_lfsr"); + if (s) { + // override? + lfsr_enable = !!strtoul(s, NULL, 0); + } + + if (lfsr_enable) + new_mode = DBTRAIN_LFSR; + } + break; + + case DBTRAIN_DBI: // possibly can allow LFSR use? + break; + + case DBTRAIN_LFSR: // forced already + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) { + debug("ERROR: illegal HW assist mode %d\n", mode); + new_mode = DBTRAIN_TEST; + } + break; + + default: + debug("ERROR: unknown HW assist mode %d\n", mode); + } + + if (new_mode != mode) + debug("%s: changing mode %d to %d\n", __func__, mode, new_mode); + + return new_mode; +} + +int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr, + int mode, u64 *xor_data) +{ + int pattern; + const u64 *pattern_p; + int errs, errors = 0; + + // FIXME? always choose LFSR if chip supports it??? + mode = choose_best_hw_patterns(lmc, mode); + + for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { + if (mode == DBTRAIN_LFSR) { + setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]); + } else { + pattern_p = byte_patterns[pattern]; + setup_hw_pattern(priv, lmc, pattern_p); + } + errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data); + + debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n", + __func__, pattern, phys_addr, errs); + + errors |= errs; + } + + return errors; +} + +static void hw_assist_test_dll_offset(struct ddr_priv *priv, + int dll_offset_mode, int lmc, + int bytelane, + int if_64b, + u64 dram_tune_rank_offset, + int dram_tune_byte_bursts) +{ + int byte_offset, new_best_offset[9]; + int rank_delay_start[4][9]; + int rank_delay_count[4][9]; + int rank_delay_best_start[4][9]; + int rank_delay_best_count[4][9]; + int errors[4], off_errors, tot_errors; + int rank_mask, rankx, active_ranks; + int pattern; + const u64 *pattern_p; + int byte; + char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; + int pat_best_offset[9]; + u64 phys_addr; + int pat_beg, pat_end; + int rank_beg, rank_end; + int byte_lo, byte_hi; + union cvmx_lmcx_config lmcx_config; + u64 hw_rank_offset; + int num_lmcs = cvmx_dram_get_num_lmc(priv); + // FIXME? always choose LFSR if chip supports it??? + int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST); + int node = 0; + + if (bytelane == 0x0A) { // all bytelanes + byte_lo = 0; + byte_hi = 8; + } else { // just 1 + byte_lo = bytelane; + byte_hi = bytelane; + } + + lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + rank_mask = lmcx_config.s.init_status; + + // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs + hw_rank_offset = + 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + + (num_lmcs / 2)); + + debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n", + node, __func__, lmc, (unsigned long long)hw_rank_offset); + + // start of pattern loop + // we do the set of tests for each pattern supplied... + + memset(new_best_offset, 0, sizeof(new_best_offset)); + for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { + memset(pat_best_offset, 0, sizeof(pat_best_offset)); + + if (mode == DBTRAIN_TEST) { + pattern_p = byte_patterns[pattern]; + setup_hw_pattern(priv, lmc, pattern_p); + } else { + setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]); + } + + // now loop through all legal values for the DLL byte offset... + +#define BYTE_OFFSET_INCR 3 // FIXME: make this tunable? + + tot_errors = 0; + + memset(rank_delay_count, 0, sizeof(rank_delay_count)); + memset(rank_delay_start, 0, sizeof(rank_delay_start)); + memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count)); + memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start)); + + for (byte_offset = -63; byte_offset < 64; + byte_offset += BYTE_OFFSET_INCR) { + // do the setup on the active LMC + // set the bytelanes DLL offsets + change_dll_offset_enable(priv, lmc, 0); + // FIXME? bytelane? + load_dll_offset(priv, lmc, dll_offset_mode, + byte_offset, bytelane); + change_dll_offset_enable(priv, lmc, 1); + + //bdk_watchdog_poke(); + + // run the test on each rank + // only 1 call per rank should be enough, let the + // bursts, loops, etc, control the load... + + // errors for this byte_offset, all ranks + off_errors = 0; + + active_ranks = 0; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + phys_addr = hw_rank_offset * active_ranks; + // FIXME: now done by test_dram_byte_hw() + //phys_addr |= (lmc << 7); + //phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT; + + active_ranks++; + + // NOTE: return is a now a bitmask of the + // erroring bytelanes. + errors[rankx] = + test_dram_byte_hw(priv, lmc, phys_addr, + mode, NULL); + + // process any errors in the bytelane(s) that + // are being tested + for (byte = byte_lo; byte <= byte_hi; byte++) { + // check errors + // yes, an error in the byte lane in + // this rank + if (errors[rankx] & (1 << byte)) { + off_errors |= (1 << byte); + + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n", + node, lmc, rankx, byte, + mode_str, byte_offset, + phys_addr); + + // had started run + if (rank_delay_count + [rankx][byte] > 0) { + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n", + node, lmc, rankx, + byte, mode_str, + byte_offset); + // stop now + rank_delay_count + [rankx][byte] = + 0; + } + // FIXME: else had not started + // run - nothing else to do? + } else { + // no error in the byte lane + // first success, set run start + if (rank_delay_count[rankx] + [byte] == 0) { + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n", + node, lmc, rankx, + byte, mode_str, + byte_offset); + rank_delay_start[rankx] + [byte] = + byte_offset; + } + // bump run length + rank_delay_count[rankx][byte] + += BYTE_OFFSET_INCR; + + // is this now the biggest + // window? + if (rank_delay_count[rankx] + [byte] > + rank_delay_best_count[rankx] + [byte]) { + rank_delay_best_count + [rankx][byte] = + rank_delay_count + [rankx][byte]; + rank_delay_best_start + [rankx][byte] = + rank_delay_start + [rankx][byte]; + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n", + node, lmc, rankx, + byte, mode_str, + byte_offset, + rank_delay_best_start + [rankx][byte], + rank_delay_best_count + [rankx][byte]); + } + } + } + } /* for (rankx = 0; rankx < 4; rankx++) */ + + tot_errors |= off_errors; + } + + // set the bytelanes DLL offsets all back to 0 + change_dll_offset_enable(priv, lmc, 0); + load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane); + change_dll_offset_enable(priv, lmc, 1); + + // now choose the best byte_offsets for this pattern + // according to the best windows of the tested ranks + // calculate offset by constructing an average window + // from the rank windows + for (byte = byte_lo; byte <= byte_hi; byte++) { + pat_beg = -999; + pat_end = 999; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + rank_beg = rank_delay_best_start[rankx][byte]; + pat_beg = max(pat_beg, rank_beg); + rank_end = rank_beg + + rank_delay_best_count[rankx][byte] - + BYTE_OFFSET_INCR; + pat_end = min(pat_end, rank_end); + + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n", + node, lmc, rankx, byte, mode_str, + rank_beg, rank_end); + + } /* for (rankx = 0; rankx < 4; rankx++) */ + + pat_best_offset[byte] = (pat_end + pat_beg) / 2; + + // sum the pattern averages + new_best_offset[byte] += pat_best_offset[byte]; + } + + // now print them on 1 line, descending order... + debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :", + node, lmc, mode_str, pattern); + for (byte = byte_hi; byte >= byte_lo; --byte) + debug(" %4d", pat_best_offset[byte]); + debug("\n"); + } + // end of pattern loop + + debug("N%d.LMC%d: HW DLL %s Offset Average : ", node, lmc, mode_str); + + // print in decending byte index order + for (byte = byte_hi; byte >= byte_lo; --byte) { + // create the new average NINT + new_best_offset[byte] = divide_nint(new_best_offset[byte], + NUM_BYTE_PATTERNS); + + // print the best offsets from all patterns + + // print just the offset of all the bytes + if (bytelane == 0x0A) + debug("%4d ", new_best_offset[byte]); + else // print the bytelanes also + debug("(byte %d) %4d ", byte, new_best_offset[byte]); + + // done with testing, load up the best offsets we found... + // disable offsets while we load... + change_dll_offset_enable(priv, lmc, 0); + load_dll_offset(priv, lmc, dll_offset_mode, + new_best_offset[byte], byte); + // re-enable the offsets now that we are done loading + change_dll_offset_enable(priv, lmc, 1); + } + + debug("\n"); +} + +/* + * Automatically adjust the DLL offset for the selected bytelane using + * hardware-assist + */ +static int perform_HW_dll_offset_tuning(struct ddr_priv *priv, + int dll_offset_mode, int bytelane) +{ + int if_64b; + int save_ecc_ena[4]; + union cvmx_lmcx_config lmc_config; + int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv); + const char *s; + int loops = 1, loop; + int by; + u64 dram_tune_rank_offset; + int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS; + int node = 0; + + // see if we want to do the tuning more than once per LMC... + s = env_get("ddr_tune_ecc_loops"); + if (s) + loops = strtoul(s, NULL, 0); + + // allow override of the test repeats (bursts) + s = env_get("ddr_tune_byte_bursts"); + if (s) + dram_tune_byte_bursts = strtoul(s, NULL, 10); + + // print current working values + debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n", + node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS); + + // FIXME? get flag from LMC0 only + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0)); + if_64b = !lmc_config.s.mode32b; + + // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs + dram_tune_rank_offset = + 1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena + + (num_lmcs / 2)); + + // do once for each active LMC + + for (lmc = 0; lmc < num_lmcs; lmc++) { + debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", + node, lmc, bytelane); + + /* Enable ECC for the HW tests */ + // NOTE: we do enable ECC, but the HW tests used will not + // generate "visible" errors + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + save_ecc_ena[lmc] = lmc_config.s.ecc_ena; + lmc_config.s.ecc_ena = 1; + lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64); + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + + // testing is done on a single LMC at a time + // FIXME: for now, loop here to show what happens multiple times + for (loop = 0; loop < loops; loop++) { + /* Perform DLL offset tuning */ + hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc, + bytelane, + if_64b, dram_tune_rank_offset, + dram_tune_byte_bursts); + } + + // perform cleanup on active LMC + debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", + node, lmc, bytelane); + + /* Restore ECC for DRAM tests */ + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + lmc_config.s.ecc_ena = save_ecc_ena[lmc]; + lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64); + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + + // finally, see if there are any read offset overrides + // after tuning + for (by = 0; by < 9; by++) { + s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by); + if (s) { + int dllro = strtoul(s, NULL, 10); + + change_dll_offset_enable(priv, lmc, 0); + load_dll_offset(priv, lmc, 2, dllro, by); + change_dll_offset_enable(priv, lmc, 1); + } + } + + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + // finish up... + + return 0; + +} /* perform_HW_dll_offset_tuning */ + +// this routine simply makes the calls to the tuning routine and returns +// any errors +static int cvmx_tune_node(struct ddr_priv *priv) +{ + int errs, tot_errs; + int do_dllwo = 0; // default to NO + const char *str; + int node = 0; + + // Automatically tune the data and ECC byte DLL read offsets + debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node); + errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */); + debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n", + node, errs); + tot_errs = errs; + + // disabled by default for now, does not seem to be needed? + // Automatically tune the data and ECC byte DLL write offsets + // allow override of default setting + str = env_get("ddr_tune_write_offsets"); + if (str) + do_dllwo = !!strtoul(str, NULL, 0); + if (do_dllwo) { + debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node); + errs = + perform_HW_dll_offset_tuning(priv, 1, + 0x0A /* all bytelanes */); + debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n", + node, errs); + tot_errs += errs; + } + + return tot_errs; +} + +// this routine makes the calls to the tuning routines when criteria are met +// intended to be called for automated tuning, to apply filtering... + +#define IS_DDR4 1 +#define IS_DDR3 0 +#define IS_RDIMM 1 +#define IS_UDIMM 0 +#define IS_1SLOT 1 +#define IS_2SLOT 0 + +// FIXME: DDR3 is not tuned +static const u32 ddr_speed_filter[2][2][2] = { + [IS_DDR4] = { + [IS_RDIMM] = { + [IS_1SLOT] = 940, + [IS_2SLOT] = 800}, + [IS_UDIMM] = { + [IS_1SLOT] = 1050, + [IS_2SLOT] = 940}, + }, + [IS_DDR3] = { + [IS_RDIMM] = { + [IS_1SLOT] = 0, // disabled + [IS_2SLOT] = 0 // disabled + }, + [IS_UDIMM] = { + [IS_1SLOT] = 0, // disabled + [IS_2SLOT] = 0 // disabled + } + } +}; + +void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed) +{ + const char *s; + union cvmx_lmcx_config lmc_config; + union cvmx_lmcx_control lmc_control; + union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl; + int is_ddr4; + int is_rdimm; + int is_1slot; + int do_tune = 0; + u32 ddr_min_speed; + int node = 0; + + // scale it down from Hz to MHz + ddr_speed = divide_nint(ddr_speed, 1000000); + + // FIXME: allow an override here so that all configs can be tuned + // or none + // If the envvar is defined, always either force it or avoid it + // accordingly + s = env_get("ddr_tune_all_configs"); + if (s) { + do_tune = !!strtoul(s, NULL, 0); + printf("N%d: DRAM auto-tuning %s.\n", node, + (do_tune) ? "forced" : "disabled"); + if (do_tune) + cvmx_tune_node(priv); + + return; + } + + // filter the tuning calls here... + // determine if we should/can run automatically for this configuration + // + // FIXME: tune only when the configuration indicates it will help: + // DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed + // + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0)); // sample LMC0 + lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0)); // sample LMC0 + // sample LMC0 + lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); + + is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0); + is_rdimm = (lmc_control.s.rdimm_ena != 0); + // HACK, should do better + is_1slot = (lmc_config.s.init_status < 4); + + ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot]; + do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed)); + + debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n", + node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U', + (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not"); + + // call the tuning routine, filtering is done... + if (do_tune) + cvmx_tune_node(priv); +} + +/* + * first pattern example: + * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff; + * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff; + * GENERAL_PURPOSE0.DATA == 16'h0000; + */ + +static const u64 dbi_pattern[3] = { + 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL }; + +// Perform switchover to DBI +static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc) +{ + union cvmx_lmcx_modereg_params0 modereg_params0; + union cvmx_lmcx_modereg_params3 modereg_params3; + union cvmx_lmcx_phy_ctl phy_ctl; + union cvmx_lmcx_config lmcx_config; + union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl; + int rank_mask, rankx, active_ranks; + u64 phys_addr, rank_offset; + int num_lmcs, errors; + int dbi_settings[9], byte, unlocked, retries; + int ecc_ena; + int rank_max = 1; // FIXME: make this 4 to try all the ranks + int node = 0; + + ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); + + lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + rank_mask = lmcx_config.s.init_status; + ecc_ena = lmcx_config.s.ecc_ena; + + // FIXME: must filter out any non-supported configs + // ie, no DDR3, no x4 devices + if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) { + debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n", + node, lmc); + return; + } + + // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs + num_lmcs = cvmx_dram_get_num_lmc(priv); + rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - + lmcx_config.s.rank_ena + (num_lmcs / 2)); + + debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n", + node, lmc, rank_mask, (unsigned long long)rank_offset); + + /* + * 1. conduct the current init sequence as usual all the way + * after software write leveling. + */ + + read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings); + + display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings, + " INIT"); + + /* + * 2. set DBI related CSRs as below and issue MR write. + * MODEREG_PARAMS3.WR_DBI=1 + * MODEREG_PARAMS3.RD_DBI=1 + * PHY_CTL.DBI_MODE_ENA=1 + */ + modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc)); + + modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc)); + modereg_params3.s.wr_dbi = 1; + modereg_params3.s.rd_dbi = 1; + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64); + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc)); + phy_ctl.s.dbi_mode_ena = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64); + + /* + * there are two options for data to send. Lets start with (1) + * and could move to (2) in the future: + * + * 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where + * this does not exist) set data directly in these reigsters. + * this will yield a clk/2 pattern: + * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff; + * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff; + * GENERAL_PURPOSE0.DATA == 16'h0000; + * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1 + * here data comes from the LFSR generating a PRBS pattern + * CHAR_CTL.EN = 0 + * CHAR_CTL.SEL = 0; // for PRBS + * CHAR_CTL.DR = 1; + * CHAR_CTL.PRBS = setup for whatever type of PRBS to send + * CHAR_CTL.SKEW_ON = 1; + */ + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]); + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]); + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]); + + /* + * 3. adjust cas_latency (only necessary if RD_DBI is set). + * here is my code for doing this: + * + * if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin + * case (csr_model.MODEREG_PARAMS0.CL.value) + * 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2; + * // CL 9-13 -> 11-15 + * 5: begin + * // CL=14, CWL=10,12 gets +2, CLW=11,14 gets +3 + * if((csr_model.MODEREG_PARAMS0.CWL.value==1 || + * csr_model.MODEREG_PARAMS0.CWL.value==3)) + * csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16 + * else + * csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17 + * end + * 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18 + * 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19 + * 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21 + * default: + * `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1, + * I am not sure what to do.", + * mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value)) + * endcase + * end + */ + + if (modereg_params3.s.rd_dbi == 1) { + int old_cl, new_cl, old_cwl; + + old_cl = modereg_params0.s.cl; + old_cwl = modereg_params0.s.cwl; + + switch (old_cl) { + case 0: + case 1: + case 2: + case 3: + case 4: + new_cl = old_cl + 2; + break; // 9-13->11-15 + // CL=14, CWL=10,12 gets +2, CLW=11,14 gets +3 + case 5: + new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13; + break; + case 6: + new_cl = 8; + break; // 15->18 + case 7: + new_cl = 14; + break; // 16->19 + case 8: + new_cl = 15; + break; // 18->21 + default: + printf("ERROR: Bad CL value (%d) for DBI switchover.\n", + old_cl); + // FIXME: need to error exit here... + old_cl = -1; + new_cl = -1; + break; + } + debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n", + node, lmc, old_cl, old_cwl, new_cl); + modereg_params0.s.cl = new_cl; + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc), + modereg_params0.u64); + } + + /* + * 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence + * SEQ_CTL[SEQ_SEL] = MRW. + */ + // Use the default values, from the CSRs fields + // also, do B-sides for RDIMMs... + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + // for RDIMMs, B-side writes should get done automatically + // when the A-side is written + ddr4_mrw(priv, lmc, rankx, -1 /* use_default */, + 0 /*MRreg */, 0 /*A-side */); /* MR0 */ + ddr4_mrw(priv, lmc, rankx, -1 /* use_default */, + 5 /*MRreg */, 0 /*A-side */); /* MR5 */ + } + + /* + * 5. conduct DBI bit deskew training via the General Purpose + * R/W sequence (dbtrain). may need to run this over and over to get + * a lock (I need up to 5 in simulation): + * SEQ_CTL[SEQ_SEL] = RW_TRAINING (15) + * DBTRAIN_CTL.CMD_COUNT_EXT = all 1's + * DBTRAIN_CTL.READ_CMD_COUNT = all 1's + * DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L] + * DBTRAIN_CTL.RW_TRAIN = 1 + * DBTRAIN_CTL.READ_DQ_COUNT = dont care + * DBTRAIN_CTL.WRITE_ENA = 1; + * DBTRAIN_CTL.ACTIVATE = 1; + * DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a + * valid address + */ + + // NOW - do the training + debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc); + + active_ranks = 0; + for (rankx = 0; rankx < rank_max; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + phys_addr = rank_offset * active_ranks; + // FIXME: now done by test_dram_byte_hw() + + active_ranks++; + + retries = 0; + +restart_training: + + // NOTE: return is a bitmask of the erroring bytelanes - + // we only print it + errors = + test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL); + + debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n", + node, lmc, rankx, (unsigned long long)phys_addr, errors); + + // NEXT - check for locking + unlocked = 0; + read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings); + + for (byte = 0; byte < (8 + ecc_ena); byte++) + unlocked += (dbi_settings[byte] & 1) ^ 1; + + // FIXME: print out the DBI settings array after each rank? + if (rank_max > 1) // only when doing more than 1 rank + display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, + dbi_settings, " RANK"); + + if (unlocked > 0) { + debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n", + node, lmc, unlocked); + retries++; + if (retries < 10) { + goto restart_training; + } else { + debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n", + node, lmc, retries); + } + } + } /* for (rankx = 0; rankx < 4; rankx++) */ + + // print out the final DBI settings array + display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings, + "FINAL"); +} + +void cvmx_dbi_switchover(struct ddr_priv *priv) +{ + int lmc; + int num_lmcs = cvmx_dram_get_num_lmc(priv); + + for (lmc = 0; lmc < num_lmcs; lmc++) + cvmx_dbi_switchover_interface(priv, lmc); +} diff --git a/drivers/ram/octeon/octeon_ddr.c b/drivers/ram/octeon/octeon_ddr.c new file mode 100644 index 0000000000..757436b9d3 --- /dev/null +++ b/drivers/ram/octeon/octeon_ddr.c @@ -0,0 +1,2728 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define CONFIG_REF_HERTZ 50000000 + +DECLARE_GLOBAL_DATA_PTR; + +/* Sign of an integer */ +static s64 _sign(s64 v) +{ + return (v < 0); +} + +#ifndef DDR_NO_DEBUG +char *lookup_env(struct ddr_priv *priv, const char *format, ...) +{ + char *s; + unsigned long value; + va_list args; + char buffer[64]; + + va_start(args, format); + vsnprintf(buffer, sizeof(buffer), format, args); + va_end(args); + + s = ddr_getenv_debug(priv, buffer); + if (s) { + value = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment %s=\"%s\" 0x%lx (%ld)\n", + buffer, s, value, value); + } + + return s; +} + +char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...) +{ + char *s; + u64 value; + va_list args; + char buffer[64]; + + va_start(args, format); + vsnprintf(buffer, sizeof(buffer), format, args); + va_end(args); + + s = ddr_getenv_debug(priv, buffer); + if (s) { + value = simple_strtoull(s, NULL, 0); + printf("Parameter found in environment. %s = 0x%016llx\n", + buffer, value); + } + + return s; +} +#else +char *lookup_env(struct ddr_priv *priv, const char *format, ...) +{ + return NULL; +} + +char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...) +{ + return NULL; +} +#endif + +/* Number of L2C Tag-and-data sections (TADs) that are connected to LMC. */ +#define CVMX_L2C_TADS ((OCTEON_IS_MODEL(OCTEON_CN68XX) || \ + OCTEON_IS_MODEL(OCTEON_CN73XX) || \ + OCTEON_IS_MODEL(OCTEON_CNF75XX)) ? 4 : \ + (OCTEON_IS_MODEL(OCTEON_CN78XX)) ? 8 : 1) + +/* Number of L2C IOBs connected to LMC. */ +#define CVMX_L2C_IOBS ((OCTEON_IS_MODEL(OCTEON_CN68XX) || \ + OCTEON_IS_MODEL(OCTEON_CN78XX) || \ + OCTEON_IS_MODEL(OCTEON_CN73XX) || \ + OCTEON_IS_MODEL(OCTEON_CNF75XX)) ? 2 : 1) + +#define CVMX_L2C_MAX_MEMSZ_ALLOWED (OCTEON_IS_OCTEON2() ? \ + (32 * CVMX_L2C_TADS) : \ + (OCTEON_IS_MODEL(OCTEON_CN70XX) ? \ + 512 : (OCTEON_IS_OCTEON3() ? 1024 : 0))) + +/** + * Initialize the BIG address in L2C+DRAM to generate proper error + * on reading/writing to an non-existent memory location. + * + * @param node OCX CPU node number + * @param mem_size Amount of DRAM configured in MB. + * @param mode Allow/Disallow reporting errors L2C_INT_SUM[BIGRD,BIGWR]. + */ +static void cvmx_l2c_set_big_size(struct ddr_priv *priv, u64 mem_size, int mode) +{ + if ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) && + !OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X)) { + union cvmx_l2c_big_ctl big_ctl; + int bits = 0, zero_bits = 0; + u64 mem; + + if (mem_size > (CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024ull)) { + printf("WARNING: Invalid memory size(%lld) requested, should be <= %lld\n", + mem_size, + (u64)CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024); + mem_size = CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024; + } + + mem = mem_size; + while (mem) { + if ((mem & 1) == 0) + zero_bits++; + bits++; + mem >>= 1; + } + + if ((bits - zero_bits) != 1 || (bits - 9) <= 0) { + printf("ERROR: Invalid DRAM size (%lld) requested, refer to L2C_BIG_CTL[maxdram] for valid options.\n", + mem_size); + return; + } + + /* + * The BIG/HOLE is logic is not supported in pass1 as per + * Errata L2C-17736 + */ + if (mode == 0 && OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) + mode = 1; + + big_ctl.u64 = 0; + big_ctl.s.maxdram = bits - 9; + big_ctl.cn61xx.disable = mode; + l2c_wr(priv, CVMX_L2C_BIG_CTL, big_ctl.u64); + } +} + +static u32 octeon3_refclock(u32 alt_refclk, u32 ddr_hertz, + struct dimm_config *dimm_config) +{ + u32 ddr_ref_hertz = CONFIG_REF_HERTZ; + int ddr_type; + int spd_dimm_type; + + debug("%s(%u, %u, %p)\n", __func__, alt_refclk, ddr_hertz, dimm_config); + + /* Octeon 3 case... */ + + /* we know whether alternate refclk is always wanted + * we also know already if we want 2133 MT/s + * if alt refclk not always wanted, then probe DDR and + * DIMM type if DDR4 and RDIMMs, then set desired refclk + * to 100MHz, otherwise to default (50MHz) + * depend on ddr_initialize() to do the refclk selection + * and validation/ + */ + if (alt_refclk) { + /* + * If alternate refclk was specified, let it override + * everything + */ + ddr_ref_hertz = alt_refclk * 1000000; + printf("%s: DRAM init: %d MHz refclk is REQUESTED ALWAYS\n", + __func__, alt_refclk); + } else if (ddr_hertz > 1000000000) { + ddr_type = get_ddr_type(dimm_config, 0); + spd_dimm_type = get_dimm_module_type(dimm_config, 0, ddr_type); + + debug("ddr type: 0x%x, dimm type: 0x%x\n", ddr_type, + spd_dimm_type); + /* Is DDR4 and RDIMM just to be sure. */ + if (ddr_type == DDR4_DRAM && + (spd_dimm_type == 1 || spd_dimm_type == 5 || + spd_dimm_type == 8)) { + /* Yes, we require 100MHz refclk, so set it. */ + ddr_ref_hertz = 100000000; + puts("DRAM init: 100 MHz refclk is REQUIRED\n"); + } + } + + debug("%s: speed: %u\n", __func__, ddr_ref_hertz); + return ddr_ref_hertz; +} + +int encode_row_lsb_ddr3(int row_lsb) +{ + int row_lsb_start = 14; + + /* Decoding for row_lsb */ + /* 000: row_lsb = mem_adr[14] */ + /* 001: row_lsb = mem_adr[15] */ + /* 010: row_lsb = mem_adr[16] */ + /* 011: row_lsb = mem_adr[17] */ + /* 100: row_lsb = mem_adr[18] */ + /* 101: row_lsb = mem_adr[19] */ + /* 110: row_lsb = mem_adr[20] */ + /* 111: RESERVED */ + + if (octeon_is_cpuid(OCTEON_CN6XXX) || + octeon_is_cpuid(OCTEON_CNF7XXX) || octeon_is_cpuid(OCTEON_CN7XXX)) + row_lsb_start = 14; + else + printf("ERROR: Unsupported Octeon model: 0x%x\n", + read_c0_prid()); + + return row_lsb - row_lsb_start; +} + +int encode_pbank_lsb_ddr3(int pbank_lsb) +{ + /* Decoding for pbank_lsb */ + /* 0000:DIMM = mem_adr[28] / rank = mem_adr[27] (if RANK_ENA) */ + /* 0001:DIMM = mem_adr[29] / rank = mem_adr[28] " */ + /* 0010:DIMM = mem_adr[30] / rank = mem_adr[29] " */ + /* 0011:DIMM = mem_adr[31] / rank = mem_adr[30] " */ + /* 0100:DIMM = mem_adr[32] / rank = mem_adr[31] " */ + /* 0101:DIMM = mem_adr[33] / rank = mem_adr[32] " */ + /* 0110:DIMM = mem_adr[34] / rank = mem_adr[33] " */ + /* 0111:DIMM = 0 / rank = mem_adr[34] " */ + /* 1000-1111: RESERVED */ + + int pbank_lsb_start = 0; + + if (octeon_is_cpuid(OCTEON_CN6XXX) || + octeon_is_cpuid(OCTEON_CNF7XXX) || octeon_is_cpuid(OCTEON_CN7XXX)) + pbank_lsb_start = 28; + else + printf("ERROR: Unsupported Octeon model: 0x%x\n", + read_c0_prid()); + + return pbank_lsb - pbank_lsb_start; +} + +static void set_ddr_clock_initialized(struct ddr_priv *priv, int if_num, + bool inited_flag) +{ + priv->ddr_clock_initialized[if_num] = inited_flag; +} + +static int ddr_clock_initialized(struct ddr_priv *priv, int if_num) +{ + return priv->ddr_clock_initialized[if_num]; +} + +static void set_ddr_memory_preserved(struct ddr_priv *priv) +{ + priv->ddr_memory_preserved = true; +} + +bool ddr_memory_preserved(struct ddr_priv *priv) +{ + return priv->ddr_memory_preserved; +} + +static void cn78xx_lmc_dreset_init(struct ddr_priv *priv, int if_num) +{ + union cvmx_lmcx_dll_ctl2 dll_ctl2; + + /* + * The remainder of this section describes the sequence for LMCn. + * + * 1. If not done already, write LMC(0..3)_DLL_CTL2 to its reset value + * (except without changing the LMC(0..3)_DLL_CTL2[INTF_EN] value from + * that set in the prior Step 3), including + * LMC(0..3)_DLL_CTL2[DRESET] = 1. + * + * 2. Without changing any other LMC(0..3)_DLL_CTL2 fields, write + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 1. + */ + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + dll_ctl2.cn78xx.dll_bringup = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64); + + /* + * 3. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + + /* + * 4. Wait for a minimum of 10 LMC CK cycles. + */ + + udelay(1); + + /* + * 5. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] = 1. + * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] must not change after this point + * without restarting the LMCn DRESET initialization sequence. + */ + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + dll_ctl2.cn78xx.quad_dll_ena = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64); + + /* + * 6. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + + /* + * 7. Wait a minimum of 10 us. + */ + + udelay(10); + + /* + * 8. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 0. + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] must not change after this point + * without restarting the LMCn DRESET initialization sequence. + */ + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + dll_ctl2.cn78xx.dll_bringup = 0; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64); + + /* + * 9. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + + /* + * 10. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[DRESET] = 0. + * LMC(0..3)_DLL_CTL2[DRESET] must not change after this point without + * restarting the LMCn DRESET initialization sequence. + * + * After completing LMCn DRESET initialization, all LMC CSRs may be + * accessed. Prior to completing LMC DRESET initialization, only + * LMC(0..3)_DDR_PLL_CTL, LMC(0..3)_DLL_CTL2, LMC(0..3)_RESET_CTL, and + * LMC(0..3)_COMP_CTL2 LMC CSRs can be accessed. + */ + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + dll_ctl2.cn78xx.dreset = 0; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64); +} + +int initialize_ddr_clock(struct ddr_priv *priv, struct ddr_conf *ddr_conf, + u32 cpu_hertz, u32 ddr_hertz, u32 ddr_ref_hertz, + int if_num, u32 if_mask) +{ + char *s; + + if (ddr_clock_initialized(priv, if_num)) + return 0; + + if (!ddr_clock_initialized(priv, 0)) { /* Do this once */ + union cvmx_lmcx_reset_ctl reset_ctl; + int i; + + /* + * Check to see if memory is to be preserved and set global + * flag + */ + for (i = 3; i >= 0; --i) { + if ((if_mask & (1 << i)) == 0) + continue; + + reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(i)); + if (reset_ctl.s.ddr3psv == 1) { + debug("LMC%d Preserving memory\n", i); + set_ddr_memory_preserved(priv); + + /* Re-initialize flags */ + reset_ctl.s.ddr3pwarm = 0; + reset_ctl.s.ddr3psoft = 0; + reset_ctl.s.ddr3psv = 0; + lmc_wr(priv, CVMX_LMCX_RESET_CTL(i), + reset_ctl.u64); + } + } + } + + /* + * ToDo: Add support for these SoCs: + * + * if (octeon_is_cpuid(OCTEON_CN63XX) || + * octeon_is_cpuid(OCTEON_CN66XX) || + * octeon_is_cpuid(OCTEON_CN61XX) || octeon_is_cpuid(OCTEON_CNF71XX)) + * + * and + * + * if (octeon_is_cpuid(OCTEON_CN68XX)) + * + * and + * + * if (octeon_is_cpuid(OCTEON_CN70XX)) + * + */ + + if (octeon_is_cpuid(OCTEON_CN78XX) || octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + union cvmx_lmcx_dll_ctl2 dll_ctl2; + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl; + struct dimm_config *dimm_config_table = + ddr_conf->dimm_config_table; + int en_idx, save_en_idx, best_en_idx = 0; + u64 clkf, clkr, max_clkf = 127; + u64 best_clkf = 0, best_clkr = 0; + u64 best_pll_MHz = 0; + u64 pll_MHz; + u64 min_pll_MHz = 800; + u64 max_pll_MHz = 5000; + u64 error; + u64 best_error; + u64 best_calculated_ddr_hertz = 0; + u64 calculated_ddr_hertz = 0; + u64 orig_ddr_hertz = ddr_hertz; + const int _en[] = { 1, 2, 3, 4, 5, 6, 7, 8, 10, 12 }; + int override_pll_settings; + int new_bwadj; + int ddr_type; + int i; + + /* ddr_type only indicates DDR4 or DDR3 */ + ddr_type = (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == + 0x0C) ? DDR4_DRAM : DDR3_DRAM; + + /* + * 5.9 LMC Initialization Sequence + * + * There are 13 parts to the LMC initialization procedure: + * + * 1. DDR PLL initialization + * + * 2. LMC CK initialization + * + * 3. LMC interface enable initialization + * + * 4. LMC DRESET initialization + * + * 5. LMC CK local initialization + * + * 6. LMC RESET initialization + * + * 7. Early LMC initialization + * + * 8. LMC offset training + * + * 9. LMC internal Vref training + * + * 10. LMC deskew training + * + * 11. LMC write leveling + * + * 12. LMC read leveling + * + * 13. Final LMC initialization + * + * CN78XX supports two modes: + * + * - two-LMC mode: both LMCs 2/3 must not be enabled + * (LMC2/3_DLL_CTL2[DRESET] must be set to 1 and + * LMC2/3_DLL_CTL2[INTF_EN] + * must be set to 0) and both LMCs 0/1 must be enabled). + * + * - four-LMC mode: all four LMCs 0..3 must be enabled. + * + * Steps 4 and 6..13 should each be performed for each + * enabled LMC (either twice or four times). Steps 1..3 and + * 5 are more global in nature and each must be executed + * exactly once (not once per LMC) each time the DDR PLL + * changes or is first brought up. Steps 1..3 and 5 need + * not be performed if the DDR PLL is stable. + * + * Generally, the steps are performed in order. The exception + * is that the CK local initialization (step 5) must be + * performed after some DRESET initializations (step 4) and + * before other DRESET initializations when the DDR PLL is + * brought up or changed. (The CK local initialization uses + * information from some LMCs to bring up the other local + * CKs.) The following text describes these ordering + * requirements in more detail. + * + * Following any chip reset, the DDR PLL must be brought up, + * and all 13 steps should be executed. Subsequently, it is + * possible to execute only steps 4 and 6..13, or to execute + * only steps 8..13. + * + * The remainder of this section covers these initialization + * steps in sequence. + */ + + /* Do the following init only once */ + if (if_num != 0) + goto not_if0; + + /* Only for interface #0 ... */ + + /* + * 5.9.3 LMC Interface-Enable Initialization + * + * LMC interface-enable initialization (Step 3) must be# + * performed after Step 2 for each chip reset and whenever + * the DDR clock speed changes. This step needs to be + * performed only once, not once per LMC. Perform the + * following three substeps for the LMC interface-enable + * initialization: + * + * 1. Without changing any other LMC2_DLL_CTL2 fields + * (LMC(0..3)_DLL_CTL2 should be at their reset values after + * Step 1), write LMC2_DLL_CTL2[INTF_EN] = 1 if four-LMC + * mode is desired. + * + * 2. Without changing any other LMC3_DLL_CTL2 fields, write + * LMC3_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired. + * + * 3. Read LMC2_DLL_CTL2 and wait for the result. + * + * The LMC2_DLL_CTL2[INTF_EN] and LMC3_DLL_CTL2[INTF_EN] + * values should not be changed by software from this point. + */ + + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i)); + + dll_ctl2.cn78xx.byp_setting = 0; + dll_ctl2.cn78xx.byp_sel = 0; + dll_ctl2.cn78xx.quad_dll_ena = 0; + dll_ctl2.cn78xx.dreset = 1; + dll_ctl2.cn78xx.dll_bringup = 0; + dll_ctl2.cn78xx.intf_en = 0; + + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(i), dll_ctl2.u64); + } + + /* + * ###### Interface enable (intf_en) deferred until after + * DDR_DIV_RESET=0 ####### + */ + + /* + * 5.9.1 DDR PLL Initialization + * + * DDR PLL initialization (Step 1) must be performed for each + * chip reset and whenever the DDR clock speed changes. This + * step needs to be performed only once, not once per LMC. + * + * Perform the following eight substeps to initialize the + * DDR PLL: + * + * 1. If not done already, write all fields in + * LMC(0..3)_DDR_PLL_CTL and + * LMC(0..1)_DLL_CTL2 to their reset values, including: + * + * .. LMC0_DDR_PLL_CTL[DDR_DIV_RESET] = 1 + * .. LMC0_DLL_CTL2[DRESET] = 1 + * + * This substep is not necessary after a chip reset. + * + */ + + ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); + + ddr_pll_ctl.cn78xx.reset_n = 0; + ddr_pll_ctl.cn78xx.ddr_div_reset = 1; + ddr_pll_ctl.cn78xx.phy_dcok = 0; + + /* + * 73XX pass 1.3 has LMC0 DCLK_INVERT tied to 1; earlier + * 73xx passes are tied to 0 + * + * 75XX needs LMC0 DCLK_INVERT set to 1 to minimize duty + * cycle falling points + * + * and we default all other chips LMC0 to DCLK_INVERT=0 + */ + ddr_pll_ctl.cn78xx.dclk_invert = + !!(octeon_is_cpuid(OCTEON_CN73XX_PASS1_3) || + octeon_is_cpuid(OCTEON_CNF75XX)); + + /* + * allow override of LMC0 desired setting for DCLK_INVERT, + * but not on 73XX; + * we cannot change LMC0 DCLK_INVERT on 73XX any pass + */ + if (!(octeon_is_cpuid(OCTEON_CN73XX))) { + s = lookup_env(priv, "ddr0_set_dclk_invert"); + if (s) { + ddr_pll_ctl.cn78xx.dclk_invert = + !!simple_strtoul(s, NULL, 0); + debug("LMC0: override DDR_PLL_CTL[dclk_invert] to %d\n", + ddr_pll_ctl.cn78xx.dclk_invert); + } + } + + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0), ddr_pll_ctl.u64); + debug("%-45s : 0x%016llx\n", "LMC0: DDR_PLL_CTL", + ddr_pll_ctl.u64); + + // only when LMC1 is active + if (if_mask & 0x2) { + /* + * For CNF75XX, both LMC0 and LMC1 use the same PLL, + * so we use the LMC0 setting of DCLK_INVERT for LMC1. + */ + if (!octeon_is_cpuid(OCTEON_CNF75XX)) { + int override = 0; + + /* + * by default, for non-CNF75XX, we want + * LMC1 toggled LMC0 + */ + int lmc0_dclk_invert = + ddr_pll_ctl.cn78xx.dclk_invert; + + /* + * FIXME: work-around for DDR3 UDIMM problems + * is to use LMC0 setting on LMC1 and if + * 73xx pass 1.3, we want to default LMC1 + * DCLK_INVERT to LMC0, not the invert of LMC0 + */ + int lmc1_dclk_invert; + + lmc1_dclk_invert = + ((ddr_type == DDR4_DRAM) && + !octeon_is_cpuid(OCTEON_CN73XX_PASS1_3)) + ? lmc0_dclk_invert ^ 1 : + lmc0_dclk_invert; + + /* + * allow override of LMC1 desired setting for + * DCLK_INVERT + */ + s = lookup_env(priv, "ddr1_set_dclk_invert"); + if (s) { + lmc1_dclk_invert = + !!simple_strtoul(s, NULL, 0); + override = 1; + } + debug("LMC1: %s DDR_PLL_CTL[dclk_invert] to %d (LMC0 %d)\n", + (override) ? "override" : + "default", lmc1_dclk_invert, + lmc0_dclk_invert); + + ddr_pll_ctl.cn78xx.dclk_invert = + lmc1_dclk_invert; + } + + // but always write LMC1 CSR if it is active + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(1), ddr_pll_ctl.u64); + debug("%-45s : 0x%016llx\n", + "LMC1: DDR_PLL_CTL", ddr_pll_ctl.u64); + } + + /* + * 2. If the current DRAM contents are not preserved (see + * LMC(0..3)_RESET_ CTL[DDR3PSV]), this is also an appropriate + * time to assert the RESET# pin of the DDR3/DDR4 DRAM parts. + * If desired, write + * LMC0_RESET_ CTL[DDR3RST] = 0 without modifying any other + * LMC0_RESET_CTL fields to assert the DDR_RESET_L pin. + * No action is required here to assert DDR_RESET_L + * following a chip reset. Refer to Section 5.9.6. Do this + * for all enabled LMCs. + */ + + for (i = 0; (!ddr_memory_preserved(priv)) && i < 4; ++i) { + union cvmx_lmcx_reset_ctl reset_ctl; + + if ((if_mask & (1 << i)) == 0) + continue; + + reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(i)); + reset_ctl.cn78xx.ddr3rst = 0; /* Reset asserted */ + debug("LMC%d Asserting DDR_RESET_L\n", i); + lmc_wr(priv, CVMX_LMCX_RESET_CTL(i), reset_ctl.u64); + lmc_rd(priv, CVMX_LMCX_RESET_CTL(i)); + } + + /* + * 3. Without changing any other LMC0_DDR_PLL_CTL values, + * write LMC0_DDR_PLL_CTL[CLKF] with a value that gives a + * desired DDR PLL speed. The LMC0_DDR_PLL_CTL[CLKF] value + * should be selected in conjunction with the post-scalar + * divider values for LMC (LMC0_DDR_PLL_CTL[DDR_PS_EN]) so + * that the desired LMC CK speeds are is produced (all + * enabled LMCs must run the same speed). Section 5.14 + * describes LMC0_DDR_PLL_CTL[CLKF] and + * LMC0_DDR_PLL_CTL[DDR_PS_EN] programmings that produce + * the desired LMC CK speed. Section 5.9.2 describes LMC CK + * initialization, which can be done separately from the DDR + * PLL initialization described in this section. + * + * The LMC0_DDR_PLL_CTL[CLKF] value must not change after + * this point without restarting this SDRAM PLL + * initialization sequence. + */ + + /* Init to max error */ + error = ddr_hertz; + best_error = ddr_hertz; + + debug("DDR Reference Hertz = %d\n", ddr_ref_hertz); + + while (best_error == ddr_hertz) { + for (clkr = 0; clkr < 4; ++clkr) { + for (en_idx = + sizeof(_en) / sizeof(int) - + 1; en_idx >= 0; --en_idx) { + save_en_idx = en_idx; + clkf = + ((ddr_hertz) * + (clkr + 1) * (_en[save_en_idx])); + clkf = divide_nint(clkf, ddr_ref_hertz) + - 1; + pll_MHz = + ddr_ref_hertz * + (clkf + 1) / (clkr + 1) / 1000000; + calculated_ddr_hertz = + ddr_ref_hertz * + (clkf + + 1) / ((clkr + + 1) * (_en[save_en_idx])); + error = + ddr_hertz - calculated_ddr_hertz; + + if (pll_MHz < min_pll_MHz || + pll_MHz > max_pll_MHz) + continue; + if (clkf > max_clkf) { + /* + * PLL requires clkf to be + * limited + */ + continue; + } + if (abs(error) > abs(best_error)) + continue; + + debug("clkr: %2llu, en[%d]: %2d, clkf: %4llu, pll_MHz: %4llu, ddr_hertz: %8llu, error: %8lld\n", + clkr, save_en_idx, + _en[save_en_idx], clkf, pll_MHz, + calculated_ddr_hertz, error); + + /* Favor the highest PLL frequency. */ + if (abs(error) < abs(best_error) || + pll_MHz > best_pll_MHz) { + best_pll_MHz = pll_MHz; + best_calculated_ddr_hertz = + calculated_ddr_hertz; + best_error = error; + best_clkr = clkr; + best_clkf = clkf; + best_en_idx = save_en_idx; + } + } + } + + override_pll_settings = 0; + + s = lookup_env(priv, "ddr_pll_clkr"); + if (s) { + best_clkr = simple_strtoul(s, NULL, 0); + override_pll_settings = 1; + } + + s = lookup_env(priv, "ddr_pll_clkf"); + if (s) { + best_clkf = simple_strtoul(s, NULL, 0); + override_pll_settings = 1; + } + + s = lookup_env(priv, "ddr_pll_en_idx"); + if (s) { + best_en_idx = simple_strtoul(s, NULL, 0); + override_pll_settings = 1; + } + + if (override_pll_settings) { + best_pll_MHz = + ddr_ref_hertz * (best_clkf + + 1) / + (best_clkr + 1) / 1000000; + best_calculated_ddr_hertz = + ddr_ref_hertz * (best_clkf + + 1) / + ((best_clkr + 1) * (_en[best_en_idx])); + best_error = + ddr_hertz - best_calculated_ddr_hertz; + } + + debug("clkr: %2llu, en[%d]: %2d, clkf: %4llu, pll_MHz: %4llu, ddr_hertz: %8llu, error: %8lld <==\n", + best_clkr, best_en_idx, _en[best_en_idx], + best_clkf, best_pll_MHz, + best_calculated_ddr_hertz, best_error); + + /* + * Try lowering the frequency if we can't get a + * working configuration + */ + if (best_error == ddr_hertz) { + if (ddr_hertz < orig_ddr_hertz - 10000000) + break; + ddr_hertz -= 1000000; + best_error = ddr_hertz; + } + } + + if (best_error == ddr_hertz) { + printf("ERROR: Can not compute a legal DDR clock speed configuration.\n"); + return -1; + } + + new_bwadj = (best_clkf + 1) / 10; + debug("bwadj: %2d\n", new_bwadj); + + s = lookup_env(priv, "ddr_pll_bwadj"); + if (s) { + new_bwadj = strtoul(s, NULL, 0); + debug("bwadj: %2d\n", new_bwadj); + } + + for (i = 0; i < 2; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + ddr_pll_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + debug("LMC%d: DDR_PLL_CTL : 0x%016llx\n", + i, ddr_pll_ctl.u64); + + ddr_pll_ctl.cn78xx.ddr_ps_en = best_en_idx; + ddr_pll_ctl.cn78xx.clkf = best_clkf; + ddr_pll_ctl.cn78xx.clkr = best_clkr; + ddr_pll_ctl.cn78xx.reset_n = 0; + ddr_pll_ctl.cn78xx.bwadj = new_bwadj; + + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + debug("LMC%d: DDR_PLL_CTL : 0x%016llx\n", + i, ddr_pll_ctl.u64); + + /* + * For cnf75xx LMC0 and LMC1 use the same PLL so + * only program LMC0 PLL. + */ + if (octeon_is_cpuid(OCTEON_CNF75XX)) + break; + } + + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + /* + * 4. Read LMC0_DDR_PLL_CTL and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 5. Wait a minimum of 3 us. + */ + + udelay(3); /* Wait 3 us */ + + /* + * 6. Write LMC0_DDR_PLL_CTL[RESET_N] = 1 without + * changing any other LMC0_DDR_PLL_CTL values. + */ + + ddr_pll_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.reset_n = 1; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + + /* + * 7. Read LMC0_DDR_PLL_CTL and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 8. Wait a minimum of 25 us. + */ + + udelay(25); /* Wait 25 us */ + + /* + * For cnf75xx LMC0 and LMC1 use the same PLL so + * only program LMC0 PLL. + */ + if (octeon_is_cpuid(OCTEON_CNF75XX)) + break; + } + + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + /* + * 5.9.2 LMC CK Initialization + * + * DDR PLL initialization must be completed prior to + * starting LMC CK initialization. + * + * Perform the following substeps to initialize the + * LMC CK: + * + * 1. Without changing any other LMC(0..3)_DDR_PLL_CTL + * values, write + * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 1 and + * LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] with the + * appropriate value to get the desired LMC CK speed. + * Section 5.14 discusses CLKF and DDR_PS_EN + * programmings. The LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] + * must not change after this point without restarting + * this LMC CK initialization sequence. + */ + + ddr_pll_ctl.u64 = lmc_rd(priv, + CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.ddr_div_reset = 1; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + + /* + * 2. Without changing any other fields in + * LMC(0..3)_DDR_PLL_CTL, write + * LMC(0..3)_DDR_PLL_CTL[DDR4_MODE] = 0. + */ + + ddr_pll_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.ddr4_mode = + (ddr_type == DDR4_DRAM) ? 1 : 0; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + + /* + * 3. Read LMC(0..3)_DDR_PLL_CTL and wait for the + * result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 4. Wait a minimum of 1 us. + */ + + udelay(1); /* Wait 1 us */ + + /* + * ###### Steps 5 through 7 deferred until after + * DDR_DIV_RESET=0 ####### + */ + + /* + * 8. Without changing any other LMC(0..3)_COMP_CTL2 + * values, write + * LMC(0..3)_COMP_CTL2[CK_CTL,CONTROL_CTL,CMD_CTL] + * to the desired DDR*_CK_*_P control and command + * signals drive strength. + */ + + union cvmx_lmcx_comp_ctl2 comp_ctl2; + const struct ddr3_custom_config *custom_lmc_config = + &ddr_conf->custom_lmc_config; + + comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(i)); + + /* Default 4=34.3 ohm */ + comp_ctl2.cn78xx.dqx_ctl = + (custom_lmc_config->dqx_ctl == + 0) ? 4 : custom_lmc_config->dqx_ctl; + /* Default 4=34.3 ohm */ + comp_ctl2.cn78xx.ck_ctl = + (custom_lmc_config->ck_ctl == + 0) ? 4 : custom_lmc_config->ck_ctl; + /* Default 4=34.3 ohm */ + comp_ctl2.cn78xx.cmd_ctl = + (custom_lmc_config->cmd_ctl == + 0) ? 4 : custom_lmc_config->cmd_ctl; + + comp_ctl2.cn78xx.rodt_ctl = 0x4; /* 60 ohm */ + + comp_ctl2.cn70xx.ptune_offset = + (abs(custom_lmc_config->ptune_offset) & 0x7) + | (_sign(custom_lmc_config->ptune_offset) << 3); + comp_ctl2.cn70xx.ntune_offset = + (abs(custom_lmc_config->ntune_offset) & 0x7) + | (_sign(custom_lmc_config->ntune_offset) << 3); + + s = lookup_env(priv, "ddr_clk_ctl"); + if (s) { + comp_ctl2.cn78xx.ck_ctl = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_ck_ctl"); + if (s) { + comp_ctl2.cn78xx.ck_ctl = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_cmd_ctl"); + if (s) { + comp_ctl2.cn78xx.cmd_ctl = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dqx_ctl"); + if (s) { + comp_ctl2.cn78xx.dqx_ctl = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_ptune_offset"); + if (s) { + comp_ctl2.cn78xx.ptune_offset = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_ntune_offset"); + if (s) { + comp_ctl2.cn78xx.ntune_offset = + simple_strtoul(s, NULL, 0); + } + + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(i), comp_ctl2.u64); + + /* + * 9. Read LMC(0..3)_DDR_PLL_CTL and wait for the + * result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 10. Wait a minimum of 200 ns. + */ + + udelay(1); /* Wait 1 us */ + + /* + * 11. Without changing any other + * LMC(0..3)_DDR_PLL_CTL values, write + * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 0. + */ + + ddr_pll_ctl.u64 = lmc_rd(priv, + CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.ddr_div_reset = 0; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + + /* + * 12. Read LMC(0..3)_DDR_PLL_CTL and wait for the + * result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 13. Wait a minimum of 200 ns. + */ + + udelay(1); /* Wait 1 us */ + } + + /* + * Relocated Interface Enable (intf_en) Step + */ + for (i = (octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) ? 1 : 2; + i < 4; ++i) { + /* + * This step is only necessary for LMC 2 and 3 in + * 4-LMC mode. The mask will cause the unpopulated + * interfaces to be skipped. + */ + if ((if_mask & (1 << i)) == 0) + continue; + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i)); + dll_ctl2.cn78xx.intf_en = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(i), dll_ctl2.u64); + lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i)); + } + + /* + * Relocated PHY_DCOK Step + */ + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + /* + * 5. Without changing any other fields in + * LMC(0..3)_DDR_PLL_CTL, write + * LMC(0..3)_DDR_PLL_CTL[PHY_DCOK] = 1. + */ + + ddr_pll_ctl.u64 = lmc_rd(priv, + CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.phy_dcok = 1; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + /* + * 6. Read LMC(0..3)_DDR_PLL_CTL and wait for + * the result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 7. Wait a minimum of 20 us. + */ + + udelay(20); /* Wait 20 us */ + } + + /* + * 5.9.4 LMC DRESET Initialization + * + * All of the DDR PLL, LMC global CK, and LMC interface + * enable initializations must be completed prior to starting + * this LMC DRESET initialization (Step 4). + * + * This LMC DRESET step is done for all enabled LMCs. + * + * There are special constraints on the ordering of DRESET + * initialization (Steps 4) and CK local initialization + * (Step 5) whenever CK local initialization must be executed. + * CK local initialization must be executed whenever the DDR + * PLL is being brought up (for each chip reset* and whenever + * the DDR clock speed changes). + * + * When Step 5 must be executed in the two-LMC mode case: + * - LMC0 DRESET initialization must occur before Step 5. + * - LMC1 DRESET initialization must occur after Step 5. + * + * When Step 5 must be executed in the four-LMC mode case: + * - LMC2 and LMC3 DRESET initialization must occur before + * Step 5. + * - LMC0 and LMC1 DRESET initialization must occur after + * Step 5. + */ + + if (octeon_is_cpuid(OCTEON_CN73XX)) { + /* ONE-LMC or TWO-LMC MODE BEFORE STEP 5 for cn73xx */ + cn78xx_lmc_dreset_init(priv, 0); + } else if (octeon_is_cpuid(OCTEON_CNF75XX)) { + if (if_mask == 0x3) { + /* + * 2-LMC Mode: LMC1 DRESET must occur + * before Step 5 + */ + cn78xx_lmc_dreset_init(priv, 1); + } + } else { + /* TWO-LMC MODE DRESET BEFORE STEP 5 */ + if (if_mask == 0x3) + cn78xx_lmc_dreset_init(priv, 0); + + /* FOUR-LMC MODE BEFORE STEP 5 */ + if (if_mask == 0xf) { + cn78xx_lmc_dreset_init(priv, 2); + cn78xx_lmc_dreset_init(priv, 3); + } + } + + /* + * 5.9.5 LMC CK Local Initialization + * + * All of DDR PLL, LMC global CK, and LMC interface-enable + * initializations must be completed prior to starting this + * LMC CK local initialization (Step 5). + * + * LMC CK Local initialization must be performed for each + * chip reset and whenever the DDR clock speed changes. This + * step needs to be performed only once, not once per LMC. + * + * There are special constraints on the ordering of DRESET + * initialization (Steps 4) and CK local initialization + * (Step 5) whenever CK local initialization must be executed. + * CK local initialization must be executed whenever the + * DDR PLL is being brought up (for each chip reset and + * whenever the DDR clock speed changes). + * + * When Step 5 must be executed in the two-LMC mode case: + * - LMC0 DRESET initialization must occur before Step 5. + * - LMC1 DRESET initialization must occur after Step 5. + * + * When Step 5 must be executed in the four-LMC mode case: + * - LMC2 and LMC3 DRESET initialization must occur before + * Step 5. + * - LMC0 and LMC1 DRESET initialization must occur after + * Step 5. + * + * LMC CK local initialization is different depending on + * whether two-LMC or four-LMC modes are desired. + */ + + if (if_mask == 0x3) { + int temp_lmc_if_num = octeon_is_cpuid(OCTEON_CNF75XX) ? + 1 : 0; + + /* + * 5.9.5.1 LMC CK Local Initialization for Two-LMC + * Mode + * + * 1. Write LMC0_DLL_CTL3 to its reset value. (Note + * that LMC0_DLL_CTL3[DLL_90_BYTE_SEL] = 0x2 .. 0x8 + * should also work.) + */ + + ddr_dll_ctl3.u64 = 0; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + + if (octeon_is_cpuid(OCTEON_CNF75XX)) + ddr_dll_ctl3.cn78xx.dll90_byte_sel = 7; + else + ddr_dll_ctl3.cn78xx.dll90_byte_sel = 1; + + lmc_wr(priv, + CVMX_LMCX_DLL_CTL3(temp_lmc_if_num), + ddr_dll_ctl3.u64); + + /* + * 2. Read LMC0_DLL_CTL3 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(temp_lmc_if_num)); + + /* + * 3. Without changing any other fields in + * LMC0_DLL_CTL3, write + * LMC0_DLL_CTL3[DCLK90_FWD] = 1. Writing + * LMC0_DLL_CTL3[DCLK90_FWD] = 1 + * causes clock-delay information to be forwarded + * from LMC0 to LMC1. + */ + + ddr_dll_ctl3.cn78xx.dclk90_fwd = 1; + lmc_wr(priv, + CVMX_LMCX_DLL_CTL3(temp_lmc_if_num), + ddr_dll_ctl3.u64); + + /* + * 4. Read LMC0_DLL_CTL3 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(temp_lmc_if_num)); + } + + if (if_mask == 0xf) { + /* + * 5.9.5.2 LMC CK Local Initialization for Four-LMC + * Mode + * + * 1. Write LMC2_DLL_CTL3 to its reset value except + * LMC2_DLL_CTL3[DLL90_BYTE_SEL] = 0x7. + */ + + ddr_dll_ctl3.u64 = 0; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + ddr_dll_ctl3.cn78xx.dll90_byte_sel = 7; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u64); + + /* + * 2. Write LMC3_DLL_CTL3 to its reset value except + * LMC3_DLL_CTL3[DLL90_BYTE_SEL] = 0x2. + */ + + ddr_dll_ctl3.u64 = 0; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + ddr_dll_ctl3.cn78xx.dll90_byte_sel = 2; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u64); + + /* + * 3. Read LMC3_DLL_CTL3 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3)); + + /* + * 4. Without changing any other fields in + * LMC2_DLL_CTL3, write LMC2_DLL_CTL3[DCLK90_FWD] = 1 + * and LMC2_DLL_CTL3[DCLK90_RECAL_ DIS] = 1. + * Writing LMC2_DLL_CTL3[DCLK90_FWD] = 1 causes LMC 2 + * to forward clockdelay information to LMC0. Setting + * LMC2_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC2 + * from periodically recalibrating this delay + * information. + */ + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(2)); + ddr_dll_ctl3.cn78xx.dclk90_fwd = 1; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u64); + + /* + * 5. Without changing any other fields in + * LMC3_DLL_CTL3, write LMC3_DLL_CTL3[DCLK90_FWD] = 1 + * and LMC3_DLL_CTL3[DCLK90_RECAL_ DIS] = 1. + * Writing LMC3_DLL_CTL3[DCLK90_FWD] = 1 causes LMC3 + * to forward clockdelay information to LMC1. Setting + * LMC3_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC3 + * from periodically recalibrating this delay + * information. + */ + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3)); + ddr_dll_ctl3.cn78xx.dclk90_fwd = 1; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u64); + + /* + * 6. Read LMC3_DLL_CTL3 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3)); + } + + if (octeon_is_cpuid(OCTEON_CNF75XX)) { + /* + * cnf75xx 2-LMC Mode: LMC0 DRESET must occur after + * Step 5, Do LMC0 for 1-LMC Mode here too + */ + cn78xx_lmc_dreset_init(priv, 0); + } + + /* TWO-LMC MODE AFTER STEP 5 */ + if (if_mask == 0x3) { + if (octeon_is_cpuid(OCTEON_CNF75XX)) { + /* + * cnf75xx 2-LMC Mode: LMC0 DRESET must + * occur after Step 5 + */ + cn78xx_lmc_dreset_init(priv, 0); + } else { + cn78xx_lmc_dreset_init(priv, 1); + } + } + + /* FOUR-LMC MODE AFTER STEP 5 */ + if (if_mask == 0xf) { + cn78xx_lmc_dreset_init(priv, 0); + cn78xx_lmc_dreset_init(priv, 1); + + /* + * Enable periodic recalibration of DDR90 delay + * line in. + */ + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(0)); + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 0; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(1)); + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 0; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(1), ddr_dll_ctl3.u64); + } + + /* Enable fine tune mode for all LMCs */ + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(i)); + ddr_dll_ctl3.cn78xx.fine_tune_mode = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(i), ddr_dll_ctl3.u64); + } + + /* + * Enable the trim circuit on the appropriate channels to + * adjust the DDR clock duty cycle for chips that support + * it + */ + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || + octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + union cvmx_lmcx_phy_ctl lmc_phy_ctl; + int i; + + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + lmc_phy_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_PHY_CTL(i)); + + if (octeon_is_cpuid(OCTEON_CNF75XX) || + octeon_is_cpuid(OCTEON_CN73XX_PASS1_3)) { + /* Both LMCs */ + lmc_phy_ctl.s.lv_mode = 0; + } else { + /* Odd LMCs = 0, Even LMCs = 1 */ + lmc_phy_ctl.s.lv_mode = (~i) & 1; + } + + debug("LMC%d: PHY_CTL : 0x%016llx\n", + i, lmc_phy_ctl.u64); + lmc_wr(priv, CVMX_LMCX_PHY_CTL(i), + lmc_phy_ctl.u64); + } + } + } + + /* + * 5.9.6 LMC RESET Initialization + * + * NOTE: this is now done as the first step in + * init_octeon3_ddr3_interface, rather than the last step in clock + * init. This reorg allows restarting per-LMC initialization should + * problems be encountered, rather than being forced to resort to + * resetting the chip and starting all over. + * + * Look for the code in octeon3_lmc.c: perform_lmc_reset(). + */ + + /* Fallthrough for all interfaces... */ +not_if0: + + /* + * Start the DDR clock so that its frequency can be measured. + * For some chips we must activate the memory controller with + * init_start to make the DDR clock start to run. + */ + if ((!octeon_is_cpuid(OCTEON_CN6XXX)) && + (!octeon_is_cpuid(OCTEON_CNF7XXX)) && + (!octeon_is_cpuid(OCTEON_CN7XXX))) { + union cvmx_lmcx_mem_cfg0 mem_cfg0; + + mem_cfg0.u64 = 0; + mem_cfg0.s.init_start = 1; + lmc_wr(priv, CVMX_LMCX_MEM_CFG0(if_num), mem_cfg0.u64); + lmc_rd(priv, CVMX_LMCX_MEM_CFG0(if_num)); + } + + set_ddr_clock_initialized(priv, if_num, 1); + + return 0; +} + +static void octeon_ipd_delay_cycles(u64 cycles) +{ + u64 start = csr_rd(CVMX_IPD_CLK_COUNT); + + while (start + cycles > csr_rd(CVMX_IPD_CLK_COUNT)) + ; +} + +static void octeon_ipd_delay_cycles_o3(u64 cycles) +{ + u64 start = csr_rd(CVMX_FPA_CLK_COUNT); + + while (start + cycles > csr_rd(CVMX_FPA_CLK_COUNT)) + ; +} + +static u32 measure_octeon_ddr_clock(struct ddr_priv *priv, + struct ddr_conf *ddr_conf, u32 cpu_hertz, + u32 ddr_hertz, u32 ddr_ref_hertz, + int if_num, u32 if_mask) +{ + u64 core_clocks; + u64 ddr_clocks; + u64 calc_ddr_hertz; + + if (ddr_conf) { + if (initialize_ddr_clock(priv, ddr_conf, cpu_hertz, + ddr_hertz, ddr_ref_hertz, if_num, + if_mask) != 0) + return 0; + } + + /* Dynamically determine the DDR clock speed */ + if (OCTEON_IS_OCTEON2() || octeon_is_cpuid(OCTEON_CN70XX)) { + core_clocks = csr_rd(CVMX_IPD_CLK_COUNT); + ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); + /* How many cpu cycles to measure over */ + octeon_ipd_delay_cycles(100000000); + core_clocks = csr_rd(CVMX_IPD_CLK_COUNT) - core_clocks; + ddr_clocks = + lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)) - ddr_clocks; + calc_ddr_hertz = ddr_clocks * gd->bus_clk / core_clocks; + } else if (octeon_is_cpuid(OCTEON_CN7XXX)) { + core_clocks = csr_rd(CVMX_FPA_CLK_COUNT); + ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); + /* How many cpu cycles to measure over */ + octeon_ipd_delay_cycles_o3(100000000); + core_clocks = csr_rd(CVMX_FPA_CLK_COUNT) - core_clocks; + ddr_clocks = + lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)) - ddr_clocks; + calc_ddr_hertz = ddr_clocks * gd->bus_clk / core_clocks; + } else { + core_clocks = csr_rd(CVMX_IPD_CLK_COUNT); + /* + * ignore overflow, starts counting when we enable the + * controller + */ + ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT_LO(if_num)); + /* How many cpu cycles to measure over */ + octeon_ipd_delay_cycles(100000000); + core_clocks = csr_rd(CVMX_IPD_CLK_COUNT) - core_clocks; + ddr_clocks = + lmc_rd(priv, CVMX_LMCX_DCLK_CNT_LO(if_num)) - ddr_clocks; + calc_ddr_hertz = ddr_clocks * cpu_hertz / core_clocks; + } + + debug("core clocks: %llu, ddr clocks: %llu, calc rate: %llu\n", + core_clocks, ddr_clocks, calc_ddr_hertz); + debug("LMC%d: Measured DDR clock: %lld, cpu clock: %u, ddr clocks: %llu\n", + if_num, calc_ddr_hertz, cpu_hertz, ddr_clocks); + + /* Check for unreasonable settings. */ + if (calc_ddr_hertz < 10000) { + udelay(8000000 * 100); + printf("DDR clock misconfigured on interface %d. Resetting...\n", + if_num); + do_reset(NULL, 0, 0, NULL); + } + + return calc_ddr_hertz; +} + +u64 lmc_ddr3_rl_dbg_read(struct ddr_priv *priv, int if_num, int idx) +{ + union cvmx_lmcx_rlevel_dbg rlevel_dbg; + union cvmx_lmcx_rlevel_ctl rlevel_ctl; + + rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); + rlevel_ctl.s.byte = idx; + + lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64); + lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); + + rlevel_dbg.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_DBG(if_num)); + return rlevel_dbg.s.bitmask; +} + +u64 lmc_ddr3_wl_dbg_read(struct ddr_priv *priv, int if_num, int idx) +{ + union cvmx_lmcx_wlevel_dbg wlevel_dbg; + + wlevel_dbg.u64 = 0; + wlevel_dbg.s.byte = idx; + + lmc_wr(priv, CVMX_LMCX_WLEVEL_DBG(if_num), wlevel_dbg.u64); + lmc_rd(priv, CVMX_LMCX_WLEVEL_DBG(if_num)); + + wlevel_dbg.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_DBG(if_num)); + return wlevel_dbg.s.bitmask; +} + +int validate_ddr3_rlevel_bitmask(struct rlevel_bitmask *rlevel_bitmask_p, + int ddr_type) +{ + int i; + int errors = 0; + u64 mask = 0; /* Used in 64-bit comparisons */ + u8 mstart = 0; + u8 width = 0; + u8 firstbit = 0; + u8 lastbit = 0; + u8 bubble = 0; + u8 tbubble = 0; + u8 blank = 0; + u8 narrow = 0; + u8 trailing = 0; + u64 bitmask = rlevel_bitmask_p->bm; + u8 extras = 0; + u8 toolong = 0; + u64 temp; + + if (bitmask == 0) { + blank += RLEVEL_BITMASK_BLANK_ERROR; + } else { + /* Look for fb, the first bit */ + temp = bitmask; + while (!(temp & 1)) { + firstbit++; + temp >>= 1; + } + + /* Look for lb, the last bit */ + lastbit = firstbit; + while ((temp >>= 1)) + lastbit++; + + /* + * Start with the max range to try to find the largest mask + * within the bitmask data + */ + width = MASKRANGE_BITS; + for (mask = MASKRANGE; mask > 0; mask >>= 1, --width) { + for (mstart = lastbit - width + 1; mstart >= firstbit; + --mstart) { + temp = mask << mstart; + if ((bitmask & temp) == temp) + goto done_now; + } + } +done_now: + /* look for any more contiguous 1's to the right of mstart */ + if (width == MASKRANGE_BITS) { // only when maximum mask + while ((bitmask >> (mstart - 1)) & 1) { + // slide right over more 1's + --mstart; + // count the number of extra bits only for DDR4 + if (ddr_type == DDR4_DRAM) + extras++; + } + } + + /* Penalize any extra 1's beyond the maximum desired mask */ + if (extras > 0) + toolong = + RLEVEL_BITMASK_TOOLONG_ERROR * ((1 << extras) - 1); + + /* Detect if bitmask is too narrow. */ + if (width < 4) + narrow = (4 - width) * RLEVEL_BITMASK_NARROW_ERROR; + + /* + * detect leading bubble bits, that is, any 0's between first + * and mstart + */ + temp = bitmask >> (firstbit + 1); + i = mstart - firstbit - 1; + while (--i >= 0) { + if ((temp & 1) == 0) + bubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR; + temp >>= 1; + } + + temp = bitmask >> (mstart + width + extras); + i = lastbit - (mstart + width + extras - 1); + while (--i >= 0) { + if (temp & 1) { + /* + * Detect 1 bits after the trailing end of + * the mask, including last. + */ + trailing += RLEVEL_BITMASK_TRAILING_BITS_ERROR; + } else { + /* + * Detect trailing bubble bits, that is, + * any 0's between end-of-mask and last + */ + tbubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR; + } + temp >>= 1; + } + } + + errors = bubble + tbubble + blank + narrow + trailing + toolong; + + /* Pass out useful statistics */ + rlevel_bitmask_p->mstart = mstart; + rlevel_bitmask_p->width = width; + + debug_bitmask_print("bm:%08lx mask:%02lx, width:%2u, mstart:%2d, fb:%2u, lb:%2u (bu:%2d, tb:%2d, bl:%2d, n:%2d, t:%2d, x:%2d) errors:%3d %s\n", + (unsigned long)bitmask, mask, width, mstart, + firstbit, lastbit, bubble, tbubble, blank, + narrow, trailing, toolong, errors, + (errors) ? "=> invalid" : ""); + + return errors; +} + +int compute_ddr3_rlevel_delay(u8 mstart, u8 width, + union cvmx_lmcx_rlevel_ctl rlevel_ctl) +{ + int delay; + + debug_bitmask_print(" offset_en:%d", rlevel_ctl.s.offset_en); + + if (rlevel_ctl.s.offset_en) { + delay = max((int)mstart, + (int)(mstart + width - 1 - rlevel_ctl.s.offset)); + } else { + /* if (rlevel_ctl.s.offset) { *//* Experimental */ + if (0) { + delay = max(mstart + rlevel_ctl.s.offset, mstart + 1); + /* + * Insure that the offset delay falls within the + * bitmask + */ + delay = min(delay, mstart + width - 1); + } else { + /* Round down */ + delay = (width - 1) / 2 + mstart; + } + } + + return delay; +} + +/* Default ODT config must disable ODT */ +/* Must be const (read only) so that the structure is in flash */ +const struct dimm_odt_config disable_odt_config[] = { + /* 1 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 }, + /* 2 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 }, + /* 3 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 }, + /* 4 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 }, +}; + +/* Memory controller setup function */ +static int init_octeon_dram_interface(struct ddr_priv *priv, + struct ddr_conf *ddr_conf, + u32 ddr_hertz, u32 cpu_hertz, + u32 ddr_ref_hertz, int if_num, + u32 if_mask) +{ + u32 mem_size_mbytes = 0; + char *s; + + s = lookup_env(priv, "ddr_timing_hertz"); + if (s) + ddr_hertz = simple_strtoul(s, NULL, 0); + + if (OCTEON_IS_OCTEON3()) { + int lmc_restart_retries = 0; +#define DEFAULT_RESTART_RETRIES 3 + int lmc_restart_retries_limit = DEFAULT_RESTART_RETRIES; + + s = lookup_env(priv, "ddr_restart_retries_limit"); + if (s) + lmc_restart_retries_limit = simple_strtoul(s, NULL, 0); + +restart_lmc_init: + mem_size_mbytes = init_octeon3_ddr3_interface(priv, ddr_conf, + ddr_hertz, + cpu_hertz, + ddr_ref_hertz, + if_num, if_mask); + if (mem_size_mbytes == 0) { // 0 means restart is possible + if (lmc_restart_retries < lmc_restart_retries_limit) { + lmc_restart_retries++; + printf("N0.LMC%d Configuration problem: attempting LMC reset and init restart %d\n", + if_num, lmc_restart_retries); + goto restart_lmc_init; + } else { + if (lmc_restart_retries_limit > 0) { + printf("INFO: N0.LMC%d Configuration: fatal problem remains after %d LMC init retries - Resetting node...\n", + if_num, lmc_restart_retries); + mdelay(500); + do_reset(NULL, 0, 0, NULL); + } else { + // return an error, no restart + mem_size_mbytes = -1; + } + } + } + } + + debug("N0.LMC%d Configuration Completed: %d MB\n", + if_num, mem_size_mbytes); + + return mem_size_mbytes; +} + +#define WLEVEL_BYTE_BITS 5 +#define WLEVEL_BYTE_MSK ((1ULL << 5) - 1) + +void upd_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, + int byte, int delay) +{ + union cvmx_lmcx_wlevel_rankx temp_wlevel_rank; + + if (byte >= 0 && byte <= 8) { + temp_wlevel_rank.u64 = lmc_wlevel_rank->u64; + temp_wlevel_rank.u64 &= + ~(WLEVEL_BYTE_MSK << (WLEVEL_BYTE_BITS * byte)); + temp_wlevel_rank.u64 |= + ((delay & WLEVEL_BYTE_MSK) << (WLEVEL_BYTE_BITS * byte)); + lmc_wlevel_rank->u64 = temp_wlevel_rank.u64; + } +} + +int get_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte) +{ + int delay = 0; + + if (byte >= 0 && byte <= 8) + delay = + ((lmc_wlevel_rank->u64) >> (WLEVEL_BYTE_BITS * + byte)) & WLEVEL_BYTE_MSK; + + return delay; +} + +void upd_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, + int byte, int delay) +{ + union cvmx_lmcx_rlevel_rankx temp_rlevel_rank; + + if (byte >= 0 && byte <= 8) { + temp_rlevel_rank.u64 = + lmc_rlevel_rank->u64 & ~(RLEVEL_BYTE_MSK << + (RLEVEL_BYTE_BITS * byte)); + temp_rlevel_rank.u64 |= + ((delay & RLEVEL_BYTE_MSK) << (RLEVEL_BYTE_BITS * byte)); + lmc_rlevel_rank->u64 = temp_rlevel_rank.u64; + } +} + +int get_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte) +{ + int delay = 0; + + if (byte >= 0 && byte <= 8) + delay = + ((lmc_rlevel_rank->u64) >> (RLEVEL_BYTE_BITS * + byte)) & RLEVEL_BYTE_MSK; + + return delay; +} + +void rlevel_to_wlevel(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, + union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte) +{ + int byte_delay = get_rl_rank(lmc_rlevel_rank, byte); + + debug("Estimating Wlevel delay byte %d: ", byte); + debug("Rlevel=%d => ", byte_delay); + byte_delay = divide_roundup(byte_delay, 2) & 0x1e; + debug("Wlevel=%d\n", byte_delay); + upd_wl_rank(lmc_wlevel_rank, byte, byte_delay); +} + +/* Delay trend: constant=0, decreasing=-1, increasing=1 */ +static s64 calc_delay_trend(s64 v) +{ + if (v == 0) + return 0; + if (v < 0) + return -1; + + return 1; +} + +/* + * Evaluate delay sequence across the whole range of byte delays while + * keeping track of the overall delay trend, increasing or decreasing. + * If the trend changes charge an error amount to the score. + */ + +// NOTE: "max_adj_delay_inc" argument is, by default, 1 for DDR3 and 2 for DDR4 + +int nonseq_del(struct rlevel_byte_data *rlevel_byte, int start, int end, + int max_adj_delay_inc) +{ + s64 error = 0; + s64 delay_trend, prev_trend = 0; + int byte_idx; + s64 seq_err; + s64 adj_err; + s64 delay_inc; + s64 delay_diff; + + for (byte_idx = start; byte_idx < end; ++byte_idx) { + delay_diff = rlevel_byte[byte_idx + 1].delay - + rlevel_byte[byte_idx].delay; + delay_trend = calc_delay_trend(delay_diff); + + /* + * Increment error each time the trend changes to the + * opposite direction. + */ + if (prev_trend != 0 && delay_trend != 0 && + prev_trend != delay_trend) { + seq_err = RLEVEL_NONSEQUENTIAL_DELAY_ERROR; + } else { + seq_err = 0; + } + + // how big was the delay change, if any + delay_inc = abs(delay_diff); + + /* + * Even if the trend did not change to the opposite direction, + * check for the magnitude of the change, and scale the + * penalty by the amount that the size is larger than the + * provided limit. + */ + if (max_adj_delay_inc != 0 && delay_inc > max_adj_delay_inc) { + adj_err = (delay_inc - max_adj_delay_inc) * + RLEVEL_ADJACENT_DELAY_ERROR; + } else { + adj_err = 0; + } + + rlevel_byte[byte_idx + 1].sqerrs = seq_err + adj_err; + error += seq_err + adj_err; + + debug_bitmask_print("Byte %d: %d, Byte %d: %d, delay_trend: %ld, prev_trend: %ld, [%ld/%ld]%s%s\n", + byte_idx + 0, + rlevel_byte[byte_idx + 0].delay, + byte_idx + 1, + rlevel_byte[byte_idx + 1].delay, + delay_trend, + prev_trend, seq_err, adj_err, + (seq_err) ? + " => Nonsequential byte delay" : "", + (adj_err) ? + " => Adjacent delay error" : ""); + + if (delay_trend != 0) + prev_trend = delay_trend; + } + + return (int)error; +} + +int roundup_ddr3_wlevel_bitmask(int bitmask) +{ + int shifted_bitmask; + int leader; + int delay; + + for (leader = 0; leader < 8; ++leader) { + shifted_bitmask = (bitmask >> leader); + if ((shifted_bitmask & 1) == 0) + break; + } + + for (leader = leader; leader < 16; ++leader) { + shifted_bitmask = (bitmask >> (leader % 8)); + if (shifted_bitmask & 1) + break; + } + + delay = (leader & 1) ? leader + 1 : leader; + delay = delay % 8; + + return delay; +} + +/* Octeon 2 */ +static void oct2_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num, + int sequence) +{ + char *s; + +#ifdef DEBUG_PERFORM_DDR3_SEQUENCE + static const char * const sequence_str[] = { + "power-up/init", + "read-leveling", + "self-refresh entry", + "self-refresh exit", + "precharge power-down entry", + "precharge power-down exit", + "write-leveling", + "illegal" + }; +#endif + + union cvmx_lmcx_control lmc_control; + union cvmx_lmcx_config lmc_config; + int save_ddr2t; + + lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + save_ddr2t = lmc_control.s.ddr2t; + + if (save_ddr2t == 0 && octeon_is_cpuid(OCTEON_CN63XX_PASS1_X)) { + /* Some register parts (IDT and TI included) do not like + * the sequence that LMC generates for an MRS register + * write in 1T mode. In this case, the register part does + * not properly forward the MRS register write to the DRAM + * parts. See errata (LMC-14548) Issues with registered + * DIMMs. + */ + debug("Forcing DDR 2T during init seq. Re: Pass 1 LMC-14548\n"); + lmc_control.s.ddr2t = 1; + } + + s = lookup_env(priv, "ddr_init_2t"); + if (s) + lmc_control.s.ddr2t = simple_strtoul(s, NULL, 0); + + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64); + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + + lmc_config.s.init_start = 1; + if (OCTEON_IS_OCTEON2()) + lmc_config.cn63xx.sequence = sequence; + lmc_config.s.rankmask = rank_mask; + +#ifdef DEBUG_PERFORM_DDR3_SEQUENCE + debug("Performing LMC sequence: rank_mask=0x%02x, sequence=%d, %s\n", + rank_mask, sequence, sequence_str[sequence]); +#endif + + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + udelay(600); /* Wait a while */ + + lmc_control.s.ddr2t = save_ddr2t; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64); + lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); +} + +/* Check to see if any custom offset values are used */ +static int is_dll_offset_provided(const int8_t *dll_offset_table) +{ + int i; + + if (!dll_offset_table) /* Check for pointer to table. */ + return 0; + + for (i = 0; i < 9; ++i) { + if (dll_offset_table[i] != 0) + return 1; + } + + return 0; +} + +void change_dll_offset_enable(struct ddr_priv *priv, int if_num, int change) +{ + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + SET_DDR_DLL_CTL3(offset_ena, !!change); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); +} + +unsigned short load_dll_offset(struct ddr_priv *priv, int if_num, + int dll_offset_mode, int byte_offset, int byte) +{ + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + int field_width = 6; + /* + * byte_sel: + * 0x1 = byte 0, ..., 0x9 = byte 8 + * 0xA = all bytes + */ + int byte_sel = (byte == 10) ? byte : byte + 1; + + if (octeon_is_cpuid(OCTEON_CN6XXX)) + field_width = 5; + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + SET_DDR_DLL_CTL3(load_offset, 0); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + SET_DDR_DLL_CTL3(mode_sel, dll_offset_mode); + SET_DDR_DLL_CTL3(offset, + (abs(byte_offset) & (~(-1 << field_width))) | + (_sign(byte_offset) << field_width)); + SET_DDR_DLL_CTL3(byte_sel, byte_sel); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + SET_DDR_DLL_CTL3(load_offset, 1); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + return (unsigned short)GET_DDR_DLL_CTL3(offset); +} + +void process_custom_dll_offsets(struct ddr_priv *priv, int if_num, + const char *enable_str, + const int8_t *offsets, const char *byte_str, + int mode) +{ + const char *s; + int enabled; + int provided; + int byte_offset; + unsigned short offset[9] = { 0 }; + int byte; + + s = lookup_env(priv, enable_str); + if (s) + enabled = !!simple_strtol(s, NULL, 0); + else + enabled = -1; + + /* + * enabled == -1: no override, do only configured offsets if provided + * enabled == 0: override OFF, do NOT do it even if configured + * offsets provided + * enabled == 1: override ON, do it for overrides plus configured + * offsets + */ + + if (enabled == 0) + return; + + provided = is_dll_offset_provided(offsets); + + if (enabled < 0 && !provided) + return; + + change_dll_offset_enable(priv, if_num, 0); + + for (byte = 0; byte < 9; ++byte) { + // always take the provided, if available + byte_offset = (provided) ? offsets[byte] : 0; + + // then, if enabled, use any overrides present + if (enabled > 0) { + s = lookup_env(priv, byte_str, if_num, byte); + if (s) + byte_offset = simple_strtol(s, NULL, 0); + } + + offset[byte] = + load_dll_offset(priv, if_num, mode, byte_offset, byte); + } + + change_dll_offset_enable(priv, if_num, 1); + + debug("N0.LMC%d: DLL %s Offset 8:0 : 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n", + if_num, (mode == 2) ? "Read " : "Write", + offset[8], offset[7], offset[6], offset[5], offset[4], + offset[3], offset[2], offset[1], offset[0]); +} + +void ddr_init_seq(struct ddr_priv *priv, int rank_mask, int if_num) +{ + char *s; + int ddr_init_loops = 1; + int rankx; + + s = lookup_env(priv, "ddr%d_init_loops", if_num); + if (s) + ddr_init_loops = simple_strtoul(s, NULL, 0); + + while (ddr_init_loops--) { + for (rankx = 0; rankx < 8; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + if (OCTEON_IS_OCTEON3()) { + /* power-up/init */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 0); + } else { + /* power-up/init */ + oct2_ddr3_seq(priv, 1 << rankx, if_num, 0); + } + + udelay(1000); /* Wait a while. */ + + s = lookup_env(priv, "ddr_sequence1"); + if (s) { + int sequence1; + + sequence1 = simple_strtoul(s, NULL, 0); + + if (OCTEON_IS_OCTEON3()) { + oct3_ddr3_seq(priv, 1 << rankx, + if_num, sequence1); + } else { + oct2_ddr3_seq(priv, 1 << rankx, + if_num, sequence1); + } + } + + s = lookup_env(priv, "ddr_sequence2"); + if (s) { + int sequence2; + + sequence2 = simple_strtoul(s, NULL, 0); + + if (OCTEON_IS_OCTEON3()) + oct3_ddr3_seq(priv, 1 << rankx, + if_num, sequence2); + else + oct2_ddr3_seq(priv, 1 << rankx, + if_num, sequence2); + } + } + } +} + +static int octeon_ddr_initialize(struct ddr_priv *priv, u32 cpu_hertz, + u32 ddr_hertz, u32 ddr_ref_hertz, + u32 if_mask, + struct ddr_conf *ddr_conf, + u32 *measured_ddr_hertz) +{ + u32 ddr_conf_valid_mask = 0; + int memsize_mbytes = 0; + char *eptr; + int if_idx; + u32 ddr_max_speed = 667000000; + u32 calc_ddr_hertz = -1; + int val; + int ret; + + if (env_get("ddr_verbose") || env_get("ddr_prompt")) + priv->flags |= FLAG_DDR_VERBOSE; + +#ifdef DDR_VERBOSE + priv->flags |= FLAG_DDR_VERBOSE; +#endif + + if (env_get("ddr_trace_init")) { + printf("Parameter ddr_trace_init found in environment.\n"); + priv->flags |= FLAG_DDR_TRACE_INIT; + priv->flags |= FLAG_DDR_VERBOSE; + } + + priv->flags |= FLAG_DDR_DEBUG; + + val = env_get_ulong("ddr_debug", 10, (u32)-1); + switch (val) { + case 0: + priv->flags &= ~FLAG_DDR_DEBUG; + printf("Parameter ddr_debug clear in environment\n"); + break; + case (u32)-1: + break; + default: + printf("Parameter ddr_debug set in environment\n"); + priv->flags |= FLAG_DDR_DEBUG; + priv->flags |= FLAG_DDR_VERBOSE; + break; + } + if (env_get("ddr_prompt")) + priv->flags |= FLAG_DDR_PROMPT; + + /* Force ddr_verbose for failsafe debugger */ + if (priv->flags & FLAG_FAILSAFE_MODE) + priv->flags |= FLAG_DDR_VERBOSE; + +#ifdef DDR_DEBUG + priv->flags |= FLAG_DDR_DEBUG; + /* Keep verbose on while we are still debugging. */ + priv->flags |= FLAG_DDR_VERBOSE; +#endif + + if ((octeon_is_cpuid(OCTEON_CN61XX) || + octeon_is_cpuid(OCTEON_CNF71XX)) && ddr_max_speed > 533333333) { + ddr_max_speed = 533333333; + } else if (octeon_is_cpuid(OCTEON_CN7XXX)) { + /* Override speed restrictions to support internal testing. */ + ddr_max_speed = 1210000000; + } + + if (ddr_hertz > ddr_max_speed) { + printf("DDR clock speed %u exceeds maximum supported DDR speed, reducing to %uHz\n", + ddr_hertz, ddr_max_speed); + ddr_hertz = ddr_max_speed; + } + + if (OCTEON_IS_OCTEON3()) { // restrict check + if (ddr_hertz > cpu_hertz) { + printf("\nFATAL ERROR: DDR speed %u exceeds CPU speed %u, exiting...\n\n", + ddr_hertz, cpu_hertz); + return -1; + } + } + + /* Enable L2 ECC */ + eptr = env_get("disable_l2_ecc"); + if (eptr) { + printf("Disabling L2 ECC based on disable_l2_ecc environment variable\n"); + union cvmx_l2c_ctl l2c_val; + + l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_val.s.disecc = 1; + l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64); + } else { + union cvmx_l2c_ctl l2c_val; + + l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_val.s.disecc = 0; + l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64); + } + + /* + * Init the L2C, must be done before DRAM access so that we + * know L2 is empty + */ + eptr = env_get("disable_l2_index_aliasing"); + if (eptr) { + union cvmx_l2c_ctl l2c_val; + + puts("L2 index aliasing disabled.\n"); + + l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_val.s.disidxalias = 1; + l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64); + } else { + union cvmx_l2c_ctl l2c_val; + + /* Enable L2C index aliasing */ + + l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_val.s.disidxalias = 0; + l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64); + } + + if (OCTEON_IS_OCTEON3()) { + /* + * rdf_cnt: Defines the sample point of the LMC response data in + * the DDR-clock/core-clock crossing. For optimal + * performance set to 10 * (DDR-clock period/core-clock + * period) - 1. To disable set to 0. All other values + * are reserved. + */ + + union cvmx_l2c_ctl l2c_ctl; + u64 rdf_cnt; + char *s; + + l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL); + + /* + * It is more convenient to compute the ratio using clock + * frequencies rather than clock periods. + */ + rdf_cnt = (((u64)10 * cpu_hertz) / ddr_hertz) - 1; + rdf_cnt = rdf_cnt < 256 ? rdf_cnt : 255; + l2c_ctl.cn78xx.rdf_cnt = rdf_cnt; + + s = lookup_env(priv, "early_fill_count"); + if (s) + l2c_ctl.cn78xx.rdf_cnt = simple_strtoul(s, NULL, 0); + + debug("%-45s : %d, cpu_hertz:%d, ddr_hertz:%d\n", + "EARLY FILL COUNT ", l2c_ctl.cn78xx.rdf_cnt, cpu_hertz, + ddr_hertz); + l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64); + } + + /* Check for lower DIMM socket populated */ + for (if_idx = 0; if_idx < 4; ++if_idx) { + if ((if_mask & (1 << if_idx)) && + validate_dimm(priv, + &ddr_conf[(int)if_idx].dimm_config_table[0], + 0)) + ddr_conf_valid_mask |= (1 << if_idx); + } + + if (octeon_is_cpuid(OCTEON_CN68XX) || octeon_is_cpuid(OCTEON_CN78XX)) { + int four_lmc_mode = 1; + char *s; + + if (priv->flags & FLAG_FAILSAFE_MODE) + four_lmc_mode = 0; + + /* Pass 1.0 disable four LMC mode. + * See errata (LMC-15811) + */ + if (octeon_is_cpuid(OCTEON_CN68XX_PASS1_0)) + four_lmc_mode = 0; + + s = env_get("ddr_four_lmc"); + if (s) { + four_lmc_mode = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_four_lmc = %d\n", + four_lmc_mode); + } + + if (!four_lmc_mode) { + puts("Forcing two-LMC Mode.\n"); + /* Invalidate LMC[2:3] */ + ddr_conf_valid_mask &= ~(3 << 2); + } + } else if (octeon_is_cpuid(OCTEON_CN73XX)) { + int one_lmc_mode = 0; + char *s; + + s = env_get("ddr_one_lmc"); + if (s) { + one_lmc_mode = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_one_lmc = %d\n", + one_lmc_mode); + } + + if (one_lmc_mode) { + puts("Forcing one-LMC Mode.\n"); + /* Invalidate LMC[1:3] */ + ddr_conf_valid_mask &= ~(1 << 1); + } + } + + if (!ddr_conf_valid_mask) { + printf + ("ERROR: No valid DIMMs detected on any DDR interface.\n"); + hang(); + return -1; // testr-only: no ret negativ!!! + } + + /* + * We measure the DDR frequency by counting DDR clocks. We can + * confirm or adjust the expected frequency as necessary. We use + * the measured frequency to make accurate timing calculations + * used to configure the controller. + */ + for (if_idx = 0; if_idx < 4; ++if_idx) { + u32 tmp_hertz; + + if (!(ddr_conf_valid_mask & (1 << if_idx))) + continue; + +try_again: + /* + * only check for alternate refclk wanted on chips that + * support it + */ + if ((octeon_is_cpuid(OCTEON_CN73XX)) || + (octeon_is_cpuid(OCTEON_CNF75XX)) || + (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) { + // only need do this if we are LMC0 + if (if_idx == 0) { + union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl; + + ddr_pll_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); + + /* + * If we are asking for 100 MHz refclk, we can + * only get it via alternate, so switch to it + */ + if (ddr_ref_hertz == 100000000) { + ddr_pll_ctl.cn78xx.dclk_alt_refclk_sel = + 1; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0), + ddr_pll_ctl.u64); + udelay(1000); // wait 1 msec + } else { + /* + * If we are NOT asking for 100MHz, + * then reset to (assumed) 50MHz and go + * on + */ + ddr_pll_ctl.cn78xx.dclk_alt_refclk_sel = + 0; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0), + ddr_pll_ctl.u64); + udelay(1000); // wait 1 msec + } + } + } else { + if (ddr_ref_hertz == 100000000) { + debug("N0: DRAM init: requested 100 MHz refclk NOT SUPPORTED\n"); + ddr_ref_hertz = CONFIG_REF_HERTZ; + } + } + + tmp_hertz = measure_octeon_ddr_clock(priv, &ddr_conf[if_idx], + cpu_hertz, ddr_hertz, + ddr_ref_hertz, if_idx, + ddr_conf_valid_mask); + + /* + * only check for alternate refclk acquired on chips that + * support it + */ + if ((octeon_is_cpuid(OCTEON_CN73XX)) || + (octeon_is_cpuid(OCTEON_CNF75XX)) || + (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) { + /* + * if we are LMC0 and we are asked for 100 MHz refclk, + * we must be sure it is available + * If not, we print an error message, set to 50MHz, + * and go on... + */ + if (if_idx == 0 && ddr_ref_hertz == 100000000) { + /* + * Validate that the clock returned is close + * enough to the clock desired + */ + // FIXME: is 5% close enough? + int hertz_diff = + abs((int)tmp_hertz - (int)ddr_hertz); + if (hertz_diff > ((int)ddr_hertz * 5 / 100)) { + // nope, diff is greater than than 5% + debug("N0: DRAM init: requested 100 MHz refclk NOT FOUND\n"); + ddr_ref_hertz = CONFIG_REF_HERTZ; + // clear the flag before trying again!! + set_ddr_clock_initialized(priv, 0, 0); + goto try_again; + } else { + debug("N0: DRAM Init: requested 100 MHz refclk FOUND and SELECTED\n"); + } + } + } + + if (tmp_hertz > 0) + calc_ddr_hertz = tmp_hertz; + debug("LMC%d: measured speed: %u hz\n", if_idx, tmp_hertz); + } + + if (measured_ddr_hertz) + *measured_ddr_hertz = calc_ddr_hertz; + + memsize_mbytes = 0; + for (if_idx = 0; if_idx < 4; ++if_idx) { + if (!(ddr_conf_valid_mask & (1 << if_idx))) + continue; + + ret = init_octeon_dram_interface(priv, &ddr_conf[if_idx], + calc_ddr_hertz, + cpu_hertz, ddr_ref_hertz, + if_idx, ddr_conf_valid_mask); + if (ret > 0) + memsize_mbytes += ret; + } + + if (memsize_mbytes == 0) + /* All interfaces failed to initialize, so return error */ + return -1; + + /* + * switch over to DBI mode only for chips that support it, and + * enabled by envvar + */ + if ((octeon_is_cpuid(OCTEON_CN73XX)) || + (octeon_is_cpuid(OCTEON_CNF75XX)) || + (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) { + eptr = env_get("ddr_dbi_switchover"); + if (eptr) { + printf("DBI Switchover starting...\n"); + cvmx_dbi_switchover(priv); + printf("DBI Switchover finished.\n"); + } + } + + /* call HW-assist tuning here on chips that support it */ + if ((octeon_is_cpuid(OCTEON_CN73XX)) || + (octeon_is_cpuid(OCTEON_CNF75XX)) || + (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) + cvmx_maybe_tune_node(priv, calc_ddr_hertz); + + eptr = env_get("limit_dram_mbytes"); + if (eptr) { + unsigned int mbytes = simple_strtoul(eptr, NULL, 10); + + if (mbytes > 0) { + memsize_mbytes = mbytes; + printf("Limiting DRAM size to %d MBytes based on limit_dram_mbytes env. variable\n", + mbytes); + } + } + + debug("LMC Initialization complete. Total DRAM %d MB\n", + memsize_mbytes); + + return memsize_mbytes; +} + +static int octeon_ddr_probe(struct udevice *dev) +{ + struct ddr_priv *priv = dev_get_priv(dev); + struct ofnode_phandle_args l2c_node; + struct ddr_conf *ddr_conf_ptr; + u32 ddr_conf_valid_mask = 0; + u32 measured_ddr_hertz = 0; + int conf_table_count; + int def_ddr_freq; + u32 mem_mbytes = 0; + u32 ddr_hertz; + u32 ddr_ref_hertz; + int alt_refclk; + const char *eptr; + fdt_addr_t addr; + u64 *ptr; + u64 val; + int ret; + int i; + + /* Don't try to re-init the DDR controller after relocation */ + if (gd->flags & GD_FLG_RELOC) + return 0; + + /* + * Dummy read all local variables into cache, so that they are + * locked in cache when the DDR code runs with flushes etc enabled + */ + ptr = (u64 *)_end; + for (i = 0; i < (0x100000 / sizeof(u64)); i++) + val = readq(ptr++); + + /* + * The base addresses of LMC and L2C are read from the DT. This + * makes it possible to use the DDR init code without the need + * of the "node" variable, describing on which node to access. The + * node number is already included implicitly in the base addresses + * read from the DT this way. + */ + + /* Get LMC base address */ + priv->lmc_base = dev_remap_addr(dev); + debug("%s: lmc_base=%p\n", __func__, priv->lmc_base); + + /* Get L2C base address */ + ret = dev_read_phandle_with_args(dev, "l2c-handle", NULL, 0, 0, + &l2c_node); + if (ret) { + printf("Can't access L2C node!\n"); + return -ENODEV; + } + + addr = ofnode_get_addr(l2c_node.node); + if (addr == FDT_ADDR_T_NONE) { + printf("Can't access L2C node!\n"); + return -ENODEV; + } + + priv->l2c_base = map_physmem(addr, 0, MAP_NOCACHE); + debug("%s: l2c_base=%p\n", __func__, priv->l2c_base); + + ddr_conf_ptr = octeon_ddr_conf_table_get(&conf_table_count, + &def_ddr_freq); + if (!ddr_conf_ptr) { + printf("ERROR: unable to determine DDR configuration\n"); + return -ENODEV; + } + + for (i = 0; i < conf_table_count; i++) { + if (ddr_conf_ptr[i].dimm_config_table[0].spd_addrs[0] || + ddr_conf_ptr[i].dimm_config_table[0].spd_ptrs[0]) + ddr_conf_valid_mask |= 1 << i; + } + + /* + * Check for special case of mismarked 3005 samples, + * and adjust cpuid + */ + alt_refclk = 0; + ddr_hertz = def_ddr_freq * 1000000; + + eptr = env_get("ddr_clock_hertz"); + if (eptr) { + ddr_hertz = simple_strtoul(eptr, NULL, 0); + gd->mem_clk = divide_nint(ddr_hertz, 1000000); + printf("Parameter found in environment. ddr_clock_hertz = %d\n", + ddr_hertz); + } + + ddr_ref_hertz = octeon3_refclock(alt_refclk, + ddr_hertz, + &ddr_conf_ptr[0].dimm_config_table[0]); + + debug("Initializing DDR, clock = %uhz, reference = %uhz\n", + ddr_hertz, ddr_ref_hertz); + + mem_mbytes = octeon_ddr_initialize(priv, gd->cpu_clk, + ddr_hertz, ddr_ref_hertz, + ddr_conf_valid_mask, + ddr_conf_ptr, &measured_ddr_hertz); + debug("Mem size in MBYTES: %u\n", mem_mbytes); + + gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000); + + debug("Measured DDR clock %d Hz\n", measured_ddr_hertz); + + if (measured_ddr_hertz != 0) { + if (!gd->mem_clk) { + /* + * If ddr_clock not set, use measured clock + * and don't warn + */ + gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000); + } else if ((measured_ddr_hertz > ddr_hertz + 3000000) || + (measured_ddr_hertz < ddr_hertz - 3000000)) { + printf("\nWARNING:\n"); + printf("WARNING: Measured DDR clock mismatch! expected: %lld MHz, measured: %lldMHz, cpu clock: %lu MHz\n", + divide_nint(ddr_hertz, 1000000), + divide_nint(measured_ddr_hertz, 1000000), + gd->cpu_clk); + printf("WARNING:\n\n"); + gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000); + } + } + + if (!mem_mbytes) + return -ENODEV; + + priv->info.base = CONFIG_SYS_SDRAM_BASE; + priv->info.size = MB(mem_mbytes); + + /* + * For 6XXX generate a proper error when reading/writing + * non-existent memory locations. + */ + cvmx_l2c_set_big_size(priv, mem_mbytes, 0); + + debug("Ram size %uMiB\n", mem_mbytes); + + return 0; +} + +static int octeon_get_info(struct udevice *dev, struct ram_info *info) +{ + struct ddr_priv *priv = dev_get_priv(dev); + + *info = priv->info; + + return 0; +} + +static struct ram_ops octeon_ops = { + .get_info = octeon_get_info, +}; + +static const struct udevice_id octeon_ids[] = { + {.compatible = "cavium,octeon-7xxx-ddr4" }, + { } +}; + +U_BOOT_DRIVER(octeon_ddr) = { + .name = "octeon_ddr", + .id = UCLASS_RAM, + .of_match = octeon_ids, + .ops = &octeon_ops, + .probe = octeon_ddr_probe, + .platdata_auto_alloc_size = sizeof(struct ddr_priv), +}; diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 4eb7b34e24..0971a7c813 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -46,6 +46,15 @@ config USB_XHCI_MVEBU SoCs, which includes Armada8K, Armada3700 and other Armada family SoCs. +config USB_XHCI_OCTEON + bool "Support for Marvell Octeon family on-chip xHCI USB controller" + depends on ARCH_OCTEON + default y + help + Enables support for the on-chip xHCI controller on Marvell Octeon + family SoCs. This is a driver for the dwc3 to provide the glue logic + to configure the controller. + config USB_XHCI_PCI bool "Support for PCI-based xHCI USB controller" depends on DM_USB diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile index 29d4f87e38..a12e8f2702 100644 --- a/drivers/usb/host/Makefile +++ b/drivers/usb/host/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_USB_XHCI_OMAP) += xhci-omap.o obj-$(CONFIG_USB_XHCI_PCI) += xhci-pci.o obj-$(CONFIG_USB_XHCI_RCAR) += xhci-rcar.o obj-$(CONFIG_USB_XHCI_STI) += dwc3-sti-glue.o +obj-$(CONFIG_USB_XHCI_OCTEON) += dwc3-octeon-glue.o # designware obj-$(CONFIG_USB_DWC2) += dwc2.o diff --git a/drivers/usb/host/dwc3-octeon-glue.c b/drivers/usb/host/dwc3-octeon-glue.c new file mode 100644 index 0000000000..39b3185616 --- /dev/null +++ b/drivers/usb/host/dwc3-octeon-glue.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Octeon family DWC3 specific glue layer + * + * Copyright (C) 2020 Stefan Roese + * + * The low-level init code is based on the Linux driver octeon-usb.c by + * David Daney , which is: + * Copyright (C) 2010-2017 Cavium Networks + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DECLARE_GLOBAL_DATA_PTR; + +#define CVMX_GPIO_BIT_CFGX(i) (0x0001070000000900ull + ((i) * 8)) +#define CVMX_GPIO_XBIT_CFGX(i) (0x0001070000000900ull + \ + ((i) & 31) * 8 - 8 * 16) + +#define GPIO_BIT_CFG_TX_OE BIT_ULL(0) +#define GPIO_BIT_CFG_OUTPUT_SEL GENMASK_ULL(20, 16) + +#define UCTL_CTL_UCTL_RST BIT_ULL(0) +#define UCTL_CTL_UAHC_RST BIT_ULL(1) +#define UCTL_CTL_UPHY_RST BIT_ULL(2) +#define UCTL_CTL_DRD_MODE BIT_ULL(3) +#define UCTL_CTL_SCLK_EN BIT_ULL(4) +#define UCTL_CTL_HS_POWER_EN BIT_ULL(12) +#define UCTL_CTL_SS_POWER_EN BIT_ULL(14) +#define UCTL_CTL_H_CLKDIV_SEL GENMASK_ULL(26, 24) +#define UCTL_CTL_H_CLKDIV_RST BIT_ULL(28) +#define UCTL_CTL_H_CLK_EN BIT_ULL(30) +#define UCTL_CTL_REF_CLK_FSEL GENMASK_ULL(37, 32) +#define UCTL_CTL_REF_CLK_DIV2 BIT_ULL(38) +#define UCTL_CTL_REF_SSP_EN BIT_ULL(39) +#define UCTL_CTL_MPLL_MULTIPLIER GENMASK_ULL(46, 40) +#define UCTL_CTL_SSC_EN BIT_ULL(59) +#define UCTL_CTL_REF_CLK_SEL GENMASK_ULL(61, 60) + +#define UCTL_HOST_CFG 0xe0 +#define UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN BIT_ULL(24) +#define UCTL_HOST_CFG_PPC_EN BIT_ULL(25) + +#define UCTL_SHIM_CFG 0xe8 +#define UCTL_SHIM_CFG_CSR_ENDIAN_MODE GENMASK_ULL(1, 0) +#define UCTL_SHIM_CFG_DMA_ENDIAN_MODE GENMASK_ULL(9, 8) + +#define OCTEON_H_CLKDIV_SEL 8 +#define OCTEON_MIN_H_CLK_RATE 150000000 +#define OCTEON_MAX_H_CLK_RATE 300000000 + +#define CLOCK_50MHZ 50000000 +#define CLOCK_100MHZ 100000000 +#define CLOCK_125MHZ 125000000 + +static u8 clk_div[OCTEON_H_CLKDIV_SEL] = {1, 2, 4, 6, 8, 16, 24, 32}; + +static int dwc3_octeon_config_power(struct udevice *dev, void __iomem *base) +{ + u64 uctl_host_cfg; + u64 gpio_bit; + u32 gpio_pwr[3]; + int gpio, len, power_active_low; + const struct device_node *node = dev_np(dev); + int index = ((u64)base >> 24) & 1; + void __iomem *gpio_bit_cfg; + + if (of_find_property(node, "power", &len)) { + if (len == 12) { + dev_read_u32_array(dev, "power", gpio_pwr, 3); + power_active_low = gpio_pwr[2] & 0x01; + gpio = gpio_pwr[1]; + } else if (len == 8) { + dev_read_u32_array(dev, "power", gpio_pwr, 2); + power_active_low = 0; + gpio = gpio_pwr[1]; + } else { + printf("dwc3 controller clock init failure\n"); + return -EINVAL; + } + + gpio_bit_cfg = ioremap(CVMX_GPIO_BIT_CFGX(gpio), 0); + + if ((OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX)) && gpio <= 31) { + gpio_bit = ioread64(gpio_bit_cfg); + gpio_bit |= GPIO_BIT_CFG_TX_OE; + gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL; + gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL, + index == 0 ? 0x14 : 0x15); + iowrite64(gpio_bit, gpio_bit_cfg); + } else if (gpio <= 15) { + gpio_bit = ioread64(gpio_bit_cfg); + gpio_bit |= GPIO_BIT_CFG_TX_OE; + gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL; + gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL, + index == 0 ? 0x14 : 0x19); + iowrite64(gpio_bit, gpio_bit_cfg); + } else { + gpio_bit_cfg = ioremap(CVMX_GPIO_XBIT_CFGX(gpio), 0); + + gpio_bit = ioread64(gpio_bit_cfg); + gpio_bit |= GPIO_BIT_CFG_TX_OE; + gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL; + gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL, + index == 0 ? 0x14 : 0x19); + iowrite64(gpio_bit, gpio_bit_cfg); + } + + /* Enable XHCI power control and set if active high or low. */ + uctl_host_cfg = ioread64(base + UCTL_HOST_CFG); + uctl_host_cfg |= UCTL_HOST_CFG_PPC_EN; + if (power_active_low) + uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN; + else + uctl_host_cfg |= UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN; + iowrite64(uctl_host_cfg, base + UCTL_HOST_CFG); + + /* Wait for power to stabilize */ + mdelay(10); + } else { + /* Disable XHCI power control and set if active high. */ + uctl_host_cfg = ioread64(base + UCTL_HOST_CFG); + uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_EN; + uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN; + iowrite64(uctl_host_cfg, base + UCTL_HOST_CFG); + dev_warn(dev, "dwc3 controller clock init failure.\n"); + } + + return 0; +} + +static int dwc3_octeon_clocks_start(struct udevice *dev, void __iomem *base) +{ + u64 uctl_ctl; + int ref_clk_sel = 2; + u64 div; + u32 clock_rate; + int mpll_mul; + int i; + u64 h_clk_rate; + void __iomem *uctl_ctl_reg = base; + const char *ss_clock_type; + const char *hs_clock_type; + + i = dev_read_u32(dev, "refclk-frequency", &clock_rate); + if (i) { + printf("No UCTL \"refclk-frequency\"\n"); + return -EINVAL; + } + + ss_clock_type = dev_read_string(dev, "refclk-type-ss"); + if (!ss_clock_type) { + printf("No UCTL \"refclk-type-ss\"\n"); + return -EINVAL; + } + + hs_clock_type = dev_read_string(dev, "refclk-type-hs"); + if (!hs_clock_type) { + printf("No UCTL \"refclk-type-hs\"\n"); + return -EINVAL; + } + + if (strcmp("dlmc_ref_clk0", ss_clock_type) == 0) { + if (strcmp(hs_clock_type, "dlmc_ref_clk0") == 0) { + ref_clk_sel = 0; + } else if (strcmp(hs_clock_type, "pll_ref_clk") == 0) { + ref_clk_sel = 2; + } else { + printf("Invalid HS clock type %s, using pll_ref_clk\n", + hs_clock_type); + } + } else if (strcmp(ss_clock_type, "dlmc_ref_clk1") == 0) { + if (strcmp(hs_clock_type, "dlmc_ref_clk1") == 0) { + ref_clk_sel = 1; + } else if (strcmp(hs_clock_type, "pll_ref_clk") == 0) { + ref_clk_sel = 3; + } else { + printf("Invalid HS clock type %s, using pll_ref_clk\n", + hs_clock_type); + ref_clk_sel = 3; + } + } else { + printf("Invalid SS clock type %s, using dlmc_ref_clk0\n", + ss_clock_type); + } + + if ((ref_clk_sel == 0 || ref_clk_sel == 1) && + clock_rate != CLOCK_100MHZ) + printf("Invalid UCTL clock rate of %u\n", clock_rate); + + /* + * Step 1: Wait for all voltages to be stable...that surely + * happened before this driver is started. SKIP + */ + + /* Step 2: Select GPIO for overcurrent indication, if desired. SKIP */ + + /* Step 3: Assert all resets. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl |= UCTL_CTL_UCTL_RST | UCTL_CTL_UAHC_RST | UCTL_CTL_UPHY_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 4a: Reset the clock dividers. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl |= UCTL_CTL_H_CLKDIV_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 4b: Select controller clock frequency. */ + for (div = ARRAY_SIZE(clk_div) - 1; div >= 0; div--) { + h_clk_rate = gd->bus_clk / clk_div[div]; + if (h_clk_rate <= OCTEON_MAX_H_CLK_RATE && + h_clk_rate >= OCTEON_MIN_H_CLK_RATE) + break; + } + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_H_CLKDIV_SEL; + uctl_ctl |= FIELD_PREP(UCTL_CTL_H_CLKDIV_SEL, div); + uctl_ctl |= UCTL_CTL_H_CLK_EN; + iowrite64(uctl_ctl, uctl_ctl_reg); + uctl_ctl = ioread64(uctl_ctl_reg); + if (div != FIELD_GET(UCTL_CTL_H_CLKDIV_SEL, uctl_ctl) || + !(uctl_ctl & UCTL_CTL_H_CLK_EN)) { + printf("dwc3 controller clock init failure\n"); + return -EINVAL; + } + + /* Step 4c: Deassert the controller clock divider reset. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_H_CLKDIV_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 5a: Reference clock configuration. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_REF_CLK_SEL; + uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_SEL, ref_clk_sel); + uctl_ctl &= ~UCTL_CTL_REF_CLK_FSEL; + uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_FSEL, 0x07); + uctl_ctl &= ~UCTL_CTL_REF_CLK_DIV2; + + switch (clock_rate) { + default: + printf("Invalid ref_clk %u, using %u instead\n", CLOCK_100MHZ, + clock_rate); + fallthrough; + case CLOCK_100MHZ: + mpll_mul = 0x19; + if (ref_clk_sel < 2) { + uctl_ctl &= ~UCTL_CTL_REF_CLK_FSEL; + uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_FSEL, 0x27); + } + break; + case CLOCK_50MHZ: + mpll_mul = 0x32; + break; + case CLOCK_125MHZ: + mpll_mul = 0x28; + break; + } + uctl_ctl &= ~UCTL_CTL_MPLL_MULTIPLIER; + uctl_ctl |= FIELD_PREP(UCTL_CTL_MPLL_MULTIPLIER, mpll_mul); + + /* Step 5b: Configure and enable spread-spectrum for SuperSpeed. */ + uctl_ctl |= UCTL_CTL_SSC_EN; + + /* Step 5c: Enable SuperSpeed. */ + uctl_ctl |= UCTL_CTL_REF_SSP_EN; + + /* Step 5d: Configure PHYs. SKIP */ + + /* Step 6a & 6b: Power up PHYs. */ + uctl_ctl |= UCTL_CTL_HS_POWER_EN; + uctl_ctl |= UCTL_CTL_SS_POWER_EN; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 7: Wait 10 controller-clock cycles to take effect. */ + udelay(10); + + /* Step 8a: Deassert UCTL reset signal. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_UCTL_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 8b: Wait 10 controller-clock cycles. */ + udelay(10); + + /* Step 8c: Setup power-power control. */ + if (dwc3_octeon_config_power(dev, base)) { + printf("Error configuring power\n"); + return -EINVAL; + } + + /* Step 8d: Deassert UAHC reset signal. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_UAHC_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 8e: Wait 10 controller-clock cycles. */ + udelay(10); + + /* Step 9: Enable conditional coprocessor clock of UCTL. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl |= UCTL_CTL_SCLK_EN; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 10: Set for host mode only. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_DRD_MODE; + iowrite64(uctl_ctl, uctl_ctl_reg); + + return 0; +} + +static void dwc3_octeon_set_endian_mode(void __iomem *base) +{ + u64 shim_cfg; + + shim_cfg = ioread64(base + UCTL_SHIM_CFG); + shim_cfg &= ~UCTL_SHIM_CFG_CSR_ENDIAN_MODE; + shim_cfg |= FIELD_PREP(UCTL_SHIM_CFG_CSR_ENDIAN_MODE, 1); + shim_cfg &= ~UCTL_SHIM_CFG_DMA_ENDIAN_MODE; + shim_cfg |= FIELD_PREP(UCTL_SHIM_CFG_DMA_ENDIAN_MODE, 1); + iowrite64(shim_cfg, base + UCTL_SHIM_CFG); +} + +static void dwc3_octeon_phy_reset(void __iomem *base) +{ + u64 uctl_ctl; + + uctl_ctl = ioread64(base); + uctl_ctl &= ~UCTL_CTL_UPHY_RST; + iowrite64(uctl_ctl, base); +} + +static int octeon_dwc3_glue_probe(struct udevice *dev) +{ + void __iomem *base; + + base = dev_remap_addr(dev); + if (IS_ERR(base)) + return PTR_ERR(base); + + dwc3_octeon_clocks_start(dev, base); + dwc3_octeon_set_endian_mode(base); + dwc3_octeon_phy_reset(base); + + return 0; +} + +static int octeon_dwc3_glue_bind(struct udevice *dev) +{ + ofnode node, dwc3_node; + + /* Find snps,dwc3 node from subnode */ + dwc3_node = ofnode_null(); + ofnode_for_each_subnode(node, dev->node) { + if (ofnode_device_is_compatible(node, "snps,dwc3")) + dwc3_node = node; + } + + if (!ofnode_valid(dwc3_node)) { + printf("Can't find dwc3 subnode for %s\n", dev->name); + return -ENODEV; + } + + return dm_scan_fdt_dev(dev); +} + +static const struct udevice_id octeon_dwc3_glue_ids[] = { + { .compatible = "cavium,octeon-7130-usb-uctl" }, + { } +}; + +U_BOOT_DRIVER(dwc3_octeon_glue) = { + .name = "dwc3_octeon_glue", + .id = UCLASS_NOP, + .of_match = octeon_dwc3_glue_ids, + .probe = octeon_dwc3_glue_probe, + .bind = octeon_dwc3_glue_bind, + .flags = DM_FLAG_ALLOC_PRIV_DMA, +}; diff --git a/drivers/usb/host/xhci-dwc3.c b/drivers/usb/host/xhci-dwc3.c index 27f84102db..045de2ffde 100644 --- a/drivers/usb/host/xhci-dwc3.c +++ b/drivers/usb/host/xhci-dwc3.c @@ -122,7 +122,7 @@ static int xhci_dwc3_probe(struct udevice *dev) u32 reg; int ret; - hccr = (struct xhci_hccr *)((uintptr_t)dev_read_addr(dev)); + hccr = (struct xhci_hccr *)((uintptr_t)dev_remap_addr(dev)); hcor = (struct xhci_hcor *)((uintptr_t)hccr + HC_LENGTH(xhci_readl(&(hccr)->cr_capbase))); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b118207d93..13065d7ca9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -722,8 +722,6 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe, BUG_ON(TRB_TO_SLOT_ID(field) != slot_id); BUG_ON(TRB_TO_EP_INDEX(field) != ep_index); - BUG_ON(*(void **)(uintptr_t)le64_to_cpu(event->trans_event.buffer) - - buffer > (size_t)length); record_transfer_result(udev, event, length); xhci_acknowledge_event(ctrl); diff --git a/include/configs/octeon_common.h b/include/configs/octeon_common.h index 530f02ad3c..109ef4064d 100644 --- a/include/configs/octeon_common.h +++ b/include/configs/octeon_common.h @@ -7,13 +7,20 @@ #ifndef __OCTEON_COMMON_H__ #define __OCTEON_COMMON_H__ -/* No DDR init yet -> run in L2 cache with limited resources */ +#if defined(CONFIG_RAM_OCTEON) +#define CONFIG_SYS_MALLOC_LEN (16 << 20) +#define CONFIG_SYS_INIT_SP_OFFSET 0x20100000 +#else +/* No DDR init -> run in L2 cache with limited resources */ #define CONFIG_SYS_MALLOC_LEN (256 << 10) +#define CONFIG_SYS_INIT_SP_OFFSET 0x00180000 +#endif + #define CONFIG_SYS_SDRAM_BASE 0xffffffff80000000 #define CONFIG_SYS_MONITOR_BASE CONFIG_SYS_TEXT_BASE #define CONFIG_SYS_LOAD_ADDR (CONFIG_SYS_SDRAM_BASE + (1 << 20)) -#define CONFIG_SYS_INIT_SP_OFFSET 0x180000 +#define CONFIG_SYS_BOOTM_LEN (64 << 20) /* 64M */ #endif /* __OCTEON_COMMON_H__ */