/* * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved. * * SPDX-License-Identifier: GPL-2.0+ */ #include #include #include #include #include #include #include #include /* * [ NOTE 1 ]: * Data cache (L1 D$ or SL$) entire invalidate operation or data cache disable * operation may result in unexpected behavior and data loss even if we flush * data cache right before invalidation. That may happens if we store any context * on stack (like we store BLINK register on stack before function call). * BLINK register is the register where return address is automatically saved * when we do function call with instructions like 'bl'. * * There is the real example: * We may hang in the next code as we store any BLINK register on stack in * invalidate_dcache_all() function. * * void flush_dcache_all() { * __dc_entire_op(OP_FLUSH); * // Other code // * } * * void invalidate_dcache_all() { * __dc_entire_op(OP_INV); * // Other code // * } * * void foo(void) { * flush_dcache_all(); * invalidate_dcache_all(); * } * * Now let's see what really happens during that code execution: * * foo() * |->> call flush_dcache_all * [return address is saved to BLINK register] * [push BLINK] (save to stack) ![point 1] * |->> call __dc_entire_op(OP_FLUSH) * [return address is saved to BLINK register] * [flush L1 D$] * return [jump to BLINK] * <<------ * [other flush_dcache_all code] * [pop BLINK] (get from stack) * return [jump to BLINK] * <<------ * |->> call invalidate_dcache_all * [return address is saved to BLINK register] * [push BLINK] (save to stack) ![point 2] * |->> call __dc_entire_op(OP_FLUSH) * [return address is saved to BLINK register] * [invalidate L1 D$] ![point 3] * // Oops!!! * // We lose return address from invalidate_dcache_all function: * // we save it to stack and invalidate L1 D$ after that! * return [jump to BLINK] * <<------ * [other invalidate_dcache_all code] * [pop BLINK] (get from stack) * // we don't have this data in L1 dcache as we invalidated it in [point 3] * // so we get it from next memory level (for example DDR memory) * // but in the memory we have value which we save in [point 1], which * // is return address from flush_dcache_all function (instead of * // address from current invalidate_dcache_all function which we * // saved in [point 2] !) * return [jump to BLINK] * <<------ * // As BLINK points to invalidate_dcache_all, we call it again and * // loop forever. * * Fortunately we may fix that by using flush & invalidation of D$ with a single * one instruction (instead of flush and invalidation instructions pair) and * enabling force function inline with '__attribute__((always_inline))' gcc * attribute to avoid any function call (and BLINK store) between cache flush * and disable. * * * [ NOTE 2 ]: * As of today we only support the following cache configurations on ARC. * Other configurations may exist in HW (for example, since version 3.0 HS * supports SL$ (L2 system level cache) disable) but we don't support it in SW. * Configuration 1: * ______________________ * | | * | ARC CPU | * |______________________| * ___|___ ___|___ * | | | | * | L1 I$ | | L1 D$ | * |_______| |_______| * on/off on/off * ___|______________|____ * | | * | main memory | * |______________________| * * Configuration 2: * ______________________ * | | * | ARC CPU | * |______________________| * ___|___ ___|___ * | | | | * | L1 I$ | | L1 D$ | * |_______| |_______| * on/off on/off * ___|______________|____ * | | * | L2 (SL$) | * |______________________| * always must be on * ___|______________|____ * | | * | main memory | * |______________________| * * Configuration 3: * ______________________ * | | * | ARC CPU | * |______________________| * ___|___ ___|___ * | | | | * | L1 I$ | | L1 D$ | * |_______| |_______| * on/off must be on * ___|______________|____ _______ * | | | | * | L2 (SL$) |-----| IOC | * |______________________| |_______| * always must be on on/off * ___|______________|____ * | | * | main memory | * |______________________| */ DECLARE_GLOBAL_DATA_PTR; /* Bit values in IC_CTRL */ #define IC_CTRL_CACHE_DISABLE BIT(0) /* Bit values in DC_CTRL */ #define DC_CTRL_CACHE_DISABLE BIT(0) #define DC_CTRL_INV_MODE_FLUSH BIT(6) #define DC_CTRL_FLUSH_STATUS BIT(8) #define OP_INV BIT(0) #define OP_FLUSH BIT(1) #define OP_FLUSH_N_INV (OP_FLUSH | OP_INV) /* Bit val in SLC_CONTROL */ #define SLC_CTRL_DIS 0x001 #define SLC_CTRL_IM 0x040 #define SLC_CTRL_BUSY 0x100 #define SLC_CTRL_RGN_OP_INV 0x200 #define CACHE_LINE_MASK (~(gd->arch.l1_line_sz - 1)) static inline bool pae_exists(void) { /* TODO: should we compare mmu version from BCR and from CONFIG? */ #if (CONFIG_ARC_MMU_VER >= 4) union bcr_mmu_4 mmu4; mmu4.word = read_aux_reg(ARC_AUX_MMU_BCR); if (mmu4.fields.pae) return true; #endif /* (CONFIG_ARC_MMU_VER >= 4) */ return false; } static inline bool icache_exists(void) { union bcr_di_cache ibcr; ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD); return !!ibcr.fields.ver; } static inline bool icache_enabled(void) { if (!icache_exists()) return false; return !(read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE); } static inline bool dcache_exists(void) { union bcr_di_cache dbcr; dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD); return !!dbcr.fields.ver; } static inline bool dcache_enabled(void) { if (!dcache_exists()) return false; return !(read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE); } static inline bool slc_exists(void) { if (is_isa_arcv2()) { union bcr_generic sbcr; sbcr.word = read_aux_reg(ARC_BCR_SLC); return !!sbcr.fields.ver; } return false; } static inline bool slc_data_bypass(void) { /* * If L1 data cache is disabled SL$ is bypassed and all load/store * requests are sent directly to main memory. */ return !dcache_enabled(); } static inline bool ioc_exists(void) { if (is_isa_arcv2()) { union bcr_clust_cfg cbcr; cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); return cbcr.fields.c; } return false; } static inline bool ioc_enabled(void) { /* * We check only CONFIG option instead of IOC HW state check as IOC * must be disabled by default. */ if (is_ioc_enabled()) return ioc_exists(); return false; } static void __slc_entire_op(const int op) { unsigned int ctrl; if (!slc_exists()) return; ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); if (!(op & OP_FLUSH)) /* i.e. OP_INV */ ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ else ctrl |= SLC_CTRL_IM; write_aux_reg(ARC_AUX_SLC_CTRL, ctrl); if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ write_aux_reg(ARC_AUX_SLC_INVALIDATE, 0x1); else write_aux_reg(ARC_AUX_SLC_FLUSH, 0x1); /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ read_aux_reg(ARC_AUX_SLC_CTRL); /* Important to wait for flush to complete */ while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); } static void slc_upper_region_init(void) { /* * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist * only if PAE exists in current HW. So we had to check pae_exist * before using them. */ if (!pae_exists()) return; /* * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0 * as we don't use PAE40. */ write_aux_reg(ARC_AUX_SLC_RGN_END1, 0); write_aux_reg(ARC_AUX_SLC_RGN_START1, 0); } static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 unsigned int ctrl; unsigned long end; if (!slc_exists()) return; /* * The Region Flush operation is specified by CTRL.RGN_OP[11..9] * - b'000 (default) is Flush, * - b'001 is Invalidate if CTRL.IM == 0 * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 */ ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); /* Don't rely on default value of IM bit */ if (!(op & OP_FLUSH)) /* i.e. OP_INV */ ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ else ctrl |= SLC_CTRL_IM; if (op & OP_INV) ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ else ctrl &= ~SLC_CTRL_RGN_OP_INV; write_aux_reg(ARC_AUX_SLC_CTRL, ctrl); /* * Lower bits are ignored, no need to clip * END needs to be setup before START (latter triggers the operation) * END can't be same as START, so add (l2_line_sz - 1) to sz */ end = paddr + sz + gd->arch.slc_line_sz - 1; /* * Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1) * are always == 0 as we don't use PAE40, so we only setup lower ones * (ARC_AUX_SLC_RGN_END and ARC_AUX_SLC_RGN_START) */ write_aux_reg(ARC_AUX_SLC_RGN_END, end); write_aux_reg(ARC_AUX_SLC_RGN_START, paddr); /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ read_aux_reg(ARC_AUX_SLC_CTRL); while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); #endif /* CONFIG_ISA_ARCV2 */ } static void arc_ioc_setup(void) { /* IOC Aperture start is equal to DDR start */ unsigned int ap_base = CONFIG_SYS_SDRAM_BASE; /* IOC Aperture size is equal to DDR size */ long ap_size = CONFIG_SYS_SDRAM_SIZE; /* Unsupported configuration. See [ NOTE 2 ] for more details. */ if (!slc_exists()) panic("Try to enable IOC but SLC is not present"); /* Unsupported configuration. See [ NOTE 2 ] for more details. */ if (!dcache_enabled()) panic("Try to enable IOC but L1 D$ is disabled"); flush_n_invalidate_dcache_all(); if (!is_power_of_2(ap_size) || ap_size < 4096) panic("IOC Aperture size must be power of 2 and bigger 4Kib"); /* * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, * so setting 0x11 implies 512M, 0x12 implies 1G... */ write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE, order_base_2(ap_size / 1024) - 2); /* IOC Aperture start must be aligned to the size of the aperture */ if (ap_base % ap_size != 0) panic("IOC Aperture start must be aligned to the size of the aperture"); write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12); write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1); write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); } static void read_decode_cache_bcr_arcv2(void) { #ifdef CONFIG_ISA_ARCV2 union bcr_slc_cfg slc_cfg; if (slc_exists()) { slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG); gd->arch.slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; /* * We don't support configuration where L1 I$ or L1 D$ is * absent but SL$ exists. See [ NOTE 2 ] for more details. */ if (!icache_exists() || !dcache_exists()) panic("Unsupported cache configuration: SLC exists but one of L1 caches is absent"); } #endif /* CONFIG_ISA_ARCV2 */ } void read_decode_cache_bcr(void) { int dc_line_sz = 0, ic_line_sz = 0; union bcr_di_cache ibcr, dbcr; ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD); if (ibcr.fields.ver) { gd->arch.l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len; if (!ic_line_sz) panic("Instruction exists but line length is 0\n"); } dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD); if (dbcr.fields.ver) { gd->arch.l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len; if (!dc_line_sz) panic("Data cache exists but line length is 0\n"); } if (ic_line_sz && dc_line_sz && (ic_line_sz != dc_line_sz)) panic("Instruction and data cache line lengths differ\n"); } void cache_init(void) { read_decode_cache_bcr(); if (is_isa_arcv2()) read_decode_cache_bcr_arcv2(); if (is_isa_arcv2() && ioc_enabled()) arc_ioc_setup(); if (is_isa_arcv2() && slc_exists()) slc_upper_region_init(); } int icache_status(void) { return icache_enabled(); } void icache_enable(void) { if (icache_exists()) write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) & ~IC_CTRL_CACHE_DISABLE); } void icache_disable(void) { if (icache_exists()) write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) | IC_CTRL_CACHE_DISABLE); } /* IC supports only invalidation */ static inline void __ic_entire_invalidate(void) { if (!icache_enabled()) return; /* Any write to IC_IVIC register triggers invalidation of entire I$ */ write_aux_reg(ARC_AUX_IC_IVIC, 1); /* * As per ARC HS databook (see chapter 5.3.3.2) * it is required to add 3 NOPs after each write to IC_IVIC. */ __builtin_arc_nop(); __builtin_arc_nop(); __builtin_arc_nop(); read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */ } void invalidate_icache_all(void) { __ic_entire_invalidate(); /* * If SL$ is bypassed for data it is used only for instructions, * so we need to invalidate it too. * TODO: HS 3.0 supports SLC disable so we need to check slc * enable/disable status here. */ if (is_isa_arcv2() && slc_data_bypass()) __slc_entire_op(OP_INV); } int dcache_status(void) { return dcache_enabled(); } void dcache_enable(void) { if (!dcache_exists()) return; write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) & ~(DC_CTRL_INV_MODE_FLUSH | DC_CTRL_CACHE_DISABLE)); } void dcache_disable(void) { if (!dcache_exists()) return; write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) | DC_CTRL_CACHE_DISABLE); } /* Common Helper for Line Operations on D-cache */ static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz, const int cacheop) { unsigned int aux_cmd; int num_lines; /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL; sz += paddr & ~CACHE_LINE_MASK; paddr &= CACHE_LINE_MASK; num_lines = DIV_ROUND_UP(sz, gd->arch.l1_line_sz); while (num_lines-- > 0) { #if (CONFIG_ARC_MMU_VER == 3) write_aux_reg(ARC_AUX_DC_PTAG, paddr); #endif write_aux_reg(aux_cmd, paddr); paddr += gd->arch.l1_line_sz; } } static void __before_dc_op(const int op) { unsigned int ctrl; ctrl = read_aux_reg(ARC_AUX_DC_CTRL); /* IM bit implies flush-n-inv, instead of vanilla inv */ if (op == OP_INV) ctrl &= ~DC_CTRL_INV_MODE_FLUSH; else ctrl |= DC_CTRL_INV_MODE_FLUSH; write_aux_reg(ARC_AUX_DC_CTRL, ctrl); } static void __after_dc_op(const int op) { if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS); } static inline void __dc_entire_op(const int cacheop) { int aux; if (!dcache_enabled()) return; __before_dc_op(cacheop); if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ aux = ARC_AUX_DC_IVDC; else aux = ARC_AUX_DC_FLSH; write_aux_reg(aux, 0x1); __after_dc_op(cacheop); } static inline void __dc_line_op(unsigned long paddr, unsigned long sz, const int cacheop) { if (!dcache_enabled()) return; __before_dc_op(cacheop); __dcache_line_loop(paddr, sz, cacheop); __after_dc_op(cacheop); } void invalidate_dcache_range(unsigned long start, unsigned long end) { if (start >= end) return; /* * ARCv1 -> call __dc_line_op * ARCv2 && L1 D$ disabled -> nothing * ARCv2 && L1 D$ enabled && IOC enabled -> nothing * ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op */ if (!is_isa_arcv2() || !ioc_enabled()) __dc_line_op(start, end - start, OP_INV); if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass()) __slc_rgn_op(start, end - start, OP_INV); } void flush_dcache_range(unsigned long start, unsigned long end) { if (start >= end) return; /* * ARCv1 -> call __dc_line_op * ARCv2 && L1 D$ disabled -> nothing * ARCv2 && L1 D$ enabled && IOC enabled -> nothing * ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op */ if (!is_isa_arcv2() || !ioc_enabled()) __dc_line_op(start, end - start, OP_FLUSH); if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass()) __slc_rgn_op(start, end - start, OP_FLUSH); } void flush_cache(unsigned long start, unsigned long size) { flush_dcache_range(start, start + size); } /* * As invalidate_dcache_all() is not used in generic U-Boot code and as we * don't need it in arch/arc code alone (invalidate without flush) we implement * flush_n_invalidate_dcache_all (flush and invalidate in 1 operation) because * it's much safer. See [ NOTE 1 ] for more details. */ void flush_n_invalidate_dcache_all(void) { __dc_entire_op(OP_FLUSH_N_INV); if (is_isa_arcv2() && !slc_data_bypass()) __slc_entire_op(OP_FLUSH_N_INV); } void flush_dcache_all(void) { __dc_entire_op(OP_FLUSH); if (is_isa_arcv2() && !slc_data_bypass()) __slc_entire_op(OP_FLUSH); } /* * This is function to cleanup all caches (and therefore sync I/D caches) which * can be used for cleanup before linux launch or to sync caches during * relocation. */ void sync_n_cleanup_cache_all(void) { __dc_entire_op(OP_FLUSH_N_INV); /* * If SL$ is bypassed for data it is used only for instructions, * and we shouldn't flush it. So invalidate it instead of flush_n_inv. */ if (is_isa_arcv2()) { if (slc_data_bypass()) __slc_entire_op(OP_INV); else __slc_entire_op(OP_FLUSH_N_INV); } __ic_entire_invalidate(); }