android_kernel_oneplus_msm8998/arch/arm64/mm/cache.S
Srinivasarao P c43902eef7 Merge android-4.4.113 (ef588ef) into msm-4.4
* refs/heads/tmp-ef588ef
  Linux 4.4.113
  MIPS: AR7: ensure the port type's FCR value is used
  x86/retpoline: Optimize inline assembler for vmexit_fill_RSB
  x86/pti: Document fix wrong index
  kprobes/x86: Disable optimizing on the function jumps to indirect thunk
  kprobes/x86: Blacklist indirect thunk functions for kprobes
  retpoline: Introduce start/end markers of indirect thunk
  x86/mce: Make machine check speculation protected
  kbuild: modversions for EXPORT_SYMBOL() for asm
  x86/cpu, x86/pti: Do not enable PTI on AMD processors
  arm64: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls
  dm thin metadata: THIN_MAX_CONCURRENT_LOCKS should be 6
  dm btree: fix serious bug in btree_split_beneath()
  libata: apply MAX_SEC_1024 to all LITEON EP1 series devices
  can: peak: fix potential bug in packet fragmentation
  ARM: dts: kirkwood: fix pin-muxing of MPP7 on OpenBlocks A7
  phy: work around 'phys' references to usb-nop-xceiv devices
  tracing: Fix converting enum's from the map in trace_event_eval_update()
  Input: twl4030-vibra - fix sibling-node lookup
  Input: twl6040-vibra - fix child-node lookup
  Input: twl6040-vibra - fix DT node memory management
  Input: 88pm860x-ts - fix child-node lookup
  x86/apic/vector: Fix off by one in error path
  pipe: avoid round_pipe_size() nr_pages overflow on 32-bit
  module: Add retpoline tag to VERMAGIC
  x86/retpoline: Add LFENCE to the retpoline/RSB filling RSB macros
  sched/deadline: Zero out positive runtime after throttling constrained tasks
  scsi: hpsa: fix volume offline state
  af_key: fix buffer overread in parse_exthdrs()
  af_key: fix buffer overread in verify_address_len()
  ALSA: hda - Apply the existing quirk to iMac 14,1
  ALSA: hda - Apply headphone noise quirk for another Dell XPS 13 variant
  ALSA: pcm: Remove yet superfluous WARN_ON()
  futex: Prevent overflow by strengthen input validation
  scsi: sg: disable SET_FORCE_LOW_DMA
  x86/retpoline: Remove compile time warning
  x86/retpoline: Fill return stack buffer on vmexit
  x86/retpoline/irq32: Convert assembler indirect jumps
  x86/retpoline/checksum32: Convert assembler indirect jumps
  x86/retpoline/xen: Convert Xen hypercall indirect jumps
  x86/retpoline/hyperv: Convert assembler indirect jumps
  x86/retpoline/ftrace: Convert ftrace assembler indirect jumps
  x86/retpoline/entry: Convert entry assembler indirect jumps
  x86/retpoline/crypto: Convert crypto assembler indirect jumps
  x86/spectre: Add boot time option to select Spectre v2 mitigation
  x86/retpoline: Add initial retpoline support
  kconfig.h: use __is_defined() to check if MODULE is defined
  EXPORT_SYMBOL() for asm
  x86/asm: Make asm/alternative.h safe from assembly
  x86/kbuild: enable modversions for symbols exported from asm
  x86/asm: Use register variable to get stack pointer value
  x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier
  x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC
  x86/cpu/AMD: Make LFENCE a serializing instruction
  gcov: disable for COMPILE_TEST
  ANDROID: sdcardfs: Move default_normal to superblock
  blkdev: Refactoring block io latency histogram codes
  FROMLIST: arm64: kpti: Fix the interaction between ASID switching and software PAN
  FROMLIST: arm64: Move post_ttbr_update_workaround to C code
  FROMLIST: arm64: mm: Rename post_ttbr0_update_workaround
  sched: EAS: Initialize push_task as NULL to avoid direct reference on out_unlock path

Conflicts:
	arch/arm64/include/asm/efi.h
	arch/arm64/include/asm/mmu_context.h
	drivers/scsi/sg.c
	drivers/scsi/ufs/ufshcd.h

Change-Id: Ibfa06af8ef308077aad6995874d4b7b0a73e95f3
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
2018-01-24 18:27:37 +05:30

281 lines
7 KiB
ArmAsm

/*
* Cache maintenance
*
* Copyright (C) 2001 Deep Blue Solutions Ltd.
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/errno.h>
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
#include <asm/uaccess.h>
/*
* __flush_dcache_all()
*
* Flush the whole D-cache.
*
* Corrupted registers: x0-x7, x9-x11
*/
__flush_dcache_all:
dmb sy // ensure ordering with previous memory accesses
mrs x0, clidr_el1 // read clidr
and x3, x0, #0x7000000 // extract loc from clidr
lsr x3, x3, #23 // left align loc bit field
cbz x3, finished // if loc is 0, then no need to clean
mov x10, #0 // start clean at cache level 0
loop1:
add x2, x10, x10, lsr #1 // work out 3x current cache level
lsr x1, x0, x2 // extract cache type bits from clidr
and x1, x1, #7 // mask of the bits for current cache only
cmp x1, #2 // see what cache we have at this level
b.lt skip // skip if no cache, or just i-cache
save_and_disable_irqs x9 // make CSSELR and CCSIDR access atomic
msr csselr_el1, x10 // select current cache level in csselr
isb // isb to sych the new cssr&csidr
mrs x1, ccsidr_el1 // read the new ccsidr
restore_irqs x9
and x2, x1, #7 // extract the length of the cache lines
add x2, x2, #4 // add 4 (line length offset)
mov x4, #0x3ff
and x4, x4, x1, lsr #3 // find maximum number on the way size
clz w5, w4 // find bit position of way size increment
mov x7, #0x7fff
and x7, x7, x1, lsr #13 // extract max number of the index size
loop2:
mov x9, x4 // create working copy of max way size
loop3:
lsl x6, x9, x5
orr x11, x10, x6 // factor way and cache number into x11
lsl x6, x7, x2
orr x11, x11, x6 // factor index number into x11
dc cisw, x11 // clean & invalidate by set/way
subs x9, x9, #1 // decrement the way
b.ge loop3
subs x7, x7, #1 // decrement the index
b.ge loop2
skip:
add x10, x10, #2 // increment cache number
cmp x3, x10
b.gt loop1
finished:
mov x10, #0 // swith back to cache level 0
msr csselr_el1, x10 // select current cache level in csselr
dsb sy
isb
ret
ENDPROC(__flush_dcache_all)
/*
* flush_cache_all()
*
* Flush the entire cache system. The data cache flush is now achieved
* using atomic clean / invalidates working outwards from L1 cache. This
* is done using Set/Way based cache maintainance instructions. The
* instruction cache can still be invalidated back to the point of
* unification in a single instruction.
*/
ENTRY(flush_cache_all)
mov x12, lr
bl __flush_dcache_all
mov x0, #0
ic ialluis // I+BTB cache invalidate
ret x12
ENDPROC(flush_cache_all)
/*
* flush_icache_range(start,end)
*
* Ensure that the I and D caches are coherent within specified region.
* This is typically used when code has been written to a memory region,
* and will be executed.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
ENTRY(flush_icache_range)
/* FALLTHROUGH */
/*
* __flush_cache_user_range(start,end)
*
* Ensure that the I and D caches are coherent within specified region.
* This is typically used when code has been written to a memory region,
* and will be executed.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
ENTRY(__flush_cache_user_range)
uaccess_ttbr0_enable x2, x3, x4
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
1:
user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
add x4, x4, x2
cmp x4, x1
b.lo 1b
dsb ish
icache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
1:
USER(9f, ic ivau, x4 ) // invalidate I line PoU
add x4, x4, x2
cmp x4, x1
b.lo 1b
dsb ish
isb
mov x0, #0
1:
uaccess_ttbr0_disable x1, x2
ret
9:
mov x0, #-EFAULT
b 1b
ENDPROC(flush_icache_range)
ENDPROC(__flush_cache_user_range)
/*
* __flush_dcache_area(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are cleaned and invalidated to the PoC.
*
* - kaddr - kernel address
* - size - size in question
*/
ENTRY(__flush_dcache_area)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
ENDPIPROC(__flush_dcache_area)
/*
* __clean_dcache_area_pou(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are cleaned to the PoU.
*
* - kaddr - kernel address
* - size - size in question
*/
ENTRY(__clean_dcache_area_pou)
dcache_by_line_op cvau, ish, x0, x1, x2, x3
ret
ENDPROC(__clean_dcache_area_pou)
/*
* __inval_cache_range(start, end)
* - start - start address of region
* - end - end address of region
*/
ENTRY(__inval_cache_range)
/* FALLTHROUGH */
/*
* __dma_inv_range(start, end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
ENTRY(__dma_inv_range)
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
bic x1, x1, x3
b.eq 1f
dc civac, x1 // clean & invalidate D / U line
1: tst x0, x3 // start cache line aligned?
bic x0, x0, x3
b.eq 2f
dc civac, x0 // clean & invalidate D / U line
b 3f
2: dc ivac, x0 // invalidate D / U line
3: add x0, x0, x2
cmp x0, x1
b.lo 2b
dsb sy
ret
ENDPIPROC(__inval_cache_range)
ENDPROC(__dma_inv_range)
/*
* __dma_clean_range(start, end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
ENTRY(__dma_clean_range)
dcache_line_size x2, x3
sub x3, x2, #1
bic x0, x0, x3
1:
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
dc cvac, x0
alternative_else
dc civac, x0
alternative_endif
add x0, x0, x2
cmp x0, x1
b.lo 1b
dsb sy
ret
ENDPROC(__dma_clean_range)
/*
* __dma_flush_range(start, end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
ENTRY(__dma_flush_range)
dcache_line_size x2, x3
sub x3, x2, #1
bic x0, x0, x3
1: dc civac, x0 // clean & invalidate D / U line
add x0, x0, x2
cmp x0, x1
b.lo 1b
dsb sy
ret
ENDPIPROC(__dma_flush_range)
/*
* __dma_map_area(start, size, dir)
* - start - kernel virtual start address
* - size - size of region
* - dir - DMA direction
*/
ENTRY(__dma_map_area)
add x1, x1, x0
cmp w2, #DMA_FROM_DEVICE
b.eq __dma_inv_range
b __dma_clean_range
ENDPIPROC(__dma_map_area)
/*
* __dma_unmap_area(start, size, dir)
* - start - kernel virtual start address
* - size - size of region
* - dir - DMA direction
*/
ENTRY(__dma_unmap_area)
add x1, x1, x0
cmp w2, #DMA_TO_DEVICE
b.ne __dma_inv_range
ret
ENDPIPROC(__dma_unmap_area)