Commit e1ba1c99 authored by Linus Torvalds

Merge tag 'riscv-for-linus-4.15-rc2_cleanups' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/linux

Pull RISC-V cleanups and ABI fixes from Palmer Dabbelt:
 "This contains a handful of small cleanups that are a result of
  feedback that didn't make it into our original patch set, either
  because the feedback hadn't been given yet, I missed the original
  emails, or we weren't ready to submit the changes yet.

  I've been maintaining the various cleanup patch sets I have as their
  own branches, which I then merged together and signed. Each merge
  commit has a short summary of the changes, and each branch is based on
  your latest tag (4.15-rc1, in this case). If this isn't the right way
  to do this then feel free to suggest something else, but it seems sane
  to me.

  Here's a short summary of the changes, roughly in order of how
  interesting they are.

   - libgcc.h has been moved from include/lib, where it's the only
     member, to include/linux. This is meant to avoid tab completion
     conflicts.

   - VDSO entries for clock_gettime/gettimeofday/getcpu have been added.
     These are simple syscalls now, but we want to let glibc use them
     from the start so we can make them faster later.

   - A VDSO entry for instruction cache flushing has been added so
     userspace can flush the instruction cache; a usage sketch follows
     this summary.

   - The VDSO symbol versions for __vdso_cmpxchg{32,64} have been
     removed, as those VDSO entries don't actually exist.

   - __io_writes has been corrected to respect the given type.

   - A new READ_ONCE in arch_spin_is_locked().

   - __test_and_op_bit_ord() is now actually ordered.

   - Various small fixes throughout the tree to enable allmodconfig to
     build cleanly.

   - Removal of some dead code in our atomic support headers.

   - Improvements to various comments in our atomic support headers"
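
To make the new icache-flush interface concrete, here is a minimal userspace sketch. It is illustrative only: the syscall-number arithmetic and the flag value mirror the vdso-syscalls.h and cacheflush.h hunks below, but calling through syscall(2) rather than the vDSO entry, the availability of __NR_arch_specific_syscall in userspace headers, and the helper name are all assumptions.

    /*
     * Hedged sketch: flush the instruction cache after writing
     * instructions to memory (e.g. in a JIT).  flush_my_icache() is a
     * made-up name; __NR_riscv_flush_icache matches the definition
     * added by this merge.
     */
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_riscv_flush_icache
    #define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
    #endif
    #define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL

    static void flush_my_icache(void *start, void *end, int local_only)
    {
            /* flags == 0 asks for the flush to be visible on all harts;
             * the LOCAL bit restricts it to the calling hart. */
            uintptr_t flags = local_only ? SYS_RISCV_FLUSH_ICACHE_LOCAL : 0;

            syscall(__NR_riscv_flush_icache, (uintptr_t)start,
                    (uintptr_t)end, flags);
    }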

* tag 'riscv-for-linus-4.15-rc2_cleanups' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/linux: (23 commits)
  RISC-V: __io_writes should respect the length argument
  move libgcc.h to include/linux
  RISC-V: Clean up an unused include
  RISC-V: Allow userspace to flush the instruction cache
  RISC-V: Flush I$ when making a dirty page executable
  RISC-V: Add missing include
  RISC-V: Use define for get_cycles like other architectures
  RISC-V: Provide stub of setup_profiling_timer()
  RISC-V: Export some expected symbols for modules
  RISC-V: move empty_zero_page definition to C and export it
  RISC-V: io.h: type fixes for warnings
  RISC-V: use RISCV_{INT,SHORT} instead of {INT,SHORT} for asm macros
  RISC-V: use generic serial.h
  RISC-V: remove spin_unlock_wait()
  RISC-V: `sfence.vma` orders the instruction cache
  RISC-V: Add READ_ONCE in arch_spin_is_locked()
  RISC-V: __test_and_op_bit_ord should be strongly ordered
  RISC-V: Remove smb_mb__{before,after}_spinlock()
  RISC-V: Remove __smp_bp__{before,after}_atomic
  RISC-V: Comment on why {,cmp}xchg is ordered how it is
  ...
parents 4b1967c9 3b62de26
Showing changed files with 286 additions and 135 deletions
@@ -40,6 +40,7 @@ generic-y += resource.h
 generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += sembuf.h
+generic-y += serial.h
 generic-y += setup.h
 generic-y += shmbuf.h
 generic-y += shmparam.h
...
@@ -58,17 +58,17 @@
 #endif

 #if (__SIZEOF_INT__ == 4)
-#define INT		__ASM_STR(.word)
-#define SZINT		__ASM_STR(4)
-#define LGINT		__ASM_STR(2)
+#define RISCV_INT	__ASM_STR(.word)
+#define RISCV_SZINT	__ASM_STR(4)
+#define RISCV_LGINT	__ASM_STR(2)
 #else
 #error "Unexpected __SIZEOF_INT__"
 #endif

 #if (__SIZEOF_SHORT__ == 2)
-#define SHORT		__ASM_STR(.half)
-#define SZSHORT	__ASM_STR(2)
-#define LGSHORT	__ASM_STR(1)
+#define RISCV_SHORT	__ASM_STR(.half)
+#define RISCV_SZSHORT	__ASM_STR(2)
+#define RISCV_LGSHORT	__ASM_STR(1)
 #else
 #error "Unexpected __SIZEOF_SHORT__"
 #endif
...
@@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
  * have the AQ or RL bits set.  These don't return anything, so there's only
  * one version to worry about.
  */
-#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix)	\
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
 static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
 {									\
 	__asm__ __volatile__ (						\
 		"amo" #asm_op "." #asm_type " zero, %1, %0"		\
 		: "+A" (v->counter)					\
 		: "r" (I)						\
 		: "memory");						\
 }

 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I)					\
-        ATOMIC_OP (op, asm_op, c_op, I, w, int,   )
+#define ATOMIC_OPS(op, asm_op, I)					\
+        ATOMIC_OP (op, asm_op, I, w, int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, c_op, I)					\
-        ATOMIC_OP (op, asm_op, c_op, I, w, int,   )			\
-        ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I)					\
+        ATOMIC_OP (op, asm_op, I, w, int,   )				\
+        ATOMIC_OP (op, asm_op, I, d, long, 64)
 #endif

-ATOMIC_OPS(add, add, +,  i)
-ATOMIC_OPS(sub, add, +, -i)
-ATOMIC_OPS(and, and, &,  i)
-ATOMIC_OPS( or,  or, |,  i)
-ATOMIC_OPS(xor, xor, ^,  i)
+ATOMIC_OPS(add, add,  i)
+ATOMIC_OPS(sub, add, -i)
+ATOMIC_OPS(and, and,  i)
+ATOMIC_OPS( or,  or,  i)
+ATOMIC_OPS(xor, xor,  i)

 #undef ATOMIC_OP
 #undef ATOMIC_OPS
@@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^, i)
  * There's two flavors of these: the arithmatic ops have both fetch and return
  * versions, while the logical ops only have fetch versions.
  */
-#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix)	\
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix)	\
 static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v)	\
 {									\
 	register c_type ret;						\
@@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)			\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int,   )	\
+        ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int,   )	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)			\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int,   )	\
+        ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int,   )	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int,   )	\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64)	\
+        ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64)	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
 #endif
@@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl,         )
 #undef ATOMIC_OPS

 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)			\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int,   )
+#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or)			\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)			\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int,   )	\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or)			\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int,   )	\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64)
 #endif

-ATOMIC_OPS(and, and, &,  i,      , _relaxed)
-ATOMIC_OPS(and, and, &,  i, .aq  , _acquire)
-ATOMIC_OPS(and, and, &,  i, .rl  , _release)
-ATOMIC_OPS(and, and, &,  i, .aqrl,         )
+ATOMIC_OPS(and, and,  i,      , _relaxed)
+ATOMIC_OPS(and, and,  i, .aq  , _acquire)
+ATOMIC_OPS(and, and,  i, .rl  , _release)
+ATOMIC_OPS(and, and,  i, .aqrl,         )

-ATOMIC_OPS( or,  or, |,  i,      , _relaxed)
-ATOMIC_OPS( or,  or, |,  i, .aq  , _acquire)
-ATOMIC_OPS( or,  or, |,  i, .rl  , _release)
-ATOMIC_OPS( or,  or, |,  i, .aqrl,         )
+ATOMIC_OPS( or,  or,  i,      , _relaxed)
+ATOMIC_OPS( or,  or,  i, .aq  , _acquire)
+ATOMIC_OPS( or,  or,  i, .rl  , _release)
+ATOMIC_OPS( or,  or,  i, .aqrl,         )

-ATOMIC_OPS(xor, xor, ^,  i,      , _relaxed)
-ATOMIC_OPS(xor, xor, ^,  i, .aq  , _acquire)
-ATOMIC_OPS(xor, xor, ^,  i, .rl  , _release)
-ATOMIC_OPS(xor, xor, ^,  i, .aqrl,         )
+ATOMIC_OPS(xor, xor,  i,      , _relaxed)
+ATOMIC_OPS(xor, xor,  i, .aq  , _acquire)
+ATOMIC_OPS(xor, xor,  i, .rl  , _release)
+ATOMIC_OPS(xor, xor,  i, .aqrl,         )

 #undef ATOMIC_OPS
@@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add, <, 0)
 #undef ATOMIC_OP
 #undef ATOMIC_OPS

-#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix)			\
+#define ATOMIC_OP(op, func_op, I, c_type, prefix)			\
 static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v)	\
 {									\
 	atomic##prefix##_##func_op(I, v);				\
 }

-#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix)		\
+#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix)		\
 static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v)	\
 {									\
 	return atomic##prefix##_fetch_##func_op(I, v);			\
@@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, c_op, I)					\
-        ATOMIC_OP       (op, asm_op, c_op, I, int,   )			\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, int,   )			\
+        ATOMIC_OP       (op, asm_op, I, int,   )			\
+        ATOMIC_FETCH_OP (op, asm_op, I, int,   )			\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, c_op, I)					\
-        ATOMIC_OP       (op, asm_op, c_op, I, int,   )			\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, int,   )			\
+        ATOMIC_OP       (op, asm_op, I, int,   )			\
+        ATOMIC_FETCH_OP (op, asm_op, I, int,   )			\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, int,   )			\
-        ATOMIC_OP       (op, asm_op, c_op, I, long, 64)		\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64)		\
+        ATOMIC_OP       (op, asm_op, I, long, 64)			\
+        ATOMIC_FETCH_OP (op, asm_op, I, long, 64)			\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
 #endif
@@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
 /*
  * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
- * {cmp,}xchg and the operations that return, so they need a barrier.  We just
- * use the other implementations directly.
+ * {cmp,}xchg and the operations that return, so they need a barrier.
+ */
+
+/*
+ * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
+ * assigning the same barrier to both the LR and SC operations, but that might
+ * not make any sense.  We're waiting on a memory model specification to
+ * determine exactly what the right thing to do is here.
  */
 #define ATOMIC_OP(c_t, prefix, c_or, size, asm_or)			\
 static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n)	\
...
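
For reference, with the c_op argument gone, ATOMIC_OP(add, add, i, w, int, ) from the first hunk above expands to the following (line continuations resolved, whitespace adjusted; this is just the preprocessor output of the macro shown above, not new code in the merge):

    static __always_inline void atomic_add(int i, atomic_t *v)
    {
            __asm__ __volatile__ (
                    "amoadd.w zero, %1, %0"
                    : "+A" (v->counter)
                    : "r" (i)
                    : "memory");
    }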
@@ -38,29 +38,6 @@
 #define smp_rmb()	RISCV_FENCE(r,r)
 #define smp_wmb()	RISCV_FENCE(w,w)

-/*
- * These fences exist to enforce ordering around the relaxed AMOs.  The
- * documentation defines that
- * "
- *     atomic_fetch_add();
- *   is equivalent to:
- *     smp_mb__before_atomic();
- *     atomic_fetch_add_relaxed();
- *     smp_mb__after_atomic();
- * "
- * So we emit full fences on both sides.
- */
-#define __smb_mb__before_atomic()	smp_mb()
-#define __smb_mb__after_atomic()	smp_mb()
-
-/*
- * These barriers prevent accesses performed outside a spinlock from being moved
- * inside a spinlock.  Since RISC-V sets the aq/rl bits on our spinlock only
- * enforce release consistency, we need full fences here.
- */
-#define smb_mb__before_spinlock()	smp_mb()
-#define smb_mb__after_spinlock()	smp_mb()
-
 #include <asm-generic/barrier.h>

 #endif /* __ASSEMBLY__ */
...
@@ -67,7 +67,7 @@
 	: "memory");

 #define __test_and_op_bit(op, mod, nr, addr)		\
-	__test_and_op_bit_ord(op, mod, nr, addr, )
+	__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
 #define __op_bit(op, mod, nr, addr)			\
 	__op_bit_ord(op, mod, nr, addr, )
...
@@ -27,8 +27,8 @@
 typedef u32 bug_insn_t;

 #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-#define __BUG_ENTRY_ADDR	INT " 1b - 2b"
-#define __BUG_ENTRY_FILE	INT " %0 - 2b"
+#define __BUG_ENTRY_ADDR	RISCV_INT " 1b - 2b"
+#define __BUG_ENTRY_FILE	RISCV_INT " %0 - 2b"
 #else
 #define __BUG_ENTRY_ADDR	RISCV_PTR " 1b"
 #define __BUG_ENTRY_FILE	RISCV_PTR " %0"
@@ -38,7 +38,7 @@ typedef u32 bug_insn_t;
 #define __BUG_ENTRY			\
 	__BUG_ENTRY_ADDR "\n\t"		\
 	__BUG_ENTRY_FILE "\n\t"		\
-	SHORT " %1"
+	RISCV_SHORT " %1"
 #else
 #define __BUG_ENTRY			\
 	__BUG_ENTRY_ADDR
...
@@ -18,22 +18,44 @@
 #undef flush_icache_range
 #undef flush_icache_user_range
+#undef flush_dcache_page

 static inline void local_flush_icache_all(void)
 {
 	asm volatile ("fence.i" ::: "memory");
 }

+#define PG_dcache_clean PG_arch_1
+
+static inline void flush_dcache_page(struct page *page)
+{
+	if (test_bit(PG_dcache_clean, &page->flags))
+		clear_bit(PG_dcache_clean, &page->flags);
+}
+
+/*
+ * RISC-V doesn't have an instruction to flush parts of the instruction cache,
+ * so instead we just flush the whole thing.
+ */
+#define flush_icache_range(start, end) flush_icache_all()
+#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
+
 #ifndef CONFIG_SMP
-#define flush_icache_range(start, end) local_flush_icache_all()
-#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all()
+#define flush_icache_all() local_flush_icache_all()
+#define flush_icache_mm(mm, local) flush_icache_all()
 #else /* CONFIG_SMP */
-#define flush_icache_range(start, end) sbi_remote_fence_i(0)
-#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0)
+#define flush_icache_all() sbi_remote_fence_i(0)
+void flush_icache_mm(struct mm_struct *mm, bool local);
 #endif /* CONFIG_SMP */

+/*
+ * Bits in sys_riscv_flush_icache()'s flags argument.
+ */
+#define SYS_RISCV_FLUSH_ICACHE_LOCAL	1UL
+#define SYS_RISCV_FLUSH_ICACHE_ALL	(SYS_RISCV_FLUSH_ICACHE_LOCAL)
+
 #endif /* _ASM_RISCV_CACHEFLUSH_H */
@@ -19,6 +19,8 @@
 #ifndef _ASM_RISCV_IO_H
 #define _ASM_RISCV_IO_H

+#include <linux/types.h>
+
 #ifdef CONFIG_MMU

 extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
@@ -32,7 +34,7 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 #define ioremap_wc(addr, size) ioremap((addr), (size))
 #define ioremap_wt(addr, size) ioremap((addr), (size))

-extern void iounmap(void __iomem *addr);
+extern void iounmap(volatile void __iomem *addr);

 #endif /* CONFIG_MMU */
@@ -250,7 +252,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 	const ctype *buf = buffer;					\
									\
 	do {								\
-		__raw_writeq(*buf++, addr);				\
+		__raw_write ## len(*buf++, addr);			\
 	} while (--count);						\
 }									\
 afence;								\
@@ -266,9 +268,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar())
 __io_reads_ins(ins,  u8, b, __io_pbr(), __io_par())
 __io_reads_ins(ins, u16, w, __io_pbr(), __io_par())
 __io_reads_ins(ins, u32, l, __io_pbr(), __io_par())
-#define insb(addr, buffer, count) __insb((void __iomem *)addr, buffer, count)
-#define insw(addr, buffer, count) __insw((void __iomem *)addr, buffer, count)
-#define insl(addr, buffer, count) __insl((void __iomem *)addr, buffer, count)
+#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count)
+#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count)
+#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count)

 __io_writes_outs(writes,  u8, b, __io_bw(), __io_aw())
 __io_writes_outs(writes, u16, w, __io_bw(), __io_aw())
@@ -280,9 +282,9 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw())
 __io_writes_outs(outs,  u8, b, __io_pbw(), __io_paw())
 __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw())
 __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw())
-#define outsb(addr, buffer, count) __outsb((void __iomem *)addr, buffer, count)
-#define outsw(addr, buffer, count) __outsw((void __iomem *)addr, buffer, count)
-#define outsl(addr, buffer, count) __outsl((void __iomem *)addr, buffer, count)
+#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count)
+#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count)
+#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count)

 #ifdef CONFIG_64BIT
 __io_reads_ins(reads, u64, q, __io_br(), __io_ar())
...
@@ -19,6 +19,10 @@
 typedef struct {
 	void *vdso;
+#ifdef CONFIG_SMP
+	/* A local icache flush is needed before user execution can resume. */
+	cpumask_t icache_stale_mask;
+#endif
 } mm_context_t;

 #endif /* __ASSEMBLY__ */
...
 /*
  * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -14,11 +15,13 @@
 #ifndef _ASM_RISCV_MMU_CONTEXT_H
 #define _ASM_RISCV_MMU_CONTEXT_H

+#include <linux/mm_types.h>
 #include <asm-generic/mm_hooks.h>

 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <asm/tlbflush.h>
+#include <asm/cacheflush.h>

 static inline void enter_lazy_tlb(struct mm_struct *mm,
 	struct task_struct *task)
@@ -46,12 +49,54 @@ static inline void set_pgdir(pgd_t *pgd)
 	csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
 }

+/*
+ * When necessary, performs a deferred icache flush for the given MM context,
+ * on the local CPU.  RISC-V has no direct mechanism for instruction cache
+ * shoot downs, so instead we send an IPI that informs the remote harts they
+ * need to flush their local instruction caches.  To avoid pathologically slow
+ * behavior in a common case (a bunch of single-hart processes on a many-hart
+ * machine, ie 'make -j') we avoid the IPIs for harts that are not currently
+ * executing a MM context and instead schedule a deferred local instruction
+ * cache flush to be performed before execution resumes on each hart.  This
+ * actually performs that local instruction cache flush, which implicitly only
+ * refers to the current hart.
+ */
+static inline void flush_icache_deferred(struct mm_struct *mm)
+{
+#ifdef CONFIG_SMP
+	unsigned int cpu = smp_processor_id();
+	cpumask_t *mask = &mm->context.icache_stale_mask;
+
+	if (cpumask_test_cpu(cpu, mask)) {
+		cpumask_clear_cpu(cpu, mask);
+		/*
+		 * Ensure the remote hart's writes are visible to this hart.
+		 * This pairs with a barrier in flush_icache_mm.
+		 */
+		smp_mb();
+		local_flush_icache_all();
+	}
+#endif
+}
+
 static inline void switch_mm(struct mm_struct *prev,
 	struct mm_struct *next, struct task_struct *task)
 {
 	if (likely(prev != next)) {
+		/*
+		 * Mark the current MM context as inactive, and the next as
+		 * active.  This is at least used by the icache flushing
+		 * routines in order to determine who should be flushed.
+		 */
+		unsigned int cpu = smp_processor_id();
+
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+
 		set_pgdir(next->pgd);
 		local_flush_tlb_all();
+
+		flush_icache_deferred(next);
 	}
 }
...
@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
 #define pte_offset_map(dir, addr)	pte_offset_kernel((dir), (addr))
 #define pte_unmap(pte)			((void)(pte))

-/*
- * Certain architectures need to do special things when PTEs within
- * a page table are directly modified.  Thus, the following hook is
- * made available.
- */
-static inline void set_pte(pte_t *ptep, pte_t pteval)
-{
-	*ptep = pteval;
-}
-
-static inline void set_pte_at(struct mm_struct *mm,
-	unsigned long addr, pte_t *ptep, pte_t pteval)
-{
-	set_pte(ptep, pteval);
-}
-
-static inline void pte_clear(struct mm_struct *mm,
-	unsigned long addr, pte_t *ptep)
-{
-	set_pte_at(mm, addr, ptep, __pte(0));
-}
-
 static inline int pte_present(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_PRESENT);
@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte)
 	return (pte_val(pte) == 0);
 }

-/* static inline int pte_read(pte_t pte) */
-
 static inline int pte_write(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_WRITE;
 }

+static inline int pte_exec(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_EXEC;
+}
+
 static inline int pte_huge(pte_t pte)
 {
 	return pte_present(pte)
 		&& (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
 }

-/* static inline int pte_exec(pte_t pte) */
-
 static inline int pte_dirty(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_DIRTY;
@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
 	return pte_val(pte_a) == pte_val(pte_b);
 }

+/*
+ * Certain architectures need to do special things when PTEs within
+ * a page table are directly modified.  Thus, the following hook is
+ * made available.
+ */
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	*ptep = pteval;
+}
+
+void flush_icache_pte(pte_t pte);
+
+static inline void set_pte_at(struct mm_struct *mm,
+	unsigned long addr, pte_t *ptep, pte_t pteval)
+{
+	if (pte_present(pteval) && pte_exec(pteval))
+		flush_icache_pte(pteval);
+
+	set_pte(ptep, pteval);
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+	unsigned long addr, pte_t *ptep)
+{
+	set_pte_at(mm, addr, ptep, __pte(0));
+}
+
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 	unsigned long address, pte_t *ptep,
...
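
The flush_icache_pte() used by set_pte_at() above is only declared in this header; its definition lives in a file not shown on this page. A plausible sketch, consistent with the PG_dcache_clean bookkeeping added in cacheflush.h above (the body is an assumption, not part of the diff):

    /* Hedged sketch: flush the I$ only the first time a page is mapped
     * executable, using PG_dcache_clean (aliased to PG_arch_1) to
     * remember that the page is already clean. */
    void flush_icache_pte(pte_t pte)
    {
            struct page *page = pte_page(pte);

            if (!test_and_set_bit(PG_dcache_clean, &page->flags))
                    flush_icache_all();
    }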
@@ -24,7 +24,7 @@

 /* FIXME: Replace this with a ticket lock, like MIPS. */

-#define arch_spin_is_locked(x)	((x)->lock != 0)
+#define arch_spin_is_locked(x)	(READ_ONCE((x)->lock) != 0)

 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
@@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	}
 }

-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_rmb();
-	do {
-		cpu_relax();
-	} while (arch_spin_is_locked(lock));
-	smp_acquire__after_ctrl_dep();
-}
-
 /***********************************************************/

 static inline void arch_read_lock(arch_rwlock_t *lock)
...
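
The READ_ONCE() above matters to callers that poll the lock word: a plain (x)->lock read is not volatile, so the compiler may hoist the load out of a loop and spin on a stale register value. A hedged illustration of the hazard (the polling loop is invented for this example):

    /* Without READ_ONCE() in arch_spin_is_locked(), the compiler could
     * load lock->lock once and never re-read it, turning this wait into
     * an infinite loop even after the lock is released. */
    while (arch_spin_is_locked(lock))
            cpu_relax();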
@@ -18,7 +18,7 @@

 typedef unsigned long cycles_t;

-static inline cycles_t get_cycles(void)
+static inline cycles_t get_cycles_inline(void)
 {
 	cycles_t n;
@@ -27,6 +27,7 @@ static inline cycles_t get_cycles(void)
 		: "=r" (n));
 	return n;
 }
+#define get_cycles get_cycles_inline

 #ifdef CONFIG_64BIT
 static inline uint64_t get_cycles64(void)
...
@@ -17,7 +17,12 @@

 #ifdef CONFIG_MMU

-/* Flush entire local TLB */
+#include <linux/mm_types.h>
+
+/*
+ * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
+ * cache as well, so a 'fence.i' is not necessary.
+ */
 static inline void local_flush_tlb_all(void)
 {
 	__asm__ __volatile__ ("sfence.vma" : : : "memory");
...
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
+#define _ASM_RISCV_VDSO_SYSCALLS_H
+
+#ifdef CONFIG_SMP
+
+/* These syscalls are only used by the vDSO and are not in the uapi. */
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+#endif
+
+#endif /* _ASM_RISCV_VDSO_H */
@@ -38,4 +38,8 @@ struct vdso_data {
 	(void __user *)((unsigned long)(base) + __vdso_##name);	\
 })

+#ifdef CONFIG_SMP
+asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+#endif
+
 #endif /* _ASM_RISCV_VDSO_H */
@@ -152,6 +152,3 @@ END(_start)

 __PAGE_ALIGNED_BSS
 	/* Empty zero page */
 	.balign PAGE_SIZE
-ENTRY(empty_zero_page)
-	.fill (empty_zero_page + PAGE_SIZE) - ., 1, 0x00
-END(empty_zero_page)
@@ -12,4 +12,7 @@
 /*
  * Assembly functions that may be used (directly or indirectly) by modules
  */
+EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__copy_user);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
@@ -58,7 +58,12 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
 #endif /* CONFIG_CMDLINE_BOOL */

 unsigned long va_pa_offset;
+EXPORT_SYMBOL(va_pa_offset);
 unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);

 /* The lucky hart to first increment this variable will boot the other cores */
 atomic_t hart_lottery;
...
@@ -38,6 +38,13 @@ enum ipi_message_type {
 	IPI_MAX
 };

+/* Unsupported */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
 irqreturn_t handle_ipi(void)
 {
 	unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
@@ -108,3 +115,51 @@ void smp_send_reschedule(int cpu)
 {
 	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 }
+
+/*
+ * Performs an icache flush for the given MM context.  RISC-V has no direct
+ * mechanism for instruction cache shoot downs, so instead we send an IPI that
+ * informs the remote harts they need to flush their local instruction caches.
+ * To avoid pathologically slow behavior in a common case (a bunch of
+ * single-hart processes on a many-hart machine, ie 'make -j') we avoid the
+ * IPIs for harts that are not currently executing a MM context and instead
+ * schedule a deferred local instruction cache flush to be performed before
+ * execution resumes on each hart.
+ */
+void flush_icache_mm(struct mm_struct *mm, bool local)
+{
+	unsigned int cpu;
+	cpumask_t others, *mask;
+
+	preempt_disable();
+
+	/* Mark every hart's icache as needing a flush for this MM. */
+	mask = &mm->context.icache_stale_mask;
+	cpumask_setall(mask);
+
+	/* Flush this hart's I$ now, and mark it as flushed. */
+	cpu = smp_processor_id();
+	cpumask_clear_cpu(cpu, mask);
+	local_flush_icache_all();
+
+	/*
+	 * Flush the I$ of other harts concurrently executing, and mark them as
+	 * flushed.
+	 */
+	cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+	local |= cpumask_empty(&others);
+	if (mm != current->active_mm || !local)
+		sbi_remote_fence_i(others.bits);
+	else {
+		/*
+		 * It's assumed that at least one strongly ordered operation is
+		 * performed on this hart between setting a hart's cpumask bit
+		 * and scheduling this MM context on that hart.  Sending an SBI
+		 * remote message will do this, but in the case where no
+		 * messages are sent we still need to order this hart's writes
+		 * with flush_icache_deferred().
+		 */
+		smp_mb();
+	}
+
+	preempt_enable();
+}