Linux 2.6/mipsのSMP実装について#2
ロック機構のSMP対応がどうなっているかについて確認。
/* * atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v. */ static __inline__ void atomic_add(int i, atomic_t * v) { if (cpu_has_llsc && R10000_LLSC_WAR) { unsigned long temp; __asm__ __volatile__( " .set mips3 \n" "1: ll %0, %1 # atomic_add \n" " addu %0, %2 \n" " sc %0, %1 \n" " beqzl %0, 1b \n" " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); } else if (cpu_has_llsc) { unsigned long temp; __asm__ __volatile__( " .set mips3 \n" "1: ll %0, %1 # atomic_add \n" " addu %0, %2 \n" " sc %0, %1 \n" " beqz %0, 2f \n" " .subsection 2 \n" "2: b 1b \n" " .previous \n" " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); } else { unsigned long flags; raw_local_irq_save(flags); v->counter += i; raw_local_irq_restore(flags); } }
なんかアセンブリで書いた方が読みやすかったんじゃ?というのは置いといて。
atomic_add()の方では、smp_mb()のようなメモリバリアの対策を特に何もしていないように見える。
/* * Same as above, but return the result value */ static __inline__ int atomic_add_return(int i, atomic_t * v) { unsigned long result; smp_mb(); if (cpu_has_llsc && R10000_LLSC_WAR) { unsigned long temp; __asm__ __volatile__( " .set mips3 \n" "1: ll %1, %2 # atomic_add_return \n" " addu %0, %1, %3 \n" " sc %0, %2 \n" " beqzl %0, 1b \n" " addu %0, %1, %3 \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) : "memory"); } else if (cpu_has_llsc) { unsigned long temp; __asm__ __volatile__( " .set mips3 \n" "1: ll %1, %2 # atomic_add_return \n" " addu %0, %1, %3 \n" " sc %0, %2 \n" " beqz %0, 2f \n" " addu %0, %1, %3 \n" " .subsection 2 \n" "2: b 1b \n" " .previous \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) : "memory"); } else { unsigned long flags; raw_local_irq_save(flags); result = v->counter; result += i; v->counter = result; raw_local_irq_restore(flags); } smp_mb(); return result; }
ところが、結果の値を返すatomic_add_return()になると、処理の前後でsmp_mb()を呼ぶようになる。戻り値を利用する側でメモリアクセスの順序保証が必要になるから、という事だろう。
#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) #define __WEAK_ORDERING_MB " sync \n" #else #define __WEAK_ORDERING_MB " \n" #endif #define smp_mb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") #define smp_rmb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") #define smp_wmb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") #define set_mb(var, value) \ do { var = value; smp_mb(); } while (0)
これもsyncって書けば良いような気がしなくもないんだが、ともかくsyncを実行するマクロらしい。
仮想メモリまわりについても確認。違った、キャッシュまわりだった。
arch/mips/mm/c-sb1.cをまずはみてみる。
static inline void sb1_on_each_cpu(void (*func) (void *info), void *info, int retry, int wait) { preempt_disable(); smp_call_function(func, info, retry, wait); func(info); preempt_enable(); }
/* * Run a function on all other CPUs. * <func> The function to run. This must be fast and non-blocking. * <info> An arbitrary pointer to pass to the function. * <retry> If true, keep retrying until ready. * <wait> If true, wait until function has completed on other CPUs. * [RETURNS] 0 on success, else a negative status code. * * Does not return until remote CPUs are nearly ready to execute <func> * or are or have executed. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler: * * CPU A CPU B * Disable interrupts * smp_call_function() * Take call_lock * Send IPIs * Wait for all cpus to acknowledge IPI * CPU A has not responded, spin waiting * for cpu A to respond, holding call_lock * smp_call_function() * Spin waiting for call_lock * Deadlock Deadlock */ int smp_call_function (void (*func) (void *info), void *info, int retry, int wait) { struct call_data_struct data; int i, cpus = num_online_cpus() - 1; int cpu = smp_processor_id(); /* * Can die spectacularly if this CPU isn't yet marked online */ BUG_ON(!cpu_online(cpu)); if (!cpus) return 0; /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); data.func = func; data.info = info; atomic_set(&data.started, 0); data.wait = wait; if (wait) atomic_set(&data.finished, 0); spin_lock(&smp_call_lock); call_data = &data; smp_mb(); /* Send a message to all other CPUs and wait for them to respond */ for_each_online_cpu(i) if (i != cpu) core_send_ipi(i, SMP_CALL_FUNCTION); /* Wait for response */ /* FIXME: lock-up detection, backtrace on lock-up */ while (atomic_read(&data.started) != cpus) barrier(); if (wait) while (atomic_read(&data.finished) != cpus) barrier(); call_data = NULL; spin_unlock(&smp_call_lock); return 0; }
どうも、コア間通信用の割り込みを起こして言う事を聞いて貰う(或いはCPUを乗っ取るとも言えるか?)為の関数っぽい。
/* * These are routines for dealing with the sb1250 smp capabilities * independent of board/firmware */ /* * Simple enough; everything is set up, so just poke the appropriate mailbox * register, and we should be set */ void core_send_ipi(int cpu, unsigned int action) { __raw_writeq((((u64)action) << 48), mailbox_set_regs[cpu]); }
sb1用のcore_send_ipi()はこんな感じ。
void smp_call_function_interrupt(void) { void (*func) (void *info) = call_data->func; void *info = call_data->info; int wait = call_data->wait; /* * Notify initiating CPU that I've grabbed the data and am * about to execute the function. */ smp_mb(); atomic_inc(&call_data->started); /* * At this point the info structure may be out of scope unless wait==1. */ irq_enter(); (*func)(info); irq_exit(); if (wait) { smp_mb(); atomic_inc(&call_data->finished); } }
多分、これがハンドラー。
これで隣のコアへ割り込んで任意の関数を実行できる訳だ。
でもって、こんな風に使ってる:
#ifdef CONFIG_SMP struct flush_cache_page_args { struct vm_area_struct *vma; unsigned long addr; unsigned long pfn; }; static void sb1_flush_cache_page_ipi(void *info) { struct flush_cache_page_args *args = info; local_sb1_flush_cache_page(args->vma, args->addr, args->pfn); } /* Dirty dcache could be on another CPU, so do the IPIs */ static void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { struct flush_cache_page_args args; if (!(vma->vm_flags & VM_EXEC)) return; addr &= PAGE_MASK; args.vma = vma; args.addr = addr; args.pfn = pfn; sb1_on_each_cpu(sb1_flush_cache_page_ipi, (void *) &args, 1, 1); } #else void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) __attribute__((alias("local_sb1_flush_cache_page"))); #endif
シングルコアの時は単にlocal_sb1_flush_cache_page()を実行するだけの関数だったが、SMP環境では全てのCPU上でこの関数を実行したい為、sb1_on_each_cpu()を使ってIPI割り込み経由でこれを実現している。
他の処理でもやっている事はみな同じ:
#ifdef CONFIG_SMP static void sb1_flush_cache_data_page_ipi(void *info) { unsigned long start = (unsigned long)info; __sb1_writeback_inv_dcache_range(start, start + PAGE_SIZE); } static void sb1_flush_cache_data_page(unsigned long addr) { if (in_atomic()) __sb1_writeback_inv_dcache_range(addr, addr + PAGE_SIZE); else on_each_cpu(sb1_flush_cache_data_page_ipi, (void *) addr, 1, 1); } #else static void local_sb1_flush_cache_data_page(unsigned long addr) { __sb1_writeback_inv_dcache_range(addr, addr + PAGE_SIZE); } void sb1_flush_cache_data_page(unsigned long) __attribute__((alias("local_sb1_flush_cache_data_page"))); #endif
CPUのローカルキャッシュは隣のCPUからは無効化できないので、こうやってIPI経由で各CPU自身にフラッシュを実行させないといけないという訳か。
なるほど。