Linux 2.6/mipsのSMP実装について#2

ロック機構のSMP対応がどうなっているかについて確認。

/*
 * atomic_add - add integer to atomic variable
 * @i: integer value to add
 * @v: pointer of type atomic_t
 *
 * Atomically adds @i to @v.
 *
 * One of three implementations is chosen by cpu_has_llsc and
 * R10000_LLSC_WAR: two ll/sc retry loops and an interrupt-masking
 * fallback.  No smp_mb() is issued here and the asm statements carry no
 * "memory" clobber, so this function by itself does not order other
 * memory accesses.
 */
static __inline__ void atomic_add(int i, atomic_t * v)
{
	if (cpu_has_llsc && R10000_LLSC_WAR) {
		unsigned long temp;

		/*
		 * ll/sc loop: load-linked the counter, add @i, store-conditional
		 * the sum.  sc leaves 0 in %0 on failure, in which case the
		 * branch-likely (beqzl) jumps back to 1: and retries.
		 * NOTE(review): the beqzl form is presumably the R10000 ll/sc
		 * erratum workaround implied by the macro name - confirm.
		 */
		__asm__ __volatile__(
		"	.set	mips3					\n"
		"1:	ll	%0, %1		# atomic_add		\n"
		"	addu	%0, %2					\n"
		"	sc	%0, %1					\n"
		"	beqzl	%0, 1b					\n"
		"	.set	mips0					\n"
		: "=&r" (temp), "=m" (v->counter)
		: "Ir" (i), "m" (v->counter));
	} else if (cpu_has_llsc) {
		unsigned long temp;

		/*
		 * Same ll/sc loop, but using a plain beqz.  The retry jump
		 * ("2: b 1b") is assembled into subsection 2, i.e. out of line
		 * from the straight-line success path.
		 */
		__asm__ __volatile__(
		"	.set	mips3					\n"
		"1:	ll	%0, %1		# atomic_add		\n"
		"	addu	%0, %2					\n"
		"	sc	%0, %1					\n"
		"	beqz	%0, 2f					\n"
		"	.subsection 2					\n"
		"2:	b	1b					\n"
		"	.previous					\n"
		"	.set	mips0					\n"
		: "=&r" (temp), "=m" (v->counter)
		: "Ir" (i), "m" (v->counter));
	} else {
		unsigned long flags;

		/*
		 * No ll/sc available: perform the read-modify-write between
		 * raw_local_irq_save() and raw_local_irq_restore().
		 * NOTE(review): this presumably only masks interrupts on the
		 * current CPU and so is not SMP-safe on its own - confirm.
		 */
		raw_local_irq_save(flags);
		v->counter += i;
		raw_local_irq_restore(flags);
	}
}

なんかアセンブリで書いた方が読みやすかったんじゃ？というのは置いといて。
ll/scによる再試行ループでアトミック性自体は確保しているが、メモリバリア（アクセス順序の保証）については特に何も対策していないように見える。

/*
 * atomic_add_return - add integer to atomic variable and return the result
 * @i: integer value to add
 * @v: pointer of type atomic_t
 *
 * Atomically adds @i to @v and returns the new value.  The update uses the
 * same three strategies as atomic_add() (two ll/sc loops and an
 * interrupt-masking fallback), but is bracketed by smp_mb() on both sides
 * and the asm statements additionally carry a "memory" clobber.
 */
static __inline__ int atomic_add_return(int i, atomic_t * v)
{
	unsigned long result;

	/* Barrier before the atomic update. */
	smp_mb();

	if (cpu_has_llsc && R10000_LLSC_WAR) {
		unsigned long temp;

		/*
		 * %1 (temp) holds the value loaded by ll, %0 (result) the sum.
		 * sc overwrites %0 with its success flag, so once the loop
		 * exits the trailing addu recomputes the sum from %1 and %3.
		 * A failed sc (flag 0) retries via the branch-likely beqzl.
		 */
		__asm__ __volatile__(
		"	.set	mips3					\n"
		"1:	ll	%1, %2		# atomic_add_return	\n"
		"	addu	%0, %1, %3				\n"
		"	sc	%0, %2					\n"
		"	beqzl	%0, 1b					\n"
		"	addu	%0, %1, %3				\n"
		"	.set	mips0					\n"
		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
		: "Ir" (i), "m" (v->counter)
		: "memory");
	} else if (cpu_has_llsc) {
		unsigned long temp;

		/*
		 * Same sequence with a plain beqz; the retry jump back to 1:
		 * is placed in subsection 2.  As above, the addu after the
		 * branch restores the sum that sc clobbered in %0.
		 */
		__asm__ __volatile__(
		"	.set	mips3					\n"
		"1:	ll	%1, %2		# atomic_add_return	\n"
		"	addu	%0, %1, %3				\n"
		"	sc	%0, %2					\n"
		"	beqz	%0, 2f					\n"
		"	addu	%0, %1, %3				\n"
		"	.subsection 2					\n"
		"2:	b	1b					\n"
		"	.previous					\n"
		"	.set	mips0					\n"
		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
		: "Ir" (i), "m" (v->counter)
		: "memory");
	} else {
		unsigned long flags;

		/*
		 * No ll/sc available: read, add and write back between
		 * raw_local_irq_save() and raw_local_irq_restore().
		 */
		raw_local_irq_save(flags);
		result = v->counter;
		result += i;
		v->counter = result;
		raw_local_irq_restore(flags);
	}

	/* Barrier after the atomic update. */
	smp_mb();

	return result;
}

ところが、値を返すatomic_add_return()になると、更新処理の前後でsmp_mb()を呼ぶようになる。Linuxでは「値を返すアトミック操作は前後にメモリバリアを伴うこと」が要求されている為と思われる（Documentation/atomic_ops.txt参照）。

/*
 * Instruction text shared by the smp_*mb() macros below: a MIPS "sync"
 * when both CONFIG_WEAK_ORDERING and CONFIG_SMP are defined, otherwise a
 * line that contains no instruction.
 */
#if !defined(CONFIG_SMP) || !defined(CONFIG_WEAK_ORDERING)
#define __WEAK_ORDERING_MB	"		\n"
#else
#define __WEAK_ORDERING_MB	"       sync	\n"
#endif

/*
 * All three barriers expand to the same asm statement.  The "memory"
 * clobber makes each of them a compiler barrier even when no instruction
 * is emitted.
 */
#define smp_mb() \
	__asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory")
#define smp_rmb()	smp_mb()
#define smp_wmb()	smp_mb()

/* Assign value to var, then issue smp_mb(). */
#define set_mb(var, value) \
	do { \
		var = value; \
		smp_mb(); \
	} while (0)

これもsyncって書けば良いような気がしなくもないんだが、ともかくCONFIG_WEAK_ORDERINGかつCONFIG_SMPの時だけsyncを実行し、それ以外の構成ではコンパイラに対するバリア（"memory" clobber）としてだけ働くマクロらしい。

仮想メモリまわりについても確認。違った、キャッシュまわりだった。

arch/mips/mm/c-sb1.cをまずはみてみる。

/*
 * Run func(info) on every CPU: the other CPUs are asked to run it through
 * smp_call_function(), then it is called directly on the calling CPU.
 * Preemption is disabled around both steps.  The status returned by
 * smp_call_function() is not propagated to the caller.
 */
static inline void sb1_on_each_cpu(void (*func) (void *info), void *info,
				   int retry, int wait)
{
	preempt_disable();

	/* Remote CPUs first ... */
	(void) smp_call_function(func, info, retry, wait);

	/* ... then this CPU. */
	(*func)(info);

	preempt_enable();
}

/*
 * Run a function on all other CPUs.
 *  <func>      The function to run. This must be fast and non-blocking.
 *  <info>      An arbitrary pointer to pass to the function.
 *  <retry>     If true, keep retrying until ready.
 *  <wait>      If true, wait until function has completed on other CPUs.
 *  [RETURNS]   0 on success, else a negative status code.
 *
 * Note: in the body below <retry> is never referenced and the only value
 * returned is 0.
 *
 * Does not return until remote CPUs are nearly ready to execute <func>
 * or are or have executed.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler:
 *
 * CPU A                               CPU B
 * Disable interrupts
 *                                     smp_call_function()
 *                                     Take call_lock
 *                                     Send IPIs
 *                                     Wait for all cpus to acknowledge IPI
 *                                     CPU A has not responded, spin waiting
 *                                     for cpu A to respond, holding call_lock
 * smp_call_function()
 * Spin waiting for call_lock
 * Deadlock                            Deadlock
 */
int smp_call_function (void (*func) (void *info), void *info, int retry,
								int wait)
{
	/* Request descriptor; lives on this function's stack. */
	struct call_data_struct data;
	/* cpus = number of online CPUs other than the caller. */
	int i, cpus = num_online_cpus() - 1;
	int cpu = smp_processor_id();

	/*
	 * Can die spectacularly if this CPU isn't yet marked online
	 */
	BUG_ON(!cpu_online(cpu));

	/* Nobody else online: nothing to do. */
	if (!cpus)
		return 0;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	/* The finished counter is only used when the caller asked to wait. */
	if (wait)
		atomic_set(&data.finished, 0);

	/*
	 * smp_call_lock is held from publishing call_data until it is
	 * cleared again below.
	 */
	spin_lock(&smp_call_lock);
	call_data = &data;
	/* Barrier between publishing call_data and sending the IPIs. */
	smp_mb();

	/* Send a message to all other CPUs and wait for them to respond */
	for_each_online_cpu(i)
		if (i != cpu)
			core_send_ipi(i, SMP_CALL_FUNCTION);

	/* Wait for response */
	/* FIXME: lock-up detection, backtrace on lock-up */
	/* Spin until every other CPU has incremented data.started. */
	while (atomic_read(&data.started) != cpus)
		barrier();

	/* Optionally also spin until every other CPU has bumped data.finished. */
	if (wait)
		while (atomic_read(&data.finished) != cpus)
			barrier();
	call_data = NULL;
	spin_unlock(&smp_call_lock);

	return 0;
}

どうも、コア間通信用の割り込みを起こして言う事を聞いて貰う（或いはCPUを乗っ取るとも言えるか？）為の関数っぽい。

/*
 * These are routines for dealing with the sb1250 smp capabilities
 * independent of board/firmware
 */

/*
 * Simple enough; everything is set up, so just poke the appropriate mailbox
 * register, and we should be set
 */
void core_send_ipi(int cpu, unsigned int action)
{
	__raw_writeq((((u64)action) << 48), mailbox_set_regs[cpu]);
}

sb1用のcore_send_ipi()はこんな感じ。

/*
 * Receiving side of smp_call_function(): runs the function described by
 * the global call_data on this CPU.
 *
 * func, info and wait are copied out of *call_data before the started
 * counter is incremented; after that increment call_data is only touched
 * again when wait is set.
 * NOTE(review): presumably invoked from the SMP_CALL_FUNCTION IPI handler,
 * which is not shown here - confirm.
 */
void smp_call_function_interrupt(void)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;
	int wait = call_data->wait;

	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function.
	 */
	smp_mb();
	atomic_inc(&call_data->started);

	/*
	 * At this point the info structure may be out of scope unless wait==1.
	 */
	irq_enter();
	(*func)(info);
	irq_exit();

	/* Report completion only when the initiator asked to wait for it. */
	if (wait) {
		smp_mb();
		atomic_inc(&call_data->finished);
	}
}

多分、これがハンドラー。
これで隣のコアへ割り込んで任意の関数を実行できる訳だ。

でもって、こんな風に使ってる：

#ifdef CONFIG_SMP
/*
 * Argument bundle handed through the void *info pointer to
 * sb1_flush_cache_page_ipi(), which forwards the three fields unchanged to
 * local_sb1_flush_cache_page().
 */
struct flush_cache_page_args {
	struct vm_area_struct *vma;	/* first argument of the local flush */
	unsigned long addr;		/* second argument; the sender masks it with PAGE_MASK */
	unsigned long pfn;		/* third argument of the local flush */
};

static void sb1_flush_cache_page_ipi(void *info)
{
	struct flush_cache_page_args *args = info;

	local_sb1_flush_cache_page(args->vma, args->addr, args->pfn);
}

/*
 * SMP version: dirty dcache lines could be held by another CPU, so
 * local_sb1_flush_cache_page() is run on every CPU through
 * sb1_on_each_cpu().  Nothing is done for mappings without VM_EXEC.
 */
static void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
{
	struct flush_cache_page_args args;

	if (vma->vm_flags & VM_EXEC) {
		/* Pass the page-aligned address; retry=1, wait=1. */
		args.vma = vma;
		args.addr = addr & PAGE_MASK;
		args.pfn = pfn;
		sb1_on_each_cpu(sb1_flush_cache_page_ipi, &args, 1, 1);
	}
}
#else
/*
 * Non-SMP build: no cross-CPU call is needed, so sb1_flush_cache_page is
 * declared as an alias of local_sb1_flush_cache_page (defined elsewhere).
 */
void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
	__attribute__((alias("local_sb1_flush_cache_page")));
#endif

シングルコアの時は単にlocal_sb1_flush_cache_page()を実行するだけの関数だったが、SMP環境では全てのCPU上でこの関数を実行したい為、sb1_on_each_cpu()を使ってIPI経由でこれを実現している。

他の処理でもやっている事はみな同じ：

#ifdef CONFIG_SMP
/*
 * Per-CPU callback: @info carries the start address itself (cast to a
 * pointer); write back and invalidate the dcache for one PAGE_SIZE range
 * starting there.
 */
static void sb1_flush_cache_data_page_ipi(void *info)
{
	const unsigned long page_start = (unsigned long) info;
	const unsigned long page_end = page_start + PAGE_SIZE;

	__sb1_writeback_inv_dcache_range(page_start, page_end);
}

/*
 * SMP version: write back and invalidate the dcache for the page at @addr.
 * Outside atomic context the operation is run on every CPU via
 * on_each_cpu(); when in_atomic() is true only the local range operation
 * is performed.
 */
static void sb1_flush_cache_data_page(unsigned long addr)
{
	if (!in_atomic()) {
		on_each_cpu(sb1_flush_cache_data_page_ipi, (void *) addr, 1, 1);
		return;
	}

	__sb1_writeback_inv_dcache_range(addr, addr + PAGE_SIZE);
}
#else

/*
 * Non-SMP version: write back and invalidate the dcache for the
 * PAGE_SIZE range starting at @addr.
 */
static void local_sb1_flush_cache_data_page(unsigned long addr)
{
	const unsigned long range_end = addr + PAGE_SIZE;

	__sb1_writeback_inv_dcache_range(addr, range_end);
}

/* Without SMP the public name is just an alias of the local routine. */
void sb1_flush_cache_data_page(unsigned long)
	__attribute__((alias("local_sb1_flush_cache_data_page")));
#endif

CPUのローカルキャッシュは隣のCPUから消す事が出来ないので、こうしないといけないという訳か。
なるほど。