NetBSD-current/Linuxのpowerpc実装 - キャッシュまわり
NetBSDではcacheの同期にipiを使っていないのだろうか?と思い、powerpcのソースコードを眺めてみたが、それらしい所は見当たらなかった。
以下はpowerpc/ibm4xx/cpu.cのキャッシュ操作部分と思われるコードだが、特にipiは使っていない:
/*
 * These small routines may have to be replaced,
 * if/when we support processors other than the 604.
 *
 * Note on the inline asm below: the operand placed in the RA slot of
 * dcbf/icbi uses the "b" (address base register) constraint instead of
 * "r".  An RA field of 0 is interpreted by the CPU as the constant 0,
 * not as the contents of r0, so if the compiler were free to pick r0
 * for `va' the instruction would act on `offset' rather than on
 * `va + offset'.
 */

/*
 * Issue dcbf for every data cache line in the PAGE_SIZE bytes starting
 * at va, then "sync;isync".  If the current CPU reports a data cache
 * line size of 0, only the "sync;isync" is executed.
 */
void
dcache_flush_page(vaddr_t va)
{
	vsize_t line, off;

	line = curcpu()->ci_ci.dcache_line_size;
	if (line != 0) {
		for (off = 0; off < PAGE_SIZE; off += line)
			__asm volatile("dcbf %0,%1" : : "b" (va), "r" (off));
	}
	__asm volatile("sync;isync" : : );
}

/*
 * Issue icbi for every instruction cache line in the PAGE_SIZE bytes
 * starting at va, then "sync;isync".  If the current CPU reports an
 * instruction cache line size of 0, only the "sync;isync" is executed.
 */
void
icache_flush_page(vaddr_t va)
{
	vsize_t line, off;

	line = curcpu()->ci_ci.icache_line_size;
	if (line != 0) {
		for (off = 0; off < PAGE_SIZE; off += line)
			__asm volatile("icbi %0,%1" : : "b" (va), "r" (off));
	}
	__asm volatile("sync;isync" : : );
}

/*
 * Issue dcbf for every data cache line overlapping [va, va + len),
 * then "sync;isync".  Returns immediately, without the barrier, when
 * len is 0.
 *
 * The loop index has the same unsigned type as len, so the comparison
 * is not a mixed signed/unsigned one and the index cannot overflow a
 * signed int for large lengths.
 */
void
dcache_flush(vaddr_t va, vsize_t len)
{
	vsize_t line, off;

	if (len == 0)
		return;

	line = curcpu()->ci_ci.dcache_line_size;
	if (line != 0) {
		/*
		 * Make sure we flush all cache lines: grow len by va's
		 * offset within its cache line, so that stepping from the
		 * unaligned va still reaches the line holding the last byte.
		 * Assumes the line size is a power of two -- TODO confirm.
		 */
		len += va & (line - 1);
		for (off = 0; off < len; off += line)
			__asm volatile("dcbf %0,%1" : : "b" (va), "r" (off));
	}
	__asm volatile("sync;isync" : : );
}

/*
 * Issue icbi for every instruction cache line overlapping
 * [va, va + len), then "sync;isync".  Returns immediately, without the
 * barrier, when len is 0.
 */
void
icache_flush(vaddr_t va, vsize_t len)
{
	vsize_t line, off;

	if (len == 0)
		return;

	line = curcpu()->ci_ci.icache_line_size;
	if (line != 0) {
		/*
		 * Make sure we flush all cache lines: same length adjustment
		 * as in dcache_flush().  Assumes the line size is a power of
		 * two -- TODO confirm.
		 */
		len += va & (line - 1);
		for (off = 0; off < len; off += line)
			__asm volatile("icbi %0,%1" : : "b" (va), "r" (off));
	}
	__asm volatile("sync;isync" : : );
}
そもそもLinuxのpowerpcのコードでは、キャッシュの同期にIPIを使っているのだろうか?と思い確認してみると、こちらも使っていなかった:
/*
 * Flush instruction cache.
 * This is a no-op on the 601.
 *
 * The method is selected at build time: 8xx, 4xx (with a 403GCX
 * variant), FSL Book-E, or the fallback path that inspects the PVR.
 */
_GLOBAL(flush_instruction_cache)
#if defined(CONFIG_8xx)
	isync
	lis r5, IDC_INVALL@h
	mtspr SPRN_IC_CST, r5
#elif defined(CONFIG_4xx)
#ifdef CONFIG_403GCX
	/* 512 iccci operations, advancing r4 by 16 from KERNELBASE@h each time */
	li r3, 512
	mtctr r3
	lis r4, KERNELBASE@h
1:	iccci 0, r4
	addi r4, r4, 16
	bdnz 1b
#else
	/* a single iccci is issued on other 4xx parts */
	lis r3, KERNELBASE@h
	iccci 0,r3
#endif
#elif CONFIG_FSL_BOOKE
	/*
	 * The feature section below is kept only when
	 * CPU_FTR_SPLIT_ID_CACHE is clear: it sets the CFI/CLFC bits in
	 * L1CSR0 and returns.
	 */
BEGIN_FTR_SECTION
	mfspr r3,SPRN_L1CSR0
	ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
	/* msync; isync recommended here */
	mtspr SPRN_L1CSR0,r3
	isync
	blr
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	/* otherwise: set the ICFI/ICLFR bits in L1CSR1 */
	mfspr r3,SPRN_L1CSR1
	ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
	mtspr SPRN_L1CSR1,r3
#else
	/* extract the upper 16 bits of the PVR and compare them with 1 */
	mfspr r3,SPRN_PVR
	rlwinm r3,r3,16,16,31
	cmpwi 0,r3,1
	beqlr /* for 601, do nothing */
	/* 603/604 processor - use invalidate-all bit in HID0 */
	mfspr r3,SPRN_HID0
	ori r3,r3,HID0_ICFI
	mtspr SPRN_HID0,r3
#endif /* CONFIG_8xx/4xx */
	isync
	blr

/*
 * Write any modified data cache blocks out to memory
 * and invalidate the corresponding instruction cache blocks.
 * This is a no-op on the 601.
 *
 * flush_icache_range(unsigned long start, unsigned long stop)
 *
 * In: r3 = start, r4 = stop.  Uses r5, r6 and CTR as scratch.
 */
_GLOBAL(__flush_icache_range)
BEGIN_FTR_SECTION
	blr /* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	li r5,L1_CACHE_BYTES-1
	andc r3,r3,r5 /* round start down to a cache line boundary */
	subf r4,r3,r4 /* r4 = stop - aligned start */
	add r4,r4,r5 /* round the byte count up ... */
	srwi. r4,r4,L1_CACHE_SHIFT /* ... and convert it to a line count */
	beqlr /* nothing to do for zero lines */
	mtctr r4
	mr r6,r3 /* keep the aligned start for the icbi pass */
1:	dcbst 0,r3
	addi r3,r3,L1_CACHE_BYTES
	bdnz 1b
	sync /* wait for dcbst's to get to ram */
	mtctr r4
2:	icbi 0,r6
	addi r6,r6,L1_CACHE_BYTES
	bdnz 2b
	sync /* additional sync needed on g4 */
	isync
	blr

/*
 * Write any modified data cache blocks out to memory.
 * Does not invalidate the corresponding cache lines (especially for
 * any corresponding instruction cache).
 *
 * clean_dcache_range(unsigned long start, unsigned long stop)
 *
 * In: r3 = start, r4 = stop.  Uses r5 and CTR as scratch.
 */
_GLOBAL(clean_dcache_range)
	li r5,L1_CACHE_BYTES-1
	andc r3,r3,r5 /* round start down to a cache line boundary */
	subf r4,r3,r4 /* r4 = stop - aligned start */
	add r4,r4,r5 /* round the byte count up ... */
	srwi. r4,r4,L1_CACHE_SHIFT /* ... and convert it to a line count */
	beqlr /* nothing to do for zero lines */
	mtctr r4
1:	dcbst 0,r3
	addi r3,r3,L1_CACHE_BYTES
	bdnz 1b
	sync /* wait for dcbst's to get to ram */
	blr

/*
 * Write any modified data cache blocks out to memory and invalidate them.
 * Does not invalidate the corresponding instruction cache blocks.
 *
 * flush_dcache_range(unsigned long start, unsigned long stop)
 *
 * In: r3 = start, r4 = stop.  Uses r5 and CTR as scratch.
 */
_GLOBAL(flush_dcache_range)
	li r5,L1_CACHE_BYTES-1
	andc r3,r3,r5 /* round start down to a cache line boundary */
	subf r4,r3,r4 /* r4 = stop - aligned start */
	add r4,r4,r5 /* round the byte count up ... */
	srwi. r4,r4,L1_CACHE_SHIFT /* ... and convert it to a line count */
	beqlr /* nothing to do for zero lines */
	mtctr r4
1:	dcbf 0,r3
	addi r3,r3,L1_CACHE_BYTES
	bdnz 1b
	sync /* wait for dcbf's to get to ram */
	blr

/*
 * Like above, but invalidate the D-cache. This is used by the 8xx
 * to invalidate the cache so the PPC core doesn't get stale data
 * from the CPM (no cache snooping here :-).
 *
 * invalidate_dcache_range(unsigned long start, unsigned long stop)
 *
 * In: r3 = start, r4 = stop.  Uses r5 and CTR as scratch.
 */
_GLOBAL(invalidate_dcache_range)
	li r5,L1_CACHE_BYTES-1
	andc r3,r3,r5 /* round start down to a cache line boundary */
	subf r4,r3,r4 /* r4 = stop - aligned start */
	add r4,r4,r5 /* round the byte count up ... */
	srwi. r4,r4,L1_CACHE_SHIFT /* ... and convert it to a line count */
	beqlr /* nothing to do for zero lines */
	mtctr r4
1:	dcbi 0,r3
	addi r3,r3,L1_CACHE_BYTES
	bdnz 1b
	sync /* wait for dcbi's to complete */
	blr

/*
 * Flush a particular page from the data cache to RAM.
 * Note: this is necessary because the instruction cache does *not*
 * snoop from the data cache.
 * This is a no-op on the 601 which has a unified cache.
 *
 * void __flush_dcache_icache(void *page)
 *
 * In: r3 = address within the page.  Uses r4, r6 and CTR as scratch.
 */
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
	blr /* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	rlwinm r3,r3,0,0,19 /* Get page base address */
	li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */
	mtctr r4
	mr r6,r3 /* keep the page base for the icbi pass */
0:	dcbst 0,r3 /* Write line to ram */
	addi r3,r3,L1_CACHE_BYTES
	bdnz 0b
	sync
	mtctr r4
1:	icbi 0,r6
	addi r6,r6,L1_CACHE_BYTES
	bdnz 1b
	sync
	isync
	blr

/*
 * Flush a particular page from the data cache to RAM, identified
 * by its physical address. We turn off the MMU so we can just use
 * the physical address (this may be a highmem page without a kernel
 * mapping).
 *
 * void __flush_dcache_icache_phys(unsigned long physaddr)
 *
 * In: r3 = physical address within the page.  Uses r0, r4, r6, r10
 * and CTR as scratch; r10 holds the original MSR for the whole routine.
 */
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
	blr /* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	mfmsr r10 /* save the current MSR */
	rlwinm r0,r10,0,28,26 /* clear DR */
	mtmsr r0
	isync
	rlwinm r3,r3,0,0,19 /* Get page base address */
	li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */
	mtctr r4
	mr r6,r3 /* keep the page base for the icbi pass */
0:	dcbst 0,r3 /* Write line to ram */
	addi r3,r3,L1_CACHE_BYTES
	bdnz 0b
	sync
	mtctr r4
1:	icbi 0,r6
	addi r6,r6,L1_CACHE_BYTES
	bdnz 1b
	sync
	mtmsr r10 /* restore DR */
	isync
	blr
ここらへんの書き方はCPUのキャッシュ機構の構造に依存するんだろう。
とはいえ、NetBSDでも、timebaseの同期など別の用途ではIPIがきちんと使われていた:
void md_start_timebase(volatile struct cpu_hatch_data *h) { int i; #ifdef OPENPIC if (!openpic_base) { #endif /* * wait for secondary spin up (1.5ms @ 604/200MHz) * XXX we cannot use delay() here because timebase is not * running. */ for (i = 0; i < 100000; i++) if (h->running) break; /* Start timebase. */ out32(0xf2800000, 0x100); ppc_send_ipi(1, PPC_IPI_NOMESG); #ifdef OPENPIC } #endif }