patch-2.4.20 linux-2.4.20/arch/ppc64/kernel/htab.c

diff -urN linux-2.4.19/arch/ppc64/kernel/htab.c linux-2.4.20/arch/ppc64/kernel/htab.c
@@ -1,10 +1,11 @@
 /*
- * 
- *
  * PowerPC64 port by Mike Corrigan and Dave Engebretsen
  *   {mikejc|engebret}@us.ibm.com
  *
  *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * SMP scalability work:
+ *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
  * 
  *    Module name: htab.c
  *
@@ -18,17 +19,7 @@
  */
 
 #include <linux/config.h>
-#include <asm/processor.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/init.h>
-#include <asm/system.h>
-#include <asm/iSeries/LparData.h>
 #include <linux/spinlock.h>
-#include <asm/ppcdebug.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/proc_fs.h>
@@ -36,62 +27,74 @@
 #include <linux/sysctl.h>
 #include <linux/ctype.h>
 #include <linux/cache.h>
+
+#include <asm/ppcdebug.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+#include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/naca.h>
-#include <asm/system.h>
 #include <asm/pmc.h>
 #include <asm/machdep.h>
 #include <asm/lmb.h>
+#include <asm/abs_addr.h>
+#include <asm/io.h>
 #include <asm/eeh.h>
-
-/* For iSeries */
+#include <asm/hvcall.h>
+#include <asm/iSeries/LparData.h>
 #include <asm/iSeries/HvCallHpt.h>
 
-/* Note:  pte   --> Linux PTE
+/*
+ * Note:  pte   --> Linux PTE
  *        HPTE  --> PowerPC Hashed Page Table Entry
+ *
+ * Execution context:
+ *   htab_initialize is called with the MMU off (of course), but
+ *   the kernel has been copied down to zero so it can directly
+ *   reference global data.  At this point it is very difficult
+ *   to print debug info.
+ *
  */
 
 HTAB htab_data = {NULL, 0, 0, 0, 0};
 
-int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
-		   void *buffer, size_t *lenp);
-
-void htab_initialize(void);
-void make_pte_LPAR(HPTE *htab,
-	      unsigned long va, unsigned long pa, int mode,
-	      unsigned long hash_mask, int large);
-
-extern unsigned long reloc_offset(void);
-extern unsigned long get_kernel_vsid( unsigned long ea );
-extern void cacheable_memzero( void *, unsigned int );
-
 extern unsigned long _SDR1;
 extern unsigned long klimit;
 
-extern unsigned long _ASR;
-extern inline void make_ste(unsigned long stab,
-			    unsigned long esid, unsigned long vsid);
-
-extern char _stext[], _etext[], __start_naca[], __end_stab[];
-
-static spinlock_t hash_table_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-
-#define PTRRELOC(x)	((typeof(x))((unsigned long)(x) - offset))
-#define PTRUNRELOC(x)	((typeof(x))((unsigned long)(x) + offset))
-#define RELOC(x)	(*PTRRELOC(&(x)))
-
-extern unsigned long htab_size( unsigned long );
-unsigned long hpte_getword0_iSeries( unsigned long slot );
+void make_pte(HPTE *htab, unsigned long va, unsigned long pa,
+	      int mode, unsigned long hash_mask, int large);
+long plpar_pte_enter(unsigned long flags,
+		     unsigned long ptex,
+		     unsigned long new_pteh, unsigned long new_ptel,
+		     unsigned long *old_pteh_ret, unsigned long *old_ptel_ret);
+static long hpte_remove(unsigned long hpte_group);
+static long rpa_lpar_hpte_remove(unsigned long hpte_group);
+static long iSeries_hpte_remove(unsigned long hpte_group);
+
+static spinlock_t pSeries_tlbie_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t pSeries_lpar_tlbie_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t hash_table_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 #define KB (1024)
 #define MB (1024*KB)
+
+static inline void
+loop_forever(void)
+{
+	volatile unsigned long x = 1;
+	for(;x;x|=1)
+		;
+}
+
 static inline void
 create_pte_mapping(unsigned long start, unsigned long end,
 		   unsigned long mode, unsigned long mask, int large)
 {
-	unsigned long addr, offset = reloc_offset();
-	HTAB *_htab_data = PTRRELOC(&htab_data);
-	HPTE  *htab  = (HPTE *)__v2a(_htab_data->htab);
+	unsigned long addr;
+	HPTE *htab = (HPTE *)__v2a(htab_data.htab);
 	unsigned int step;
 
 	if (large)
@@ -102,8 +105,8 @@
 	for (addr = start; addr < end; addr += step) {
 		unsigned long vsid = get_kernel_vsid(addr);
 		unsigned long va = (vsid << 28) | (addr & 0xfffffff);
-		make_pte(htab, va, (unsigned long)__v2a(addr), mode, mask,
-				large);
+		make_pte(htab, va, (unsigned long)__v2a(addr), 
+			 mode, mask, large);
 	}
 }
 
@@ -112,16 +115,21 @@
 {
 	unsigned long table, htab_size_bytes;
 	unsigned long pteg_count;
-	unsigned long mode_ro, mode_rw, mask;
-	unsigned long offset = reloc_offset();
-	struct naca_struct *_naca = RELOC(naca);
-	HTAB *_htab_data = PTRRELOC(&htab_data);
+	unsigned long mode_rw, mask;
 
+#if 0
+	/* Can't really do the call below since it calls the normal RTAS
+	 * entry point and we're still running relocate-off at the moment.
+	 * Temporarily disabling until it can call through the relocate-off
+	 * RTAS entry point.  -Peter
+	 */
+	ppc64_boot_msg(0x05, "htab init");
+#endif
 	/*
 	 * Calculate the required size of the htab.  We want the number of
 	 * PTEGs to equal one half the number of real pages.
 	 */ 
-	htab_size_bytes = 1UL << _naca->pftSize;
+	htab_size_bytes = 1UL << naca->pftSize;
 	pteg_count = htab_size_bytes >> 7;
 
 	/* For debug, make the HTAB 1/8 as big as it normally would be. */
@@ -130,335 +138,499 @@
 		htab_size_bytes = pteg_count << 7;
 	}
 
-	_htab_data->htab_num_ptegs = pteg_count;
-	_htab_data->htab_hash_mask = pteg_count - 1;
+	htab_data.htab_num_ptegs = pteg_count;
+	htab_data.htab_hash_mask = pteg_count - 1;
 
-	if(_naca->platform == PLATFORM_PSERIES) {
+	if(naca->platform == PLATFORM_PSERIES) {
 		/* Find storage for the HPT.  Must be contiguous in
 		 * the absolute address space.
 		 */
 		table = lmb_alloc(htab_size_bytes, htab_size_bytes);
-		if ( !table )
-			panic("ERROR, cannot find space for HPTE\n");
-		_htab_data->htab = (HPTE *)__a2v(table);
+		if ( !table ) {
+			ppc64_terminate_msg(0x20, "hpt space");
+			loop_forever();
+		}
+		htab_data.htab = (HPTE *)__a2v(table);
 
 		/* htab absolute addr + encoded htabsize */
-		RELOC(_SDR1) = table + __ilog2(pteg_count) - 11;
+		_SDR1 = table + __ilog2(pteg_count) - 11;
 
 		/* Initialize the HPT with no entries */
-		cacheable_memzero((void *)table, htab_size_bytes);
+		memset((void *)table, 0, htab_size_bytes);
 	} else {
-		_htab_data->htab = NULL;
-		RELOC(_SDR1) = 0; 
+		/* Using a hypervisor which owns the htab */
+		htab_data.htab = NULL;
+		_SDR1 = 0; 
 	}
 
-	mode_ro = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RXRX;
 	mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
 	mask = pteg_count-1;
 
-	/* Create PTE's for the kernel text and data sections plus
-	 * the HPT and HPTX arrays.  Make the assumption that
-	 * (addr & KERNELBASE) == 0 (ie they are disjoint).
-	 * We also assume that the va is <= 64 bits.
-	 */
-#if 0
-	create_pte_mapping((unsigned long)_stext,       (unsigned long)__start_naca,                 mode_ro, mask);
-	create_pte_mapping((unsigned long)__start_naca, (unsigned long)__end_stab,                   mode_rw, mask);
-	create_pte_mapping((unsigned long)__end_stab,   (unsigned long)_etext,                       mode_ro, mask);
-	create_pte_mapping((unsigned long)_etext,       RELOC(klimit),                               mode_rw, mask);
-	create_pte_mapping((unsigned long)__a2v(table), (unsigned long)__a2v(table+htab_size_bytes), mode_rw, mask);
-#else
-#ifndef CONFIG_PPC_ISERIES
-	if (__is_processor(PV_POWER4) && _naca->physicalMemorySize > 256*MB) {
+	/* XXX we currently map kernel text rw, should fix this */
+	if ((naca->platform & PLATFORM_PSERIES) &&
+	   cpu_has_largepage() && (naca->physicalMemorySize > 256*MB)) {
 		create_pte_mapping((unsigned long)KERNELBASE, 
 				   KERNELBASE + 256*MB, mode_rw, mask, 0);
 		create_pte_mapping((unsigned long)KERNELBASE + 256*MB, 
-				   KERNELBASE + (_naca->physicalMemorySize), 
+				   KERNELBASE + (naca->physicalMemorySize), 
 				   mode_rw, mask, 1);
-		return;
+	} else {
+		create_pte_mapping((unsigned long)KERNELBASE, 
+				   KERNELBASE+(naca->physicalMemorySize), 
+				   mode_rw, mask, 0);
 	}
-#endif
-	create_pte_mapping((unsigned long)KERNELBASE, 
-			   KERNELBASE+(_naca->physicalMemorySize), 
-			   mode_rw, mask, 0);
+#if 0
+	/* Can't really do the call below since it calls the normal RTAS
+	 * entry point and we're still running relocate-off at the moment.
+	 * Temporarily disabling until it can call through the relocate-off
+	 * RTAS entry point.  -Peter
+	 */
+	ppc64_boot_msg(0x06, "htab done");
 #endif
 }
 #undef KB
 #undef MB
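
The sizing above works out as follows: a PTEG holds 8 HPTEs of 16 bytes
each, hence the >> 7, and the architected HTABSIZE field in SDR1 counts
hash bits beyond the 2^11-PTEG (256KB) minimum, hence the
__ilog2(pteg_count) - 11.  A minimal userspace sketch of that arithmetic
(the pftSize value here is hypothetical):

    #include <stdio.h>

    int main(void)
    {
        unsigned long pftSize = 25;                 /* hypothetical: 32MB HTAB */
        unsigned long htab_size_bytes = 1UL << pftSize;
        unsigned long pteg_count = htab_size_bytes >> 7; /* 128 bytes per PTEG */
        unsigned long htabsize = 0;                 /* __ilog2(pteg_count) - 11 */

        while ((1UL << (htabsize + 11)) < pteg_count)
            htabsize++;

        printf("htab: %lu bytes, %lu PTEGs, SDR1 HTABSIZE=%lu\n",
               htab_size_bytes, pteg_count, htabsize);
        return 0;
    }
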
 
 /*
- * Create a pte.  Used during initialization only.
+ * Create a pte. Used during initialization only.
  * We assume the PTE will fit in the primary PTEG.
  */
-void make_pte(HPTE *htab,
-	      unsigned long va, unsigned long pa, int mode,
-	      unsigned long hash_mask, int large)
+void make_pte(HPTE *htab, unsigned long va, unsigned long pa,
+	      int mode, unsigned long hash_mask, int large)
 {
-	HPTE  *hptep;
-	unsigned long hash, i;
-	volatile unsigned long x = 1;
-	unsigned long vpn;
-
-#ifdef CONFIG_PPC_PSERIES
-	if(naca->platform == PLATFORM_PSERIES_LPAR) {
-		make_pte_LPAR(htab, va, pa, mode, hash_mask, large); 
-		return;
-	}
-#endif
+	HPTE *hptep, local_hpte, rhpte;
+	unsigned long hash, vpn, flags, lpar_rc;
+	unsigned long i, dummy1, dummy2;
+	long slot;
 
 	if (large)
-		vpn = va >> 24;
+		vpn = va >> LARGE_PAGE_SHIFT;
 	else
-		vpn = va >> 12;
+		vpn = va >> PAGE_SHIFT;
 
 	hash = hpt_hash(vpn, large);
 
-	hptep  = htab +  ((hash & hash_mask)*HPTES_PER_GROUP);
-
-	for (i = 0; i < 8; ++i, ++hptep) {
-		if ( hptep->dw0.dw0.v == 0 ) {		/* !valid */
-			hptep->dw1.dword1 = pa | mode;
-			hptep->dw0.dword0 = 0;
-			hptep->dw0.dw0.avpn = va >> 23;
-			hptep->dw0.dw0.bolted = 1;	/* bolted */
-			hptep->dw0.dw0.v = 1;		/* make valid */
-			return;
+	local_hpte.dw1.dword1 = pa | mode;
+	local_hpte.dw0.dword0 = 0;
+	local_hpte.dw0.dw0.avpn = va >> 23;
+	local_hpte.dw0.dw0.bolted = 1;		/* bolted */
+	if (large) {
+		local_hpte.dw0.dw0.l = 1;	/* large page */
+		local_hpte.dw0.dw0.avpn &= ~0x1UL;
+	}
+	local_hpte.dw0.dw0.v = 1;
+
+	if (naca->platform == PLATFORM_PSERIES) {
+		hptep  = htab + ((hash & hash_mask)*HPTES_PER_GROUP);
+
+		for (i = 0; i < 8; ++i, ++hptep) {
+			if (hptep->dw0.dw0.v == 0) {		/* !valid */
+				*hptep = local_hpte;
+				return;
+			}
 		}
+	} else if (naca->platform == PLATFORM_PSERIES_LPAR) {
+		slot = ((hash & hash_mask)*HPTES_PER_GROUP);
+		
+		/* Set CEC cookie to 0                   */
+		/* Zero page = 0                         */
+		/* I-cache Invalidate = 0                */
+		/* I-cache synchronize = 0               */
+		/* Exact = 0 - modify any entry in group */
+		flags = 0;
+		
+		lpar_rc =  plpar_pte_enter(flags, slot, local_hpte.dw0.dword0,
+					   local_hpte.dw1.dword1, 
+					   &dummy1, &dummy2);
+		if (lpar_rc != H_Success) {
+			ppc64_terminate_msg(0x21, "hpte enter");
+			loop_forever();
+		}
+		return;
+	} else if (naca->platform == PLATFORM_ISERIES_LPAR) {
+		slot = HvCallHpt_findValid(&rhpte, vpn);
+		if (slot < 0) {
+			/* Must find space in primary group */
+			panic("hash_page: hpte already exists\n");
+		}
+		HvCallHpt_addValidate(slot, 0, (HPTE *)&local_hpte );
+		return;
 	}
 
 	/* We should _never_ get here and too early to call xmon. */
-	for(;x;x|=1);
+	ppc64_terminate_msg(0x22, "hpte platform");
+	loop_forever();
+}
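
make_pte() turns a 64-bit va into a vpn and hashes it to a PTEG.  A
self-contained model of that address math; the hpt_hash() body below is
an assumption (it lives in the mmu headers, not this file), reconstructed
from how the callers here shift by PAGE_SHIFT and LARGE_PAGE_SHIFT:

    #include <stdio.h>

    #define HPTES_PER_GROUP 8

    /* Assumed hpt_hash(): 16-bit page index under the VSID for 4K pages,
     * 4-bit page index for 16M pages. */
    static unsigned long hpt_hash(unsigned long vpn, int large)
    {
        unsigned long vsid = large ? vpn >> 4 : vpn >> 16;
        unsigned long page = large ? vpn & 0xf : vpn & 0xffff;
        return (vsid & 0x7fffffffffUL) ^ page;
    }

    int main(void)
    {
        unsigned long vsid = 0x123456789UL;              /* example vsid */
        unsigned long va = (vsid << 28) | 0x0abc000;     /* segment offset */
        unsigned long vpn = va >> 12;                    /* PAGE_SHIFT */
        unsigned long hash_mask = (1UL << 18) - 1;       /* 2^18 PTEGs, say */
        unsigned long slot = (hpt_hash(vpn, 0) & hash_mask) * HPTES_PER_GROUP;

        printf("vpn=0x%lx -> primary group starts at slot %lu\n", vpn, slot);
        return 0;
    }
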
+
+/*
+ * find_linux_pte returns the address of a linux pte for a given 
+ * effective address and directory.  If not found, it returns zero.
+ */
+pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
+{
+	pgd_t *pg;
+	pmd_t *pm;
+	pte_t *pt = NULL;
+	pte_t pte;
+
+	pg = pgdir + pgd_index(ea);
+	if (!pgd_none(*pg)) {
+		pm = pmd_offset(pg, ea);
+		if (!pmd_none(*pm)) { 
+			pt = pte_offset(pm, ea);
+			pte = *pt;
+			if (!pte_present(pte))
+				pt = NULL;
+		}
+	}
+
+	return pt;
 }
 
-/* Functions to invalidate a HPTE */
-static void hpte_invalidate_iSeries( unsigned long slot )
+static inline unsigned long computeHptePP(unsigned long pte)
 {
-	HvCallHpt_invalidateSetSwBitsGet( slot, 0, 0 );
+	return (pte & _PAGE_USER) |
+		(((pte & _PAGE_USER) >> 1) &
+		 ((~((pte >> 2) &	/* _PAGE_RW */
+		     (pte >> 7))) &	/* _PAGE_DIRTY */
+		  1));
 }
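
computeHptePP() folds three Linux pte bits into the architected PP value:
kernel pages get pp=0 (kernel read/write, no user access); user pages get
pp=2 (read/write) only when both writable and already dirty, and pp=3
(read-only) otherwise, so the first store faults and the DIRTY bit can be
managed in software.  A small check of that truth table; the flag values
are assumptions taken from this era's asm-ppc64/pgtable.h:

    #include <stdio.h>

    #define _PAGE_USER  0x002   /* assumed flag values */
    #define _PAGE_RW    0x004
    #define _PAGE_DIRTY 0x080

    static unsigned long computeHptePP(unsigned long pte)
    {
        return (pte & _PAGE_USER) |
            (((pte & _PAGE_USER) >> 1) &
             ((~((pte >> 2) & (pte >> 7))) & 1));
    }

    int main(void)
    {
        unsigned long cases[] = {
            0,                                   /* kernel          -> pp=0 */
            _PAGE_USER,                          /* user read-only  -> pp=3 */
            _PAGE_USER | _PAGE_RW,               /* user rw, clean  -> pp=3 */
            _PAGE_USER | _PAGE_RW | _PAGE_DIRTY, /* user rw, dirty  -> pp=2 */
        };
        int i;

        for (i = 0; i < 4; i++)
            printf("pte=0x%03lx -> pp=%lu\n", cases[i],
                   computeHptePP(cases[i]));
        return 0;
    }
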
 
-static void hpte_invalidate_pSeries( unsigned long slot )
+/*
+ * Handle a fault by adding an HPTE. If the address can't be determined
+ * to be valid via Linux page tables, return 1. If handled, return 0.
+ */
+int __hash_page(unsigned long ea, unsigned long access, 
+		unsigned long vsid, pte_t *ptep)
 {
-	/* Local copy of the first doubleword of the HPTE */
-	union {
-		unsigned long d;
-		Hpte_dword0   h;
-	} hpte_dw0;
+	unsigned long va, vpn;
+	unsigned long newpp, prpn;
+	unsigned long hpteflags;
+	long slot;
+	pte_t old_pte, new_pte;
 
-	/* Locate the HPTE */
-	HPTE  * hptep  = htab_data.htab  + slot;
+	/* Search the Linux page table for a match with va */
+	va = (vsid << 28) | (ea & 0x0fffffff);
+	vpn = va >> PAGE_SHIFT;
 
-	/* Get the first doubleword of the HPTE */
-	hpte_dw0.d = hptep->dw0.dword0;
+	/* Acquire the hash table lock to guarantee that the linux
+	 * pte we fetch will not change
+	 */
+	spin_lock( &hash_table_lock );
+	
+	/* 
+	 * Check the user's access rights to the page.  If access should be
+	 * prevented then send the problem up to do_page_fault.
+	 */
+	access |= _PAGE_PRESENT;
+	if (unlikely(access & ~(pte_val(*ptep)))) {
+		spin_unlock( &hash_table_lock );
+		return 1;
+	}
 
-	/* Invalidate the hpte */
-	hptep->dw0.dword0 = 0;
+	/* 
+	 * We have found a pte (which was present).
+	 * The spinlocks prevent this status from changing.
+	 * The hash_table_lock prevents the _PAGE_HASHPTE status
+	 * from changing (RPN, DIRTY and ACCESSED too).
+	 * The page_table_lock prevents the pte from being
+	 * invalidated or modified.
+	 */
 
-	/* Invalidate the tlb   */
-	{
-		unsigned long vsid, group, pi, pi_high;
-
-		vsid = hpte_dw0.h.avpn >> 5;
-		group = slot >> 3;
-		if(hpte_dw0.h.h) {
-			group = ~group;
-		} 
-		pi = (vsid ^ group) & 0x7ff;
-		pi_high = (hpte_dw0.h.avpn & 0x1f) << 11;
-		pi |= pi_high;
-		_tlbie(pi << 12);
-	}
-}
+	/*
+	 * At this point, we have a pte (old_pte) which can be used to build
+	 * or update an HPTE. There are 2 cases:
+	 *
+	 * 1. There is a valid (present) pte with no associated HPTE (this is 
+	 *	the most common case)
+	 * 2. There is a valid (present) pte with an associated HPTE. The
+	 *	current values of the pp bits in the HPTE prevent access
+	 *	because we are doing software DIRTY bit management and the
+	 *	page is currently not DIRTY. 
+	 */
 
+	old_pte = *ptep;
+	new_pte = old_pte;
 
-/* Select an available HPT slot for a new HPTE
- *   return slot index (if in primary group)
- *   return -slot index (if in secondary group) 
- */
-static long hpte_selectslot_iSeries( unsigned long vpn )
-{
-	HPTE hpte;
-	long ret_slot, orig_slot;
-	unsigned long primary_hash;
-	unsigned long hpteg_slot;
-	unsigned long slot;
-	unsigned i, k;
-	union {
-		unsigned long	d;
-		Hpte_dword0	h;
-	} hpte_dw0;
+	/* If the attempted access was a store */
+	if (access & _PAGE_RW)
+		pte_val(new_pte) |= _PAGE_ACCESSED | _PAGE_DIRTY;
+	else
+		pte_val(new_pte) |= _PAGE_ACCESSED;
+
+	newpp = computeHptePP(pte_val(new_pte));
+	
+	/* Check if pte already has an hpte (case 2) */
+	if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
+		/* There MIGHT be an HPTE for this pte */
+		unsigned long hash, slot, secondary;
+
+		/* XXX fix large pte flag */
+		hash = hpt_hash(vpn, 0);
+		secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
+		if (secondary)
+			hash = ~hash;
+		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+		slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
 
-	ret_slot = orig_slot = HvCallHpt_findValid( &hpte, vpn );
-	if ( hpte.dw0.dw0.v ) {		/* If valid ...what do we do now? */
-		udbg_printf( "hpte_selectslot_iSeries: vpn 0x%016lx already valid at slot 0x%016lx\n", vpn, ret_slot );
-		udbg_printf( "hpte_selectslot_iSeries: returned hpte 0x%016lx 0x%016lx\n", hpte.dw0.dword0, hpte.dw1.dword1 );
-
-		return (0x8000000000000000); 
-		/*			panic("select_hpte_slot found entry already valid\n"); */
-	}
-	if ( ret_slot == -1 ) {		/* -1 indicates no available slots */
-
-		/* No available entry found in secondary group */
-
-		PMC_SW_SYSTEM(htab_capacity_castouts);
-
-		primary_hash = hpt_hash(vpn, 0);
-		hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
-		k = htab_data.next_round_robin++ & 0x7;
-
-		for ( i=0; i<HPTES_PER_GROUP; ++i ) {
-			if ( k == HPTES_PER_GROUP )
-				k = 0;
-			slot = hpteg_slot + k;
-			hpte_dw0.d = hpte_getword0_iSeries( slot );
-			if ( !hpte_dw0.h.bolted ) {
-				hpte_invalidate_iSeries( slot );
-				ret_slot = slot;
+		/* XXX fix large pte flag */
+		if (ppc_md.hpte_updatepp(slot, secondary, 
+					 newpp, va, 0) == -1) {
+			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+		} else {
+			if (!pte_same(old_pte, new_pte)) {
+				*ptep = new_pte;
 			}
-			++k;
-		}
-	} else {
-		if ( ret_slot < 0 ) {
-			PMC_SW_SYSTEM(htab_primary_overflows);
-			ret_slot &= 0x7fffffffffffffff;
-			ret_slot = -ret_slot;
 		}
 	}
-	if ( ret_slot == -1 ) {
-		/* No non-bolted entry found in primary group - time to panic */
-        	udbg_printf("hpte_selectslot_pSeries - No non-bolted HPTE in group 0x%lx! \n", hpteg_slot/HPTES_PER_GROUP);
-        	panic("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP);
+
+	if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
+		/* Update the linux pte with the HPTE slot */
+		pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
+		pte_val(new_pte) |= _PAGE_HASHPTE;
+		prpn = pte_val(old_pte) >> PTE_SHIFT;
+
+		/* copy appropriate flags from linux pte */
+		hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
+
+		slot = ppc_md.hpte_insert(vpn, prpn, hpteflags, 0, 0);
+
+		pte_val(new_pte) |= ((slot<<12) & 
+				     (_PAGE_GROUP_IX | _PAGE_SECONDARY));
+
+		*ptep = new_pte;
 	}
-	PPCDBG(PPCDBG_MM, "hpte_selectslot_iSeries: vpn=0x%016lx, orig_slot=0x%016lx, ret_slot=0x%016lx \n",
-	       vpn, orig_slot, ret_slot );	
-	return ret_slot;
+
+	spin_unlock(&hash_table_lock);
+
+	return 0;
 }
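
The slot that hpte_insert() returns is "hsss" (secondary bit plus the
index within the group); __hash_page() parks it at bits 12-15 of the
Linux pte so the HPTE can later be found again without a search.  A
round-trip sketch; the two flag masks are assumptions consistent with
the >>12 and >>15 shifts used throughout this file:

    #include <assert.h>

    #define _PAGE_GROUP_IX  0x7000UL  /* assumed: slot in group, bits 12-14 */
    #define _PAGE_SECONDARY 0x8000UL  /* assumed: hash group bit, bit 15 */

    int main(void)
    {
        long slot = (1 << 3) | 5;     /* "hsss": secondary group, slot 5 */
        unsigned long pte = 0;

        /* encode, as in __hash_page() */
        pte |= (slot << 12) & (_PAGE_GROUP_IX | _PAGE_SECONDARY);

        /* decode, as in flush_hash_page() */
        assert(((pte & _PAGE_SECONDARY) >> 15) == 1);
        assert(((pte & _PAGE_GROUP_IX) >> 12) == 5);
        return 0;
    }
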
 
-static long hpte_selectslot_pSeries(unsigned long vpn)
+/*
+ * Handle a fault by adding an HPTE. If the address can't be determined
+ * to be valid via Linux page tables, return 1. If handled, return 0.
+ */
+int hash_page(unsigned long ea, unsigned long access)
 {
-	HPTE * hptep;
-	unsigned long primary_hash;
-	unsigned long hpteg_slot;
-	unsigned i, k;
+	void *pgdir;
+	unsigned long vsid;
+	struct mm_struct *mm;
+	pte_t *ptep;
+	int ret;
 
-	/* Search the primary group for an available slot */
+	/* Check for invalid addresses. */
+	if (!IS_VALID_EA(ea)) return 1;
 
-	primary_hash = hpt_hash(vpn, 0);
-	hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
-	hptep = htab_data.htab + hpteg_slot;
-	
-	for (i=0; i<HPTES_PER_GROUP; ++i) {
-		if ( hptep->dw0.dw0.v == 0 ) {
-			/* If an available slot found, return it */
-			return hpteg_slot + i;
-		}
-		hptep++;
+ 	switch (REGION_ID(ea)) {
+	case USER_REGION_ID:
+		mm = current->mm;
+		if (mm == NULL) return 1;
+		vsid = get_vsid(mm->context, ea);
+		break;
+	case IO_REGION_ID:
+		mm = &ioremap_mm;
+		vsid = get_kernel_vsid(ea);
+		break;
+	case VMALLOC_REGION_ID:
+		mm = &init_mm;
+		vsid = get_kernel_vsid(ea);
+		break;
+	case IO_UNMAPPED_REGION_ID:
+		udbg_printf("EEH Error ea = 0x%lx\n", ea);
+		PPCDBG_ENTER_DEBUGGER();
+		panic("EEH Error ea = 0x%lx\n", ea);
+		break;
+	case KERNEL_REGION_ID:
+		/*
+		 * As htab_initialize is now, we shouldn't ever get here since
+		 * we're bolting the entire 0xC0... region.
+		 */
+		udbg_printf("Little faulted on kernel address 0x%lx\n", ea);
+		PPCDBG_ENTER_DEBUGGER();
+		panic("Little faulted on kernel address 0x%lx\n", ea);
+		break;
+	default:
+		/* Not a valid range, send the problem up to do_page_fault */
+		return 1;
+		break;
 	}
 
-	/* No available entry found in primary group */
-
-	PMC_SW_SYSTEM(htab_primary_overflows);
-
-	/* Search the secondary group */
+	pgdir = mm->pgd;
+	if (pgdir == NULL) return 1;
 
-	hpteg_slot = ( ~primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
-	hptep = htab_data.htab + hpteg_slot;
+	/*
+	 * Lock the Linux page table to prevent mmap and kswapd
+	 * from modifying entries while we search and update
+	 */
+	spin_lock(&mm->page_table_lock);
 
-	for (i=0; i<HPTES_PER_GROUP; ++i) {
-		if ( hptep->dw0.dw0.v == 0 ) {
-			/* If an available slot found, return it */
-			return -(hpteg_slot + i);
-		}
-		hptep++;
+	ptep = find_linux_pte(pgdir, ea);
+	/*
+	 * If no pte found or not present, send the problem up to
+	 * do_page_fault
+	 */
+	if (ptep && pte_present(*ptep)) {
+		ret = __hash_page(ea, access, vsid, ptep);
+	} else {	
+		/* If no pte, send the problem up to do_page_fault */
+		ret = 1;
 	}
 
-	/* No available entry found in secondary group */
+	spin_unlock(&mm->page_table_lock);
+
+	return ret;
+}
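
The switch above keys off the top bits of the effective address to pick
the VSID source.  A sketch of that dispatch; the region values are
illustrative assumptions about this kernel's address map (user at 0x0...,
kernel linear at 0xC..., vmalloc at 0xD...), with REGION_ID() taken to be
the top nibble:

    #include <stdio.h>

    #define REGION_ID(ea) ((unsigned long)(ea) >> 60)

    int main(void)
    {
        unsigned long eas[] = {
            0x0000000010000000UL,  /* user: vsid from mm->context */
            0xC000000000100000UL,  /* kernel linear: bolted, shouldn't fault */
            0xD000000000001000UL,  /* vmalloc: vsid from get_kernel_vsid() */
        };
        int i;

        for (i = 0; i < 3; i++)
            printf("ea=0x%016lx region=0x%lx\n", eas[i], REGION_ID(eas[i]));
        return 0;
    }
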
 
-	PMC_SW_SYSTEM(htab_capacity_castouts);
+void flush_hash_page(unsigned long context, unsigned long ea, pte_t *ptep)
+{
+	unsigned long vsid, vpn, va, hash, secondary, slot, flags;
+	unsigned long large = 0, local = 0;
+	pte_t pte;
 
-	/* Select an entry in the primary group to replace */
+	if ((ea >= USER_START) && (ea <= USER_END))
+		vsid = get_vsid(context, ea);
+	else
+		vsid = get_kernel_vsid(ea);
 
-	hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
-	hptep = htab_data.htab + hpteg_slot;
-	k = htab_data.next_round_robin++ & 0x7;
+	va = (vsid << 28) | (ea & 0x0fffffff);
+	if (large)
+		vpn = va >> LARGE_PAGE_SHIFT;
+	else
+		vpn = va >> PAGE_SHIFT;
+	hash = hpt_hash(vpn, large);
 
-	for (i=0; i<HPTES_PER_GROUP; ++i) {
-		if (k == HPTES_PER_GROUP)
-			k = 0;
+	spin_lock_irqsave( &hash_table_lock, flags);
 
-		if (!hptep[k].dw0.dw0.bolted) {
-			hpteg_slot += k;
-			/* Invalidate the current entry */
-			ppc_md.hpte_invalidate(hpteg_slot); 
-			return hpteg_slot;
-		}
-		++k;
+	pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
+	secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
+	if (secondary) hash = ~hash;
+	slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+	slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
+	
+	if (pte_val(pte) & _PAGE_HASHPTE) {
+		ppc_md.hpte_invalidate(slot, secondary, va, large, local);
 	}
 
-	/* No non-bolted entry found in primary group - time to panic */
-        udbg_printf("hpte_selectslot_pSeries - No non-bolted HPTE in group 0x%lx! \n", hpteg_slot/HPTES_PER_GROUP);
-	/*      xmon(0); */
-        panic("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP);
+	spin_unlock_irqrestore( &hash_table_lock, flags );
+}
 
-	/* keep the compiler happy */
-	return 0;
+long plpar_pte_enter(unsigned long flags,
+		     unsigned long ptex,
+		     unsigned long new_pteh, unsigned long new_ptel,
+		     unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
+{
+	unsigned long dummy, ret;
+	ret = plpar_hcall(H_ENTER, flags, ptex, new_pteh, new_ptel,
+			   old_pteh_ret, old_ptel_ret, &dummy);
+	return(ret);
 }
 
-unsigned long hpte_getword0_iSeries( unsigned long slot )
+long plpar_pte_remove(unsigned long flags,
+		      unsigned long ptex,
+		      unsigned long avpn,
+		      unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
 {
-	unsigned long dword0;
+	unsigned long dummy;
+	return plpar_hcall(H_REMOVE, flags, ptex, avpn, 0,
+			   old_pteh_ret, old_ptel_ret, &dummy);
+}
 
-	HPTE hpte;
-	HvCallHpt_get( &hpte, slot );
-	dword0 = hpte.dw0.dword0;
+long plpar_pte_read(unsigned long flags,
+		    unsigned long ptex,
+		    unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
+{
+	unsigned long dummy;
+	return plpar_hcall(H_READ, flags, ptex, 0, 0,
+			   old_pteh_ret, old_ptel_ret, &dummy);
+}
 
-	return dword0;
+long plpar_pte_protect(unsigned long flags,
+		       unsigned long ptex,
+		       unsigned long avpn)
+{
+	return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn);
+}
+
+static __inline__ void set_pp_bit(unsigned long pp, HPTE *addr)
+{
+	unsigned long old;
+	unsigned long *p = &addr->dw1.dword1;
+
+	__asm__ __volatile__(
+        "1:	ldarx	%0,0,%3\n\
+                rldimi  %0,%2,0,62\n\
+                stdcx.	%0,0,%3\n\
+            	bne	1b"
+        : "=&r" (old), "=m" (*p)
+        : "r" (pp), "r" (p), "m" (*p)
+        : "cc");
 }
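
The ldarx/stdcx. loop above atomically splices the two low (pp) bits into
the HPTE's second doubleword: rldimi with a 62..63 mask inserts just those
bits, and the store-conditional retries on contention.  A portable C11
rendering of the same read-modify-write, for illustration only:

    #include <stdatomic.h>

    /* Sketch: atomically replace the low two pp bits of dword1. */
    static void set_pp_bits(unsigned long pp, _Atomic unsigned long *dw1)
    {
        unsigned long old = atomic_load(dw1);

        /* on failure 'old' is reloaded with the current value; retry */
        while (!atomic_compare_exchange_weak(dw1, &old,
                                             (old & ~0x3UL) | (pp & 0x3UL)))
            ;
    }
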
 
-unsigned long hpte_getword0_pSeries( unsigned long slot )
+/*
+ * Functions used to retrieve word 0 of a given page table entry.
+ *
+ * Input : slot : PTE index within the page table of the entry to retrieve 
+ * Output: Contents of word 0 of the specified entry
+ */
+static unsigned long rpa_lpar_hpte_getword0(unsigned long slot)
 {
 	unsigned long dword0;
-	HPTE * hptep = htab_data.htab + slot;
+	unsigned long lpar_rc;
+	unsigned long dummy_word1;
+	unsigned long flags;
+
+	/* Read 1 pte at a time                        */
+	/* Do not need RPN to logical page translation */
+	/* No cross CEC PFT access                     */
+	flags = 0;
+	
+	lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);
+
+	if (lpar_rc != H_Success)
+		panic("Error on pte read in get_hpte0 rc = %lx\n", lpar_rc);
 
-	dword0 = hptep->dw0.dword0;
 	return dword0;
 }
 
-static long hpte_find_iSeries(unsigned long vpn)
+unsigned long iSeries_hpte_getword0(unsigned long slot)
 {
+	unsigned long dword0;
+
 	HPTE hpte;
-	long slot;
+	HvCallHpt_get(&hpte, slot);
+	dword0 = hpte.dw0.dword0;
 
-	slot = HvCallHpt_findValid( &hpte, vpn );
-	if ( hpte.dw0.dw0.v ) {
-		if ( slot < 0 ) {
-			slot &= 0x7fffffffffffffff;
-			slot = -slot;
-		}
-	} else
-		slot = -1;
-	return slot;
+	return dword0;
 }
 
-static long hpte_find_pSeries(unsigned long vpn)
+/*
+ * Functions used to find the PTE for a particular virtual address. 
+ * Only used during boot when bolting pages.
+ *
+ * Input : vpn      : virtual page number
+ * Output: PTE index within the page table of the entry
+ *         -1 on failure
+ */
+static long hpte_find(unsigned long vpn)
 {
-	union {
-		unsigned long d;
-		Hpte_dword0   h;
-	} hpte_dw0;
-	long slot;
+	HPTE *hptep;
 	unsigned long hash;
-	unsigned long i,j;
+	unsigned long i, j;
+	long slot;
+	Hpte_dword0 dw0;
 
 	hash = hpt_hash(vpn, 0);
-	for ( j=0; j<2; ++j ) {
+
+	for (j = 0; j < 2; j++) {
 		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
-		for ( i=0; i<HPTES_PER_GROUP; ++i ) {
-			hpte_dw0.d = hpte_getword0_pSeries( slot );
-			if ( ( hpte_dw0.h.avpn == ( vpn >> 11 ) ) &&
-			     ( hpte_dw0.h.v ) &&
-			     ( hpte_dw0.h.h == j ) ) {
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			hptep = htab_data.htab + slot;
+			dw0 = hptep->dw0.dw0;
+
+			if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
+			    (dw0.h == j)) {
 				/* HPTE matches */
-				if ( j )
+				if (j)
 					slot = -slot;
 				return slot;
 			}
@@ -466,247 +638,320 @@
 		}
 		hash = ~hash;
 	}
+
 	return -1;
-} 
+}
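
The search shape used by hpte_find() (and the LPAR variant below): probe
the primary PTEG, complement the hash for the secondary PTEG, and negate
the slot to flag a secondary hit.  A compact model, with a stubbed match
predicate standing in for the real AVPN/valid/H comparison:

    #include <stdio.h>

    #define HPTES_PER_GROUP 8

    static int match(long slot, int secondary)
    {
        return secondary == 1 && (slot & 0x7) == 3;   /* pretend hit */
    }

    static long find_slot_model(unsigned long hash, unsigned long hash_mask)
    {
        long slot;
        int i, j;

        for (j = 0; j < 2; j++) {
            slot = (hash & hash_mask) * HPTES_PER_GROUP;
            for (i = 0; i < HPTES_PER_GROUP; i++, slot++)
                if (match(slot, j))
                    return j ? -slot : slot;
            hash = ~hash;
        }
        return -1;
    }

    int main(void)
    {
        printf("slot=%ld\n", find_slot_model(0x12345, (1UL << 18) - 1));
        return 0;
    }
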
 
-/* This function is called by iSeries setup when initializing the hpt */
-void build_valid_hpte( unsigned long vsid, unsigned long ea, unsigned long pa,
-		       pte_t * ptep, unsigned hpteflags, unsigned bolted )
-{
-	unsigned long vpn, flags;
-	long hpte_slot;
-	unsigned hash;
-	pte_t pte;
+static long rpa_lpar_hpte_find(unsigned long vpn)
+{
+	unsigned long hash;
+	unsigned long i, j;
+	long slot;
+	union {
+		unsigned long dword0;
+		Hpte_dword0 dw0;
+	} hpte_dw0;
+	Hpte_dword0 dw0;
 
-	vpn = ((vsid << 28) | ( ea & 0xffff000 )) >> 12;
+	hash = hpt_hash(vpn, 0);
 
-	spin_lock_irqsave( &hash_table_lock, flags );
+	for (j = 0; j < 2; j++) {
+		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			hpte_dw0.dword0 = rpa_lpar_hpte_getword0(slot);
+			dw0 = hpte_dw0.dw0;
 
-	hpte_slot = ppc_md.hpte_selectslot( vpn );
-	hash = 0;
-	if ( hpte_slot < 0 ) {
-		if ( hpte_slot == 0x8000000000000000 ) {
-			udbg_printf("hash_page: ptep    = 0x%016lx\n", 
-				    (unsigned long)ptep );
-			udbg_printf("hash_page: ea      = 0x%016lx\n", ea );
-			udbg_printf("hash_page: vpn     = 0x%016lx\n", vpn );
-               
-			panic("hash_page: hpte already exists\n");
+			if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
+			    (dw0.h == j)) {
+				/* HPTE matches */
+				if (j)
+					slot = -slot;
+				return slot;
+			}
+			++slot;
 		}
-		hash = 1;
-		hpte_slot = -hpte_slot;
+		hash = ~hash;
 	}
-	ppc_md.hpte_create_valid( hpte_slot, vpn, pa >> 12, hash, ptep,
-				  hpteflags, bolted );
-
-	if ( ptep ) {
-		/* Get existing pte flags */
-		pte = *ptep;
-		pte_val(pte) &= ~_PAGE_HPTEFLAGS;
-
-		/* Add in the has hpte flag */
-		pte_val(pte) |= _PAGE_HASHPTE;
-
-		/* Add in the _PAGE_SECONDARY flag */
-		pte_val(pte) |= hash << 15;
-
-		/* Add in the hpte slot */
-		pte_val(pte) |= (hpte_slot << 12) & _PAGE_GROUP_IX;
-               
-		/* Save the new pte.  */
-		*ptep = pte;
-               
-	}
-	spin_unlock_irqrestore( &hash_table_lock, flags );
-}
 
+	return -1;
+} 
 
-/* Create an HPTE and validate it
- *   It is assumed that the HPT slot currently is invalid.
- *   The HPTE is set with the vpn, rpn (converted to absolute)
- *   and flags
- */
-static void hpte_create_valid_iSeries(unsigned long slot, unsigned long vpn,
-				      unsigned long prpn, unsigned hash, 
-				      void * ptep, unsigned hpteflags, 
-				      unsigned bolted )
-{
-	/* Local copy of HPTE */
-	struct {
-		/* Local copy of first doubleword of HPTE */
-		union {
-			unsigned long d;
-			Hpte_dword0   h;
-		} dw0;
-		/* Local copy of second doubleword of HPTE */
-		union {
-			unsigned long     d;
-			Hpte_dword1       h;
-			Hpte_dword1_flags f;
-		} dw1;
-	} lhpte;
-	
-	unsigned long avpn = vpn >> 11;
-	unsigned long arpn = physRpn_to_absRpn( prpn );
+static long iSeries_hpte_find(unsigned long vpn)
+{
+	HPTE hpte;
+	long slot;
 
-	/* Fill in the local HPTE with absolute rpn, avpn and flags */
-	lhpte.dw1.d        = 0;
-	lhpte.dw1.h.rpn    = arpn;
-	lhpte.dw1.f.flags  = hpteflags;
-
-	lhpte.dw0.d        = 0;
-	lhpte.dw0.h.avpn   = avpn;
-	lhpte.dw0.h.h      = hash;
-	lhpte.dw0.h.bolted = bolted;
-	lhpte.dw0.h.v      = 1;
+	/*
+	 * The HvCallHpt_findValid interface is as follows:
+	 * 0xffffffffffffffff : No entry found.
+	 * 0x00000000xxxxxxxx : Entry found in primary group, slot x
+	 * 0x80000000xxxxxxxx : Entry found in secondary group, slot x
+	 */
+	slot = HvCallHpt_findValid(&hpte, vpn); 
+	if (hpte.dw0.dw0.v) {
+		if (slot < 0) {
+			slot &= 0x7fffffffffffffff;
+			slot = -slot;
+		}
+	} else {
+		slot = -1;
+	}
 
-	/* Now fill in the actual HPTE */
-	HvCallHpt_addValidate( slot, hash, (HPTE *)&lhpte );
+	return slot;
 }
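
A decode of the HvCallHpt_findValid() return convention documented above,
mirroring the sign handling in iSeries_hpte_find(); callers treat a
negative result as "secondary group" and -1 as a miss:

    #include <stdio.h>

    static long decode(long ret)
    {
        if (ret == -1)
            return -1;                  /* 0xffffffffffffffff: no entry */
        if (ret < 0) {                  /* 0x80000000xxxxxxxx */
            ret &= 0x7fffffffffffffff;
            return -ret;                /* negative = secondary group */
        }
        return ret;                     /* positive = primary group */
    }

    int main(void)
    {
        printf("%ld %ld %ld\n",
               decode(0x2a),                        /* primary, slot 0x2a */
               decode((long)0x800000000000002aUL),  /* secondary, slot 0x2a */
               decode(-1L));                        /* miss */
        return 0;
    }
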
 
-static void hpte_create_valid_pSeries(unsigned long slot, unsigned long vpn,
-				      unsigned long prpn, unsigned hash, 
-				      void * ptep, unsigned hpteflags, 
-				      unsigned bolted)
-{
-	/* Local copy of HPTE */
-	struct {
-		/* Local copy of first doubleword of HPTE */
-		union {
-			unsigned long d;
-			Hpte_dword0   h;
-		} dw0;
-		/* Local copy of second doubleword of HPTE */
-		union {
-			unsigned long     d;
-			Hpte_dword1       h;
-			Hpte_dword1_flags f;
-		} dw1;
-	} lhpte;
-	
-	unsigned long avpn = vpn >> 11;
-	unsigned long arpn = physRpn_to_absRpn( prpn );
+/*
+ * Functions used to invalidate a page table entry from the page table
+ * and tlb.
+ *
+ * Input : slot  : PTE index within the page table of the entry to be invalidated
+ *         va    : Virtual address of the entry being invalidated
+ *         large : 1 = large page (16M)
+ *         local : 1 = Use tlbiel to only invalidate the local tlb 
+ */
+static void hpte_invalidate(unsigned long slot, 
+			    unsigned long secondary,
+			    unsigned long va,
+			    int large, int local)
+{
+	HPTE *hptep = htab_data.htab + slot;
+	Hpte_dword0 dw0;
+	unsigned long vpn, avpn;
+	unsigned long flags;
 
-	HPTE *hptep;
+	if (large)
+		vpn = va >> LARGE_PAGE_SHIFT;
+	else
+		vpn = va >> PAGE_SHIFT;
 
-	/* Fill in the local HPTE with absolute rpn, avpn and flags */
-	lhpte.dw1.d        = 0;
-	lhpte.dw1.h.rpn    = arpn;
-	lhpte.dw1.f.flags  = hpteflags;
-
-	lhpte.dw0.d        = 0;
-	lhpte.dw0.h.avpn   = avpn;
-	lhpte.dw0.h.h      = hash;
-	lhpte.dw0.h.bolted = bolted;
-	lhpte.dw0.h.v      = 1;
+	avpn = vpn >> 11;
 
-	/* Now fill in the actual HPTE */
-	hptep  = htab_data.htab  + slot;
+	dw0 = hptep->dw0.dw0;
 
-	/* Set the second dword first so that the valid bit
-	 * is the last thing set
+	/*
+	 * Do not remove bolted entries.  Alternatively, we could check
+	 * the AVPN, hash group, and valid bits.  Doing it this way keeps
+	 * the code common with the optimized pSeries LPAR path.
 	 */
-	
-	hptep->dw1.dword1 = lhpte.dw1.d;
+	if (dw0.bolted) return;
 
-	/* Guarantee the second dword is visible before
-	 * the valid bit
+	/* Invalidate the hpte. */
+	hptep->dw0.dword0 = 0;
+
+	/* Invalidate the tlb */
+	spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+	_tlbie(va, large);
+	spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+}
+
+static void rpa_lpar_hpte_invalidate(unsigned long slot, 
+				     unsigned long secondary,
+				     unsigned long va,
+				     int large, int local)
+{
+	unsigned long lpar_rc;
+	unsigned long dummy1, dummy2;
+
+	/* 
+	 * Don't remove a bolted entry.  This case can occur when we bolt
+	 * pages dynamically after initial boot.
 	 */
-	
-	__asm__ __volatile__ ("eieio" : : : "memory");
+	lpar_rc = plpar_pte_remove(H_ANDCOND, slot, (0x1UL << 4), 
+				   &dummy1, &dummy2);
+
+	if (lpar_rc != H_Success)
+		panic("Bad return code from invalidate rc = %lx\n", lpar_rc);
+}
 
-	/* Now set the first dword including the valid bit */
-	hptep->dw0.dword0 = lhpte.dw0.d;
+static void iSeries_hpte_invalidate(unsigned long slot, 
+				    unsigned long secondary,
+				    unsigned long va,
+				    int large, int local)
+{
+	HPTE lhpte;
+	unsigned long vpn, avpn;
 
-	__asm__ __volatile__ ("ptesync" : : : "memory");
+	if (large)
+		vpn = va >> LARGE_PAGE_SHIFT;
+	else
+		vpn = va >> PAGE_SHIFT;
+
+	avpn = vpn >> 11;
+
+	lhpte.dw0.dword0 = iSeries_hpte_getword0(slot);
+	
+	if ((lhpte.dw0.dw0.avpn == avpn) && 
+	    (lhpte.dw0.dw0.v) &&
+	    (lhpte.dw0.dw0.h == secondary)) {
+		HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0);
+	}
 }
 
-/* find_linux_pte returns the address of a linux pte for a given 
- * effective address and directory.  If not found, it returns zero.
+/*
+ * Functions used to update page protection bits.
+ *
+ * Input : slot  : PTE index within the page table of the entry to update
+ *         newpp : new page protection bits
+ *         va    : Virtual address of the entry being updated
+ *         large : 1 = large page (16M)
+ * Output: 0 on success, -1 on failure
  */
+static long hpte_updatepp(unsigned long slot, 
+			  unsigned long secondary,
+			  unsigned long newpp,
+			  unsigned long va, int large)
+{
+	HPTE *hptep = htab_data.htab + slot;
+	Hpte_dword0 dw0;
+	Hpte_dword1 dw1;
+	unsigned long vpn, avpn;
+	unsigned long flags;
 
-pte_t  * find_linux_pte( pgd_t * pgdir, unsigned long ea )
-{
-	pgd_t *pg;
-	pmd_t *pm;
-	pte_t *pt = NULL;
-	pte_t pte;
-	pg = pgdir + pgd_index( ea );
-	if ( ! pgd_none( *pg ) ) {
+	if (large)
+		vpn = va >> LARGE_PAGE_SHIFT;
+	else
+		vpn = va >> PAGE_SHIFT;
 
-		pm = pmd_offset( pg, ea );
-		if ( ! pmd_none( *pm ) ) { 
-			pt = pte_offset( pm, ea );
-			pte = *pt;
-			if ( ! pte_present( pte ) )
-				pt = NULL;
-		}
-	}
+	avpn = vpn >> 11;
 
-	return pt;
+	dw0 = hptep->dw0.dw0;
+	if ((dw0.avpn == avpn) && 
+	    (dw0.v) && (dw0.h == secondary)) {
+		/* Turn off valid bit in HPTE */
+		dw0.v = 0;
+		hptep->dw0.dw0 = dw0;
+		
+		/* Ensure it is out of the tlb too */
+		spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+		_tlbie(va, large);
+		spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+		
+		/* Insert the new pp bits into the HPTE */
+		dw1 = hptep->dw1.dw1;
+		dw1.pp = newpp;
+		hptep->dw1.dw1 = dw1;
+		
+		/* Ensure it is visible before validating */
+		__asm__ __volatile__ ("eieio" : : : "memory");
+		
+		/* Turn the valid bit back on in HPTE */
+		dw0.v = 1;
+		hptep->dw0.dw0 = dw0;
+		
+		__asm__ __volatile__ ("ptesync" : : : "memory");
+		
+		return 0;
+	}
 
+	return -1;
 }
 
-static inline unsigned long computeHptePP( unsigned long pte )
-{
-	return (     pte & _PAGE_USER )           |
-		( ( ( pte & _PAGE_USER )    >> 1 ) &
-		  ( ( ~( ( pte >> 2 ) &		/* _PAGE_RW */
-		         ( pte >> 7 ) ) ) &     /* _PAGE_DIRTY */
-			 1 ) );
+static long rpa_lpar_hpte_updatepp(unsigned long slot, 
+				   unsigned long secondary,
+				   unsigned long newpp,
+				   unsigned long va, int large)
+{
+	unsigned long lpar_rc;
+	unsigned long flags = (newpp & 7);
+	unsigned long avpn = va >> 23;
+	HPTE hpte;
+
+	lpar_rc = plpar_pte_read(0, slot, &hpte.dw0.dword0, &hpte.dw1.dword1);
+
+	if ((hpte.dw0.dw0.avpn == avpn) &&
+	    (hpte.dw0.dw0.v) && 
+	    (hpte.dw0.dw0.h == secondary)) {
+		lpar_rc = plpar_pte_protect(flags, slot, 0);
+		if (lpar_rc != H_Success)
+			panic("bad return code from pte protect rc = %lx\n", 
+			      lpar_rc);
+		return 0;
+	}
+
+	return -1;
 }
 
-static void hpte_updatepp_iSeries(long slot, unsigned long newpp, unsigned long va)
+static long iSeries_hpte_updatepp(unsigned long slot, 
+				  unsigned long secondary,
+				  unsigned long newpp, 
+				  unsigned long va, int large)
 {
-	HvCallHpt_setPp( slot, newpp );
+	unsigned long vpn, avpn;
+	HPTE hpte;
+
+	if (large)
+		vpn = va >> LARGE_PAGE_SHIFT;
+	else
+		vpn = va >> PAGE_SHIFT;
+
+	avpn = vpn >> 11;
+
+	HvCallHpt_get(&hpte, slot);
+	if ((hpte.dw0.dw0.avpn == avpn) && 
+	    (hpte.dw0.dw0.v) &&
+	    (hpte.dw0.dw0.h == secondary)) {
+		HvCallHpt_setPp(slot, newpp);
+		return 0;
+	}
+	return -1;
 }
 
-static void hpte_updatepp_pSeries(long slot, unsigned long newpp, unsigned long va)
+/*
+ * Functions used to update the page protection bits. Intended to be used 
+ * to create guard pages for kernel data structures on pages which are bolted
+ * in the HPT. Assumes pages being operated on will not be stolen.
+ * Does not work on large pages. No need to lock here because we are the 
+ * only user.
+ * 
+ * Input : newpp : page protection flags
+ *         ea    : effective kernel address to bolt.
+ */
+static void hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
 {
-	/* Local copy of first doubleword of HPTE */
-	union {
-		unsigned long d;
-		Hpte_dword0   h;
-	} hpte_dw0;
-	
-	/* Local copy of second doubleword of HPTE */
-	union {
-		unsigned long     d;
-		Hpte_dword1       h;
-		Hpte_dword1_flags f;
-	} hpte_dw1;	
-
-	HPTE *  hptep  = htab_data.htab  + slot;
-
-	/* Turn off valid bit in HPTE */
-	hpte_dw0.d = hptep->dw0.dword0;
-	hpte_dw0.h.v = 0;
-	hptep->dw0.dword0 = hpte_dw0.d;
+	unsigned long vsid, va, vpn, flags;
+	long slot;
+	HPTE *hptep;
+
+	vsid = get_kernel_vsid(ea);
+	va = (vsid << 28) | (ea & 0x0fffffff);
+	vpn = va >> PAGE_SHIFT;
+
+	slot = hpte_find(vpn);
+	if (slot == -1)
+		panic("could not find page to bolt\n");
+	hptep = htab_data.htab + slot;
+
+	set_pp_bit(newpp, hptep);
 
 	/* Ensure it is out of the tlb too */
-	_tlbie( va );
+	spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+	_tlbie(va, 0);
+	spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+}
 
-	/* Insert the new pp bits into the HPTE */
-	hpte_dw1.d = hptep->dw1.dword1;
-	hpte_dw1.h.pp = newpp;
-	hptep->dw1.dword1 = hpte_dw1.d;
+static void rpa_lpar_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+{
+	unsigned long lpar_rc;
+	unsigned long vsid, va, vpn, flags;
+	long slot;
 
-	/* Ensure it is visible before validating */
-	__asm__ __volatile__ ("eieio" : : : "memory");
+	vsid = get_kernel_vsid(ea);
+	va = (vsid << 28) | (ea & 0x0fffffff);
+	vpn = va >> PAGE_SHIFT;
 
-	/* Turn the valid bit back on in HPTE */
-	hpte_dw0.h.v = 1;
-	hptep->dw0.dword0 = hpte_dw0.d;
+	slot = rpa_lpar_hpte_find(vpn);
+	if (slot == -1)
+		panic("updateboltedpp: Could not find page to bolt\n");
 
-	__asm__ __volatile__ ("ptesync" : : : "memory");
+	flags = newpp & 3;
+	lpar_rc = plpar_pte_protect(flags, slot, 0);
+
+	if (lpar_rc != H_Success)
+		panic("Bad return code from pte bolted protect rc = %lx\n",
+		      lpar_rc); 
 }
 
-/*
- * Update the page protection bits.  Intended to be used to create
- * guard pages for kernel data structures on pages which are bolted
- * in the HPT.  Assumes pages being operated on will not be stolen. 
- */
-void hpte_updateboltedpp_iSeries(unsigned long newpp, unsigned long ea )
+void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
 {
 	unsigned long vsid,va,vpn;
 	long slot;
@@ -715,518 +960,408 @@
 	va = ( vsid << 28 ) | ( ea & 0x0fffffff );
 	vpn = va >> PAGE_SHIFT;
 
-	slot = ppc_md.hpte_find( vpn );
-	HvCallHpt_setPp( slot, newpp );
-}
+	slot = iSeries_hpte_find(vpn); 
+	if (slot == -1)
+		panic("updateboltedpp: Could not find page to bolt\n");
 
-
-static __inline__ void set_pp_bit(unsigned long  pp, HPTE *addr)
-{
-	unsigned long old;
-	unsigned long *p = (unsigned long *)(&(addr->dw1));
-
-	__asm__ __volatile__(
-        "1:	ldarx	%0,0,%3\n\
-                rldimi  %0,%2,0,62\n\
-                stdcx.	%0,0,%3\n\
-            	bne	1b"
-        : "=&r" (old), "=m" (*p)
-        : "r" (pp), "r" (p), "m" (*p)
-        : "cc");
+	HvCallHpt_setPp(slot, newpp);
 }
 
 /*
- * Update the page protection bits.  Intended to be used to create
- * guard pages for kernel data structures on pages which are bolted
- * in the HPT.  Assumes pages being operated on will not be stolen. 
+ * Functions used to insert new hardware page table entries.
+ * Will cast out non-bolted entries as necessary using a random
+ * algorithm.
+ *
+ * Input : vpn      : virtual page number
+ *         prpn     : real page number in absolute space
+ *         hpteflags: page protection flags
+ *         bolted   : 1 = bolt the page
+ *         large    : 1 = large page (16M)
+ * Output: hsss, where h = hash group, sss = slot within that group
  */
-void hpte_updateboltedpp_pSeries(unsigned long newpp, unsigned long ea)
+static long hpte_insert(unsigned long vpn, unsigned long prpn,
+			unsigned long hpteflags, int bolted, int large)
 {
-	unsigned long vsid,va,vpn,flags;
-	long slot;
 	HPTE *hptep;
+	Hpte_dword0 dw0;
+	HPTE lhpte;
+	int i, secondary;
+	unsigned long hash = hpt_hash(vpn, 0);
+	unsigned long avpn = vpn >> 11;
+	unsigned long arpn = physRpn_to_absRpn(prpn);
+	unsigned long hpte_group;
 
-	vsid = get_kernel_vsid( ea );
-	va = ( vsid << 28 ) | ( ea & 0x0fffffff );
-	vpn = va >> PAGE_SHIFT;
+repeat:
+	secondary = 0;
+	hpte_group = ((hash & htab_data.htab_hash_mask) *
+		      HPTES_PER_GROUP) & ~0x7UL;
+	hptep = htab_data.htab + hpte_group;
+
+	for (i = 0; i < HPTES_PER_GROUP; i++) {
+		dw0 = hptep->dw0.dw0;
+		if (!dw0.v) {
+			/* retry with lock held */
+			dw0 = hptep->dw0.dw0;
+			if (!dw0.v)
+				break;
+		}
+		hptep++;
+	}
 
-	slot = ppc_md.hpte_find( vpn );
-	hptep = htab_data.htab  + slot;
+	if (i == HPTES_PER_GROUP) {
+		secondary = 1;
+		hpte_group = ((~hash & htab_data.htab_hash_mask) *
+			      HPTES_PER_GROUP) & ~0x7UL;
+		hptep = htab_data.htab + hpte_group;
+
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			dw0 = hptep->dw0.dw0;
+			if (!dw0.v) {
+				/* retry with lock held */
+				dw0 = hptep->dw0.dw0;
+				if (!dw0.v)
+					break;
+			}
+			hptep++;
+		}
+		if (i == HPTES_PER_GROUP) {
+			if (mftb() & 0x1)
+				hpte_group=((hash & htab_data.htab_hash_mask)* 
+					    HPTES_PER_GROUP) & ~0x7UL;
+			
+			hpte_remove(hpte_group);
+			goto repeat;
+		}
+	}
 
-	set_pp_bit(newpp , hptep);
+	lhpte.dw1.dword1      = 0;
+	lhpte.dw1.dw1.rpn     = arpn;
+	lhpte.dw1.flags.flags = hpteflags;
 
-	/* Ensure it is out of the tlb too */
-	spin_lock_irqsave( &hash_table_lock, flags );
-	_tlbie( va );
-	spin_unlock_irqrestore( &hash_table_lock, flags );
-}
+	lhpte.dw0.dword0      = 0;
+	lhpte.dw0.dw0.avpn    = avpn;
+	lhpte.dw0.dw0.h       = secondary;
+	lhpte.dw0.dw0.bolted  = bolted;
+	lhpte.dw0.dw0.v       = 1;
 
+	if (large) lhpte.dw0.dw0.l = 1;
 
+	hptep->dw1.dword1 = lhpte.dw1.dword1;
 
-/* This is called very early. */
-void hpte_init_iSeries(void)
-{
-	ppc_md.hpte_invalidate   = hpte_invalidate_iSeries;
-	ppc_md.hpte_updatepp     = hpte_updatepp_iSeries;
-	ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_iSeries;
-	ppc_md.hpte_getword0     = hpte_getword0_iSeries;
-	ppc_md.hpte_selectslot   = hpte_selectslot_iSeries;
-	ppc_md.hpte_create_valid = hpte_create_valid_iSeries;
-	ppc_md.hpte_find	 = hpte_find_iSeries;
+	/* Guarantee the second dword is visible before the valid bit */
+	__asm__ __volatile__ ("eieio" : : : "memory");
+
+	/*
+	 * Now set the first dword including the valid bit
+	 * NOTE: this also unlocks the hpte
+	 */
+	hptep->dw0.dword0 = lhpte.dw0.dword0;
+
+	__asm__ __volatile__ ("ptesync" : : : "memory");
+
+	return ((secondary << 3) | i);
 }
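
When both groups are full, the insert paths fall back to a castout: the
low bit of the timebase picks primary versus secondary as the victim
group, and hpte_remove() below starts its scan of that group at a
timebase-random offset so evictions spread across all 8 slots.  A sketch
of the policy, with rand() standing in for mftb():

    #include <stdio.h>
    #include <stdlib.h>

    #define HPTES_PER_GROUP 8

    static unsigned long mftb(void) { return (unsigned long)rand(); }

    int main(void)
    {
        unsigned long hash = 0x12345, hash_mask = (1UL << 18) - 1;
        unsigned long group, slot_offset;

        /* as in hpte_insert(): coin-flip the victim group */
        if (mftb() & 0x1)
            group = (hash & hash_mask) * HPTES_PER_GROUP;
        else
            group = (~hash & hash_mask) * HPTES_PER_GROUP;

        /* as in hpte_remove(): random starting slot within the group */
        slot_offset = mftb() & 0x7;

        printf("cast out from group at slot %lu, scanning from offset %lu\n",
               group, slot_offset);
        return 0;
    }
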
-void hpte_init_pSeries(void)
+
+static long rpa_lpar_hpte_insert(unsigned long vpn, unsigned long prpn,
+				 unsigned long hpteflags,
+				 int bolted, int large)
 {
-	ppc_md.hpte_invalidate   = hpte_invalidate_pSeries;
-	ppc_md.hpte_updatepp     = hpte_updatepp_pSeries;
-	ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeries;
-	ppc_md.hpte_getword0     = hpte_getword0_pSeries;
-	ppc_md.hpte_selectslot   = hpte_selectslot_pSeries;
-	ppc_md.hpte_create_valid = hpte_create_valid_pSeries;
-	ppc_md.hpte_find	 = hpte_find_pSeries;
+	/* XXX fix for large page */
+	unsigned long lpar_rc;
+	unsigned long flags;
+	unsigned long slot;
+	HPTE lhpte;
+	int secondary;
+	unsigned long hash = hpt_hash(vpn, 0);
+	unsigned long avpn = vpn >> 11;
+	unsigned long arpn = physRpn_to_absRpn(prpn);
+	unsigned long hpte_group;
+
+	/* Fill in the local HPTE with absolute rpn, avpn and flags */
+	lhpte.dw1.dword1      = 0;
+	lhpte.dw1.dw1.rpn     = arpn;
+	lhpte.dw1.flags.flags = hpteflags;
+
+	lhpte.dw0.dword0      = 0;
+	lhpte.dw0.dw0.avpn    = avpn;
+	lhpte.dw0.dw0.bolted  = bolted;
+	lhpte.dw0.dw0.v       = 1;
+
+	if (large) lhpte.dw0.dw0.l = 1;
+
+	/* Now fill in the actual HPTE */
+	/* Set CEC cookie to 0         */
+	/* Large page = 0              */
+	/* Zero page = 0               */
+	/* I-cache Invalidate = 0      */
+	/* I-cache synchronize = 0     */
+	/* Exact = 0                   */
+	flags = 0;
+
+	/* XXX why is this here? - Anton */
+	/*   -- Because at one point we hit a case where non-cacheable
+	 *      pages were marked coherent & this was rejected by the HV.
+	 *      Perhaps it is no longer an issue ... DRENG.
+	 */ 
+	if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
+		lhpte.dw1.flags.flags &= ~_PAGE_COHERENT;
+
+repeat:
+	secondary = 0;
+	lhpte.dw0.dw0.h = secondary;
+	hpte_group = ((hash & htab_data.htab_hash_mask) *
+		      HPTES_PER_GROUP) & ~0x7UL;
+
+	__asm__ __volatile__ (
+		H_ENTER_r3
+		"mr    4, %2\n"
+                "mr    5, %3\n"
+                "mr    6, %4\n"
+                "mr    7, %5\n"
+                HSC    
+                "mr    %0, 3\n"
+                "mr    %1, 4\n"
+		: "=r" (lpar_rc), "=r" (slot)
+		: "r" (flags), "r" (hpte_group), "r" (lhpte.dw0.dword0),
+		"r" (lhpte.dw1.dword1)
+		: "r0", "r3", "r4", "r5", "r6", "r7", 
+		  "r8", "r9", "r10", "r11", "r12", "cc");
+
+	if (lpar_rc == H_PTEG_Full) {
+		secondary = 1;
+		lhpte.dw0.dw0.h = secondary;
+		hpte_group = ((~hash & htab_data.htab_hash_mask) *
+			      HPTES_PER_GROUP) & ~0x7UL;
+
+		__asm__ __volatile__ (
+			      H_ENTER_r3
+			      "mr    4, %2\n"
+			      "mr    5, %3\n"
+			      "mr    6, %4\n"
+			      "mr    7, %5\n"
+			      HSC    
+			      "mr    %0, 3\n"
+			      "mr    %1, 4\n"
+			      : "=r" (lpar_rc), "=r" (slot)
+			      : "r" (flags), "r" (hpte_group), "r" (lhpte.dw0.dword0),
+			      "r" (lhpte.dw1.dword1)
+			      : "r0", "r3", "r4", "r5", "r6", "r7",
+			        "r8", "r9", "r10", "r11", "r12", "cc");
+		if (lpar_rc == H_PTEG_Full) {
+			if (mftb() & 0x1)
+				hpte_group=((hash & htab_data.htab_hash_mask)* 
+					    HPTES_PER_GROUP) & ~0x7UL;
+			
+			rpa_lpar_hpte_remove(hpte_group);
+			goto repeat;
+		}
+	}
+
+	if (lpar_rc != H_Success)
+		panic("Bad return code from pte enter rc = %lx\n", lpar_rc);
+
+	return ((secondary << 3) | (slot & 0x7));
 }
 
-/* Handle a fault by adding an HPTE 
- * If the address can't be determined to be valid
- * via Linux page tables, return 1.  If handled
- * return 0
- */
-int hash_page( unsigned long ea, unsigned long access )
+static long iSeries_hpte_insert(unsigned long vpn, unsigned long prpn,
+				unsigned long hpteflags,
+				int bolted, int large)
 {
-	int rc = 1;
-	void * pgdir = NULL;
-	unsigned long va, vsid, vpn;
-	unsigned long newpp, hash_ind, prpn;
-	unsigned long hpteflags, regionid;
+	HPTE lhpte;
+	unsigned long hash, hpte_group;
+	unsigned long avpn = vpn >> 11;
+	unsigned long arpn = physRpn_to_absRpn( prpn );
+	int secondary = 0;
 	long slot;
-	struct mm_struct * mm;
-	pte_t old_pte, new_pte, *ptep;
 
-	/* Check for invalid addresses. */
-	if (!IS_VALID_EA(ea)) {
-		return 1;
+	hash = hpt_hash(vpn, 0);
+
+repeat:
+	slot = HvCallHpt_findValid(&lhpte, vpn);
+	if (lhpte.dw0.dw0.v) {
+		panic("select_hpte_slot found entry already valid\n");
 	}
 
-	regionid =  REGION_ID(ea);
-	switch ( regionid ) {
-	case USER_REGION_ID:
-		mm = current->mm;
-		if ( mm == NULL ) {
-			PPCDBG(PPCDBG_MM, "hash_page returning; mm = 0\n"); 
-			return 1;
+	if (slot == -1) { /* No available entry found in either group */
+		if (mftb() & 0x1) {
+			hpte_group=((hash & htab_data.htab_hash_mask)* 
+				    HPTES_PER_GROUP) & ~0x7UL;
+		} else {
+			hpte_group=((~hash & htab_data.htab_hash_mask)* 
+				    HPTES_PER_GROUP) & ~0x7UL;
 		}
-		vsid = get_vsid(mm->context, ea );
-		break;
-	case IO_REGION_ID:
-		mm = &ioremap_mm;
-		vsid = get_kernel_vsid( ea );
-		break;
-	case VMALLOC_REGION_ID:
-		mm = &init_mm;
-		vsid = get_kernel_vsid( ea );
-		break;
-	case IO_UNMAPPED_REGION_ID:
-		udbg_printf("EEH Error ea = 0x%lx\n", ea);
- 		PPCDBG_ENTER_DEBUGGER();
-		panic("EEH Error ea = 0x%lx\n", ea);
-		break;
-	case KERNEL_REGION_ID:
-		/* As htab_initialize is now, we shouldn't ever get here since
-		 * we're bolting the entire 0xC0... region.
-		 */
-		udbg_printf("Little faulted on kernel address 0x%lx\n", ea);
- 		PPCDBG_ENTER_DEBUGGER();
-		panic("Little faulted on kernel address 0x%lx\n", ea);
-		break;
-	default:
-		/* Not a valid range, send the problem up to do_page_fault */
-		return 1;
-		break;
-	}
 
-	/* Search the Linux page table for a match with va */
-        va = ( vsid << 28 ) | ( ea & 0x0fffffff );
-	vpn = va >> PAGE_SHIFT;
-	pgdir = mm->pgd;
-	PPCDBG(PPCDBG_MM, "hash_page ea = 0x%16.16lx, va = 0x%16.16lx\n          current = 0x%16.16lx, access = %lx\n", ea, va, current, access); 
-                if ( pgdir == NULL ) {
-                return 1;
-	}
-	
-	/* Lock the Linux page table to prevent mmap and kswapd
-	 * from modifying entries while we search and update
-	 */
-	
-	spin_lock( &mm->page_table_lock );
-	
-	ptep = find_linux_pte( pgdir, ea );
-	/* If no pte found, send the problem up to do_page_fault */
-	if ( ! ptep ) {
-	  spin_unlock( &mm->page_table_lock );
-	  return 1;
-	}
-	
-	/* Acquire the hash table lock to guarantee that the linux
-	 * pte we fetch will not change
-	 */
-	spin_lock( &hash_table_lock );
-	
-	old_pte = *ptep;
-	
-	/* If the pte is not "present" (valid), send the problem
-	 * up to do_page_fault.
-	 */
-	if ( ! pte_present( old_pte ) ) {
-	  spin_unlock( &hash_table_lock );
-	  spin_unlock( &mm->page_table_lock );
-	  return 1;
-	}
-	
-	/* At this point we have found a pte (which was present).
-	 * The spinlocks prevent this status from changing
-	 * The hash_table_lock prevents the _PAGE_HASHPTE status
-	 * from changing (RPN, DIRTY and ACCESSED too)
-	 * The page_table_lock prevents the pte from being 
-	 * invalidated or modified
-	 */
+		hash = hpt_hash(vpn, 0);
+		iSeries_hpte_remove(hpte_group);
+		goto repeat;
+	} else if (slot < 0) {
+		slot &= 0x7fffffffffffffff;
+		secondary = 1;
+	}
+
+	/* Create the HPTE */
+	lhpte.dw1.dword1      = 0;
+	lhpte.dw1.dw1.rpn     = arpn;
+	lhpte.dw1.flags.flags = hpteflags;
+
+	lhpte.dw0.dword0     = 0;
+	lhpte.dw0.dw0.avpn   = avpn;
+	lhpte.dw0.dw0.h      = secondary;
+	lhpte.dw0.dw0.bolted = bolted;
+	lhpte.dw0.dw0.v      = 1;
 
-/* At this point, we have a pte (old_pte) which can be used to build or update
- * an HPTE.   There are 5 cases:
- *
- * 1. There is a valid (present) pte with no associated HPTE (this is 
- *	the most common case)
- * 2. There is a valid (present) pte with an associated HPTE.  The
- *	current values of the pp bits in the HPTE prevent access because the
- *	user doesn't have appropriate access rights.
- * 3. There is a valid (present) pte with an associated HPTE.  The
- *	current values of the pp bits in the HPTE prevent access because we are
- *	doing software DIRTY bit management and the page is currently not DIRTY. 
- * 4. This is a Kernel address (0xC---) for which there is no page directory.
- *	There is an HPTE for this page, but the pp bits prevent access.
- *      Since we always set up kernel pages with R/W access for the kernel
- *	this case only comes about for users trying to access the kernel.
- *	This case is always an error and is not dealt with further here.
- * 5. This is a Kernel address (0xC---) for which there is no page directory.
- *	There is no HPTE for this page.
+	/* Now fill in the actual HPTE */
+	HvCallHpt_addValidate(slot, secondary, (HPTE *)&lhpte);
+	return ((secondary << 3) | (slot & 0x7));
+}
 
- * Check the user's access rights to the page.  If access should be prevented
- * then send the problem up to do_page_fault.
+/*
+ * Functions used to remove hardware page table entries.
+ *
+ * Input : hpte_group: PTE index of the first entry in a group
+ * Output: offset within the group of the entry removed or
+ *         -1 on failure
  */
-
-	access |= _PAGE_PRESENT;
-	if ( 0 == ( access & ~(pte_val(old_pte)) ) ) {
-		/*
-		 * Check if pte might have an hpte, but we have
-		 * no slot information
-		 */
-		if ( pte_val(old_pte) & _PAGE_HPTENOIX ) {
-			unsigned long slot;	
-			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
-			slot = ppc_md.hpte_find( vpn );
-			if ( slot != -1 ) {
-				if ( slot < 0 ) {
-					pte_val(old_pte) |= _PAGE_SECONDARY;
-					slot = -slot;
-				}
-				pte_val(old_pte) |= ((slot << 12) & _PAGE_GROUP_IX) | _PAGE_HASHPTE;
-				
-			}
+static long hpte_remove(unsigned long hpte_group)
+{
+	HPTE *hptep;
+	Hpte_dword0 dw0;
+	int i;
+	int slot_offset;
+	unsigned long vsid, group, pi, pi_high;
+	unsigned long slot;
+	unsigned long flags;
+	int large;
+	unsigned long va;
+
+	/* pick a random slot to start at */
+	slot_offset = mftb() & 0x7;
+
+	for (i = 0; i < HPTES_PER_GROUP; i++) {
+		hptep = htab_data.htab + hpte_group + slot_offset;
+		dw0 = hptep->dw0.dw0;
+
+		if (dw0.v && !dw0.bolted) {
+			/* retry with lock held */
+			dw0 = hptep->dw0.dw0;
+			if (dw0.v && !dw0.bolted)
+				break;
 		}
 
-		/* User has appropriate access rights. */
-		new_pte = old_pte;
-		/* If the attempted access was a store */
-		if ( access & _PAGE_RW )
-			pte_val(new_pte) |= _PAGE_ACCESSED |
-				_PAGE_DIRTY;
-		else
-			pte_val(new_pte) |= _PAGE_ACCESSED;
-
-		/* Only cases 1, 3 and 5 still in play */
-
-		newpp = computeHptePP( pte_val(new_pte) );
-
-		/* Check if pte already has an hpte (case 3) */
-		if ( pte_val(old_pte) & _PAGE_HASHPTE ) {
-			/* There MIGHT be an HPTE for this pte */
-			unsigned long hash, slot, secondary;
-			/* Local copy of first doubleword of HPTE */
-			union {
-				unsigned long d;
-				Hpte_dword0   h;
-			} hpte_dw0;
-			hash = hpt_hash(vpn, 0);
-			secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
-			if ( secondary )
-				hash = ~hash;
-			slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
-			slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
-			/* If there is an HPTE for this page it is indexed by slot */
-			hpte_dw0.d = ppc_md.hpte_getword0( slot );
-			if ( (hpte_dw0.h.avpn == (vpn >> 11) ) &&
-			     (hpte_dw0.h.v) && 
-			     (hpte_dw0.h.h == secondary ) ){
-				/* HPTE matches */
-				ppc_md.hpte_updatepp( slot, newpp, va );
-				if ( !pte_same( old_pte, new_pte ) )
-					*ptep = new_pte;
-			}
-			else {
-				/* HPTE is not for this pte */
-				pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
-			}
-		}
-		if ( !( pte_val(old_pte) & _PAGE_HASHPTE ) ) {
-			/* Cases 1 and 5 */
-			/* For these cases we need to create a new
-			 * HPTE and update the linux pte (for
-			 * case 1).  For case 5 there is no linux pte.
-			 *
-			 * Find an available HPTE slot
- 			 */
-			slot = ppc_md.hpte_selectslot( vpn );
-
-			/* If hpte_selectslot returns 0x8000000000000000 that means
-			 * that there was already an entry in the HPT even though
-			 * the linux PTE said there couldn't be. 
-			 */
-			/* Debug code */
-			if ( slot == 0x8000000000000000 ) {
-				unsigned long xold_pte = pte_val(old_pte);
-				unsigned long xnew_pte = pte_val(new_pte);
-				
-				udbg_printf("hash_page: ptep    = 0x%016lx\n", (unsigned long)ptep );
-				udbg_printf("hash_page: old_pte = 0x%016lx\n", xold_pte );
-				udbg_printf("hash_page: new_pte = 0x%016lx\n", xnew_pte );
-				udbg_printf("hash_page: ea      = 0x%016lx\n", ea );
-				udbg_printf("hash_page: va      = 0x%016lx\n", va );
-				udbg_printf("hash_page: access  = 0x%016lx\n", access );
-			
-				panic("hash_page: hpte already exists\n");
-			}
-			hash_ind = 0;
-			if ( slot < 0 ) {
-				slot = -slot;
-				hash_ind = 1;
-			}
+		slot_offset++;
+		slot_offset &= 0x7;
+	}
 
-			/* Set the physical address */
-			prpn = pte_val(old_pte) >> PTE_SHIFT;
-			
-			if ( ptep ) {
-				/* Update the linux pte with the HPTE slot */
-				pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
-				pte_val(new_pte) |= hash_ind << 15;
-				pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-				pte_val(new_pte) |= _PAGE_HASHPTE;
-				/* No need to use ldarx/stdcx here because all
-				 * who might be updating the pte will hold the page_table_lock
-				 * or the hash_table_lock (we hold both)
-				 */
-				*ptep = new_pte;
-			}
+	if (i == HPTES_PER_GROUP)
+		return -1;
 
-			/* copy appropriate flags from linux pte */
-			hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
+	large = dw0.l;
 
-			/* Create the HPTE */
-			ppc_md.hpte_create_valid( slot, vpn, prpn, hash_ind, ptep, hpteflags, 0 ); 
+	/* Invalidate the hpte. NOTE: this also unlocks it */
+	hptep->dw0.dword0 = 0;
 
-		}
+	/* Invalidate the tlb */
+	vsid = dw0.avpn >> 5;
+	slot = hptep - htab_data.htab;
+	group = slot >> 3;
+	if (dw0.h)
+		group = ~group;
+	pi = (vsid ^ group) & 0x7ff;
+	pi_high = (dw0.avpn & 0x1f) << 11;
+	pi |= pi_high;
 
-		/* Indicate success */
-		rc = 0;
-	}		
-	
-	spin_unlock( &hash_table_lock );
-	if (ptep)
-		spin_unlock( &mm->page_table_lock );
+	if (large)
+		va = pi << LARGE_PAGE_SHIFT;
+	else
+		va = pi << PAGE_SHIFT;
+
+	spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+	_tlbie(va, large);
+	spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
 
-	return rc;
+	return i;
 }
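
hpte_remove() works backwards from the hash function to get a virtual
address it can feed to tlbie.  For a 4 KB page the primary hash is the
VSID XORed with the low 11 bits of the page index, so XORing the group
number back with the VSID recovers those low bits (after undoing the
complement for secondary-group entries), and the top five bits of the
page index are stored verbatim in the low bits of the AVPN.  The same
inversion as a standalone sketch, assuming (as the code above does)
that the hash mask covers all 11 low bits:

	/* Rebuild the VA for tlbie from an HPTE's AVPN and its group
	 * number -- mirrors the logic in hpte_remove().  Sketch only. */
	static unsigned long va_from_hpte(unsigned long avpn,
					  unsigned long group,
					  int secondary, int large)
	{
		unsigned long vsid = avpn >> 5;	/* AVPN = (vsid << 5) | pi_high */
		unsigned long pi;

		if (secondary)
			group = ~group;		/* undo secondary-hash complement */

		pi  = (vsid ^ group) & 0x7ff;	/* low 11 bits of page index */
		pi |= (avpn & 0x1f) << 11;	/* high 5 bits from the AVPN */

		return pi << (large ? LARGE_PAGE_SHIFT : PAGE_SHIFT);
	}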
 
-void flush_hash_page( unsigned long context, unsigned long ea, pte_t *ptep )
+static long rpa_lpar_hpte_remove(unsigned long hpte_group)
 {
-	unsigned long vsid, vpn, va, hash, secondary, slot, flags;
-	/* Local copy of first doubleword of HPTE */
-	union {
-		unsigned long d;
-		Hpte_dword0   h;
-	} hpte_dw0;
-	pte_t pte;
+	unsigned long slot_offset;
+	unsigned long lpar_rc;
+	int i;
+	unsigned long dummy1, dummy2;
 
-	if ( (ea >= USER_START ) && ( ea <= USER_END ) )
-		vsid = get_vsid( context, ea );
-	else
-		vsid = get_kernel_vsid( ea );
-	va = (vsid << 28) | (ea & 0x0fffffff);
-	vpn = va >> PAGE_SHIFT;
-	hash = hpt_hash(vpn, 0);
+	/* pick a random slot to start at */
+	slot_offset = mftb() & 0x7;
 
-	spin_lock_irqsave( &hash_table_lock, flags);
-	pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
-	if ( pte_val(pte) & _PAGE_HASHPTE ) {
-		secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
-		if ( secondary )
-			hash = ~hash;
-		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
-		/* If there is an HPTE for this page it is indexed by slot */
+	for (i = 0; i < HPTES_PER_GROUP; i++) {
 
-		hpte_dw0.d = ppc_md.hpte_getword0( slot );
-		if ( (hpte_dw0.h.avpn == (vpn >> 11) ) &&
-		     (hpte_dw0.h.v) && 
-		     (hpte_dw0.h.h == secondary ) ){
-			/* HPTE matches */
-			ppc_md.hpte_invalidate( slot );	
-		}
-		else {
-			unsigned k;
-			/* Temporarily lets check for the hpte in all possible slots */
-			for ( secondary = 0; secondary < 2; ++secondary ) {
-				hash = hpt_hash(vpn, 0);
-				if ( secondary )
-					hash = ~hash;
-				slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
-				for ( k=0; k<8; ++k ) {
-					hpte_dw0.d = ppc_md.hpte_getword0( slot+k );
-					if ( ( hpte_dw0.h.avpn == (vpn >> 11) ) &&
-					     ( hpte_dw0.h.v ) &&
-					     ( hpte_dw0.h.h == secondary ) ) {
-						while (1) ;
-					}
-				}
-			}
-		}
+		/* Don't remove a bolted entry */
+		lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
+					   (0x1UL << 4), &dummy1, &dummy2);
+
+		if (lpar_rc == H_Success)
+			return i;
+
+		if (lpar_rc != H_Not_Found)
+			panic("Bad return code from pte remove rc = %lx\n",
+			      lpar_rc);
+
+		slot_offset++;
+		slot_offset &= 0x7;
 	}
-	spin_unlock_irqrestore( &hash_table_lock, flags );
+
+	return -1;
 }
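
Unlike the bare-metal path, rpa_lpar_hpte_remove() never reads the
entry itself: the bolted check is delegated to the hypervisor.  With
H_ANDCOND set, H_REMOVE succeeds only when the HPTE's first doubleword
ANDed with the supplied mask is zero, so passing bit 4 (the bolted
bit) makes firmware answer H_Not_Found for bolted entries rather than
evicting them.  A sketch of that convention; HPTE_BOLTED_MASK is an
illustrative name, not from the patch:

	#define HPTE_BOLTED_MASK	(0x1UL << 4)	/* dword0 bolted bit */

	/* Ask the hypervisor to remove one entry, but only if it is
	 * not bolted.  Returns H_Success or H_Not_Found. */
	static long try_remove_unbolted(unsigned long ptex)
	{
		unsigned long old_pteh, old_ptel;

		return plpar_pte_remove(H_ANDCOND, ptex, HPTE_BOLTED_MASK,
					&old_pteh, &old_ptel);
	}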
 
-int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
-		   void *buffer, size_t *lenp)
+static long iSeries_hpte_remove(unsigned long hpte_group)
 {
-	int vleft, first=1, len, left, val;
-#define TMPBUFLEN 256
-	char buf[TMPBUFLEN], *p;
-	static const char *sizestrings[4] = {
-		"2MB", "256KB", "512KB", "1MB"
-	};
-	static const char *clockstrings[8] = {
-		"clock disabled", "+1 clock", "+1.5 clock", "reserved(3)",
-		"+2 clock", "+2.5 clock", "+3 clock", "reserved(7)"
-	};
-	static const char *typestrings[4] = {
-		"flow-through burst SRAM", "reserved SRAM",
-		"pipelined burst SRAM", "pipelined late-write SRAM"
-	};
-	static const char *holdstrings[4] = {
-		"0.5", "1.0", "(reserved2)", "(reserved3)"
-	};
-
-	if ( ((_get_PVR() >> 16) != 8) && ((_get_PVR() >> 16) != 12))
-		return -EFAULT;
-	
-	if ( /*!table->maxlen ||*/ (filp->f_pos && !write)) {
-		*lenp = 0;
-		return 0;
-	}
-	
-	vleft = table->maxlen / sizeof(int);
-	left = *lenp;
-	
-	for (; left /*&& vleft--*/; first=0) {
-		if (write) {
-			while (left) {
-				char c;
-				if(get_user(c,(char *) buffer))
-					return -EFAULT;
-				if (!isspace(c))
-					break;
-				left--;
-				((char *) buffer)++;
-			}
-			if (!left)
-				break;
-			len = left;
-			if (len > TMPBUFLEN-1)
-				len = TMPBUFLEN-1;
-			if(copy_from_user(buf, buffer, len))
-				return -EFAULT;
-			buf[len] = 0;
-			p = buf;
-			if (*p < '0' || *p > '9')
-				break;
-			val = simple_strtoul(p, &p, 0);
-			len = p-buf;
-			if ((len < left) && *p && !isspace(*p))
-				break;
-			buffer += len;
-			left -= len;
-#if 0
-			/* DRENG need a def */
-			_set_L2CR(0);
-			_set_L2CR(val);
-			while ( _get_L2CR() & 0x1 )
-				/* wait for invalidate to finish */;
-#endif
-			  
-		} else {
-			p = buf;
-			if (!first)
-				*p++ = '\t';
-#if 0
-			/* DRENG need a def */
-			val = _get_L2CR();
-#endif
-			p += sprintf(p, "0x%08x: ", val);
-			p += sprintf(p, " %s", (val >> 31) & 1 ? "enabled" :
-				     "disabled");
-			p += sprintf(p, ", %sparity", (val>>30)&1 ? "" : "no ");
-			p += sprintf(p, ", %s", sizestrings[(val >> 28) & 3]);
-			p += sprintf(p, ", %s", clockstrings[(val >> 25) & 7]);
-			p += sprintf(p, ", %s", typestrings[(val >> 23) & 2]);
-			p += sprintf(p, "%s", (val>>22)&1 ? ", data only" : "");
-			p += sprintf(p, "%s", (val>>20)&1 ? ", ZZ enabled": "");
-			p += sprintf(p, ", %s", (val>>19)&1 ? "write-through" :
-				     "copy-back");
-			p += sprintf(p, "%s", (val>>18)&1 ? ", testing" : "");
-			p += sprintf(p, ", %sns hold",holdstrings[(val>>16)&3]);
-			p += sprintf(p, "%s", (val>>15)&1 ? ", DLL slow" : "");
-			p += sprintf(p, "%s", (val>>14)&1 ? ", diff clock" :"");
-			p += sprintf(p, "%s", (val>>13)&1 ? ", DLL bypass" :"");
-			
-			p += sprintf(p,"\n");
-			
-			len = strlen(buf);
-			if (len > left)
-				len = left;
-			if(copy_to_user(buffer, buf, len))
-				return -EFAULT;
-			left -= len;
-			buffer += len;
-			break;
+	unsigned long slot_offset;
+	int i;
+	HPTE lhpte;
+
+	/* Pick a random slot to start at */
+	slot_offset = mftb() & 0x7;
+
+	for (i = 0; i < HPTES_PER_GROUP; i++) {
+		lhpte.dw0.dword0 = 
+			iSeries_hpte_getword0(hpte_group + slot_offset);
+
+		if (!lhpte.dw0.dw0.bolted) {
+			HvCallHpt_invalidateSetSwBitsGet(hpte_group + 
+							 slot_offset, 0, 0);
+			return i;
 		}
-	}
 
-	if (!write && !first && left) {
-		if(put_user('\n', (char *) buffer))
-			return -EFAULT;
-		left--, buffer++;
-	}
-	if (write) {
-		p = (char *) buffer;
-		while (left) {
-			char c;
-			if(get_user(c, p++))
-				return -EFAULT;
-			if (!isspace(c))
-				break;
-			left--;
-		}
+		slot_offset++;
+		slot_offset &= 0x7;
 	}
-	if (write && first)
-		return -EINVAL;
-	*lenp -= left;
-	filp->f_pos += *lenp;
-	return 0;
+
+	return -1;
+}
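
All three remove variants share one scan skeleton: derive a
pseudo-random start slot from the low bits of the timebase, then probe
each of the eight entries in the group, wrapping with "& 0x7".
Starting at a random offset spreads evictions across the group instead
of always sacrificing slot 0.  The common shape pulled out as a
sketch; the can_evict callback is hypothetical:

	/* Generic group scan: returns the offset of the entry evicted,
	 * or -1 if every entry in the group is bolted. */
	static long scan_hpte_group(unsigned long hpte_group,
				    int (*can_evict)(unsigned long ptex))
	{
		int i;
		int slot_offset = mftb() & 0x7;	/* random start slot */

		for (i = 0; i < HPTES_PER_GROUP; i++) {
			if (can_evict(hpte_group + slot_offset))
				return i;
			slot_offset = (slot_offset + 1) & 0x7;
		}
		return -1;
	}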
+
+void hpte_init_pSeries(void)
+{
+	ppc_md.hpte_invalidate     = hpte_invalidate;
+	ppc_md.hpte_updatepp       = hpte_updatepp;
+	ppc_md.hpte_updateboltedpp = hpte_updateboltedpp;
+	ppc_md.hpte_insert	   = hpte_insert;
+	ppc_md.hpte_remove	   = hpte_remove;
+}
+
+void pSeries_lpar_mm_init(void)
+{
+	ppc_md.hpte_invalidate     = rpa_lpar_hpte_invalidate;
+	ppc_md.hpte_updatepp       = rpa_lpar_hpte_updatepp;
+	ppc_md.hpte_updateboltedpp = rpa_lpar_hpte_updateboltedpp;
+	ppc_md.hpte_insert         = rpa_lpar_hpte_insert;
+	ppc_md.hpte_remove         = rpa_lpar_hpte_remove;
+}
+
+void hpte_init_iSeries(void)
+{
+	ppc_md.hpte_invalidate     = iSeries_hpte_invalidate;
+	ppc_md.hpte_updatepp       = iSeries_hpte_updatepp;
+	ppc_md.hpte_updateboltedpp = iSeries_hpte_updateboltedpp;
+	ppc_md.hpte_insert         = iSeries_hpte_insert;
+	ppc_md.hpte_remove         = iSeries_hpte_remove;
 }
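
These three init functions are the only platform-specific step: once
one of them has run at boot, generic code goes through the ppc_md
vector and stays platform-blind.  A hypothetical caller:

	/* Evict one non-bolted entry from a full group.  Dispatches to
	 * hpte_remove, rpa_lpar_hpte_remove or iSeries_hpte_remove,
	 * whichever the boot-time init installed. */
	static long evict_from_group(unsigned long hpte_group)
	{
		return ppc_md.hpte_remove(hpte_group);
	}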
 
