patch-2.4.22 linux-2.4.22/arch/x86_64/kernel/pci-gart.c

diff -urN linux-2.4.21/arch/x86_64/kernel/pci-gart.c linux-2.4.22/arch/x86_64/kernel/pci-gart.c
@@ -8,25 +8,9 @@
  * See Documentation/DMA-mapping.txt for the interface specification.
  * 
  * Copyright 2002 Andi Kleen, SuSE Labs.
- * $Id: pci-gart.c,v 1.20 2003/03/12 08:23:29 ak Exp $
+ * $Id: pci-gart.c,v 1.27 2003/08/05 18:25:31 ak Exp $
  */
 
-/* 
- * Notebook:
-
-agpgart_be
- check if the simple reservation scheme is enough.
-
-possible future tuning: 
- fast path for sg streaming mappings 
- more intelligent flush strategy - flush only a single NB? flush only when
- gart area fills up and alloc_iommu wraps. 
- don't flush on allocation - need to unmap the gart area first to avoid prefetches
- by the CPU
- move boundary between IOMMU and AGP in GART dynamically
-  
-*/ 
-
 #include <linux/config.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
@@ -36,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
+#include <linux/pci_ids.h>
 #include <linux/module.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
@@ -72,7 +57,8 @@
 
 #define for_all_nb(dev) \
 	pci_for_each_dev(dev) \
-		if (dev->bus->number == 0 && PCI_FUNC(dev->devfn) == 3 && \
+		if (dev->vendor == PCI_VENDOR_ID_AMD && dev->device==0x1103 &&\
+		    dev->bus->number == 0 && PCI_FUNC(dev->devfn) == 3 && \
 		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))
 
 #define EMERGENCY_PAGES 32 /* = 128KB */ 
@@ -90,21 +76,27 @@
 
 static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
 
+static struct pci_dev *northbridges[NR_CPUS + 1];
+static u32 northbridge_flush_word[NR_CPUS + 1];
+static int need_flush; 	/* global flush state. set for each gart wrap */
 static unsigned long alloc_iommu(int size) 
 { 	
 	unsigned long offset, flags;
 
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);	
-
 	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
-	if (offset == -1) 
+	if (offset == -1) {
+		need_flush = 1;
 	       	offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
+	}
 	if (offset != -1) { 
 		set_bit_string(iommu_gart_bitmap, offset, size); 
 		next_bit = offset+size; 
-		if (next_bit >= iommu_pages) 
+		if (next_bit >= iommu_pages) { 
+			need_flush = 1;
 			next_bit = 0;
 	} 
+	} 
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);      
 	return offset;
 } 
@@ -114,20 +106,38 @@
 	unsigned long flags;
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	clear_bit_string(iommu_gart_bitmap, offset, size);
-	next_bit = offset;
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 } 
 
-static inline void flush_gart(void) 
+
+/* 
+ * Use global flush state to avoid races with multiple flushers.
+ */
+static void __flush_gart(void)
 { 
-	struct pci_dev *nb; 
-	for_all_nb(nb) { 
-		u32 flag; 
-		pci_read_config_dword(nb, 0x9c, &flag); /* could cache this */ 
-		/* could complain for PTE walk errors here (bit 1 of flag) */ 
-		flag |= 1; 
-		pci_write_config_dword(nb, 0x9c, flag); 
+	unsigned long flags;
+	int flushed = 0;
+	int i;
+
+	spin_lock_irqsave(&iommu_bitmap_lock, flags);
+	/* recheck flush count inside lock */
+	if (need_flush) { 
+		for (i = 0; northbridges[i]; i++) { 
+			pci_write_config_dword(northbridges[i], 0x9c, 
+					       northbridge_flush_word[i] | 1); 
+			flushed++;
+		} 
+		if (!flushed) 
+			printk("nothing to flush?\n");
+		need_flush = 0;
 	} 
+	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+} 
+
+static inline void flush_gart(void)
+{ 
+	if (need_flush)
+		__flush_gart();
 } 
 
 void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
@@ -150,9 +160,10 @@
 	if (memory == NULL) {
 		return NULL; 
 	} else {
-		int high = (unsigned long)virt_to_bus(memory) + size
-			>= 0xffffffff;
-		int mmu = high;
+		int high = 0, mmu;
+		if (((unsigned long)virt_to_bus(memory) + size) > 0xffffffffUL)
+			high = 1;
+		mmu = high;
 		if (force_mmu && !(gfp & GFP_DMA)) 
 			mmu = 1;
 		if (no_iommu) { 
@@ -199,25 +210,22 @@
 void pci_free_consistent(struct pci_dev *hwdev, size_t size,
 			 void *vaddr, dma_addr_t bus)
 {
-	u64 pte;
 	unsigned long iommu_page;
-	int i;
 
 	size = round_up(size, PAGE_SIZE); 
-	if (bus < iommu_bus_base || bus > iommu_bus_base + iommu_size) { 
-		free_pages((unsigned long)vaddr, get_order(size)); 		
-		return;
-	} 
-	size >>= PAGE_SHIFT;
-	iommu_page = (bus - iommu_bus_base) / PAGE_SIZE;
-	for (i = 0; i < size; i++) {
-		pte = iommu_gatt_base[iommu_page + i];
+	if (bus >= iommu_bus_base && bus <= iommu_bus_base + iommu_size) { 
+		unsigned pages = size >> PAGE_SHIFT;
+		iommu_page = (bus - iommu_bus_base) >> PAGE_SHIFT;
+		vaddr = __va(GPTE_DECODE(iommu_gatt_base[iommu_page]));
+		int i;
+		for (i = 0; i < pages; i++) {
+			u64 pte = iommu_gatt_base[iommu_page + i];
 		BUG_ON((pte & GPTE_VALID) == 0); 
 		iommu_gatt_base[iommu_page + i] = 0; 		
-		free_page((unsigned long) __va(GPTE_DECODE(pte)));
 	} 
-	flush_gart(); 
-	free_iommu(iommu_page, size);
+		free_iommu(iommu_page, pages);
+	}
+	free_pages((unsigned long)vaddr, get_order(size)); 		
 }
 
 #ifdef CONFIG_IOMMU_LEAK
@@ -251,8 +259,7 @@
 	 * Unfortunately the drivers cannot handle this operation properly.
 	 * Return some non mapped prereserved space in the aperture and 
 	 * let the Northbridge deal with it. This will result in garbage
-	 * in the IO operation. When the size exceeds the prereserved space
-	 * memory corruption will occur or random memory will be DMAed 
+	 * in the IO operation. When the size exceeds the prereserved spa	 * memory corruption will occur or random memory will be DMAed 
 	 * out. Hopefully no network devices use single mappings that big.
 	 */ 
 	
@@ -287,8 +294,8 @@
 	return mmu; 
 }
 
-dma_addr_t __pci_map_single(struct pci_dev *dev, void *addr, size_t size,
-			    int dir, int flush)
+dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,
+			    int dir)
 { 
 	unsigned long iommu_page;
 	unsigned long phys_mem, bus;
@@ -318,18 +325,12 @@
 		 */
 		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
 
-#ifdef CONFIG_IOMMU_DEBUG
-		/* paranoia check */
-		BUG_ON(GPTE_DECODE(iommu_gatt_base[iommu_page+i]) != phys_mem); 
-#endif
-
 #ifdef CONFIG_IOMMU_LEAK
 		/* XXX need eventually caller of pci_map_sg */
 		if (iommu_leak_tab) 
 			iommu_leak_tab[iommu_page + i] = __builtin_return_address(0); 
 #endif
 	}
-	if (flush)
 	flush_gart(); 
 
 	bus = iommu_bus_base + iommu_page*PAGE_SIZE; 
@@ -343,12 +344,13 @@
 		      size_t size, int direction)
 {
 	unsigned long iommu_page; 
-	int i, npages;
+	int npages;
 	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || 
 	    dma_addr > iommu_bus_base + iommu_size)
 		return;
 	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;	
 	npages = round_up(size + (dma_addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT;
+	int i;
 	for (i = 0; i < npages; i++) { 
 		iommu_gatt_base[iommu_page + i] = 0; 
 #ifdef CONFIG_IOMMU_LEAK
@@ -356,11 +358,10 @@
 			iommu_leak_tab[iommu_page + i] = 0; 
 #endif
 	}
-	flush_gart(); 
 	free_iommu(iommu_page, npages);
 }
 
-EXPORT_SYMBOL(__pci_map_single);
+EXPORT_SYMBOL(pci_map_single);
 EXPORT_SYMBOL(pci_unmap_single);
 
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -444,7 +445,7 @@
 		u32 ctl; 
 		u32 gatt_reg; 
 
-		gatt_reg = ((u64)gatt) >> 12; 
+		gatt_reg = __pa(gatt) >> 12; 
 		gatt_reg <<= 4; 
 		pci_write_config_dword(dev, 0x98, gatt_reg);
 		pci_read_config_dword(dev, 0x90, &ctl); 
@@ -534,7 +535,31 @@
 	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
 	bad_dma_address = iommu_bus_base;
 
+	/* 
+         * Unmap the IOMMU part of the GART. The alias of the page is always mapped
+	 * with cache enabled and there is no full cache coherency across the GART
+	 * remapping. The unmapping avoids automatic prefetches from the CPU 
+	 * allocating cache lines in there. All CPU accesses are done via the 
+	 * direct mapping to the backing memory. The GART address is only used by PCI 
+	 * devices. 
+	 */
+	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
+
+	struct pci_dev *dev;
+	for_all_nb(dev) {
+		u32 flag; 
+		int cpu = PCI_SLOT(dev->devfn) - 24;
+		if (cpu >= NR_CPUS)
+			continue;
+		northbridges[cpu] = dev;
+
+		pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
+		northbridge_flush_word[cpu] = flag; 
+	}
+
 	asm volatile("wbinvd" ::: "memory");
+
+	flush_gart();
 } 
 
 /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]]
@@ -543,8 +568,8 @@
    off   don't use the IOMMU
    leak  turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
    memaper[=order] allocate an own aperture over RAM with size 32MB^order.
-   noforce don't force IOMMU usage. Should be fastest.
-   force  Force IOMMU and turn on unmap debugging.
+   noforce don't force IOMMU usage. Default
+   force  Force IOMMU for all devices.
 */
 __init int iommu_setup(char *opt) 
 { 

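The core of this patch is the move from flushing the GART on every mapping change to a deferred scheme: alloc_iommu() only sets a global need_flush flag when the allocator wraps around the aperture, and flush_gart() then writes bit 0 of PCI config register 0x9c on each cached K8 northbridge. Below is a minimal, standalone C sketch of that scheme under stated assumptions: the aperture size, node count, and all identifiers (IOMMU_PAGES, nb_write_flush_word, find_free, and so on) are local to the sketch and not kernel APIs, and the northbridge register write is stubbed out with a printf.

/*
 * Standalone sketch of the deferred GART flush scheme, assuming a toy
 * 64-page aperture and stubbed hardware access.  Only register 0x9c and
 * its flush bit come from the patch above; everything else is illustrative.
 */
#include <stdio.h>
#include <string.h>

#define IOMMU_PAGES 64			/* pretend aperture size in pages */
#define MAX_NB       8			/* one northbridge per node */

static unsigned char gart_bitmap[IOMMU_PAGES];	/* 1 = page in use */
static unsigned long next_bit;		/* where the next search starts */
static int need_flush;			/* set whenever the allocator wraps */
static unsigned int nb_flush_word[MAX_NB];	/* cached reg 0x9c contents */
static int nb_count;

/* stand-in for pci_write_config_dword(northbridges[i], 0x9c, word | 1) */
static void nb_write_flush_word(int nb, unsigned int word)
{
	printf("NB%d: write 0x9c = %#x\n", nb, word | 1);
}

/* only touch the (slow) northbridge registers when a wrap happened */
static void flush_gart(void)
{
	int i;

	if (!need_flush)
		return;
	for (i = 0; i < nb_count; i++)
		nb_write_flush_word(i, nb_flush_word[i]);
	need_flush = 0;
}

/* first free run of 'size' pages in [lo, hi), or -1 if none */
static long find_free(unsigned long lo, unsigned long hi, int size)
{
	unsigned long start;

	for (start = lo; start + size <= hi; start++)
		if (!memchr(gart_bitmap + start, 1, size))
			return start;
	return -1;
}

/* next-fit allocator; wrapping back to 0 is what forces a later flush */
static long alloc_iommu(int size)
{
	long offset = find_free(next_bit, IOMMU_PAGES, size);

	if (offset == -1) {
		need_flush = 1;			/* about to reuse old space */
		offset = find_free(0, next_bit, size);
	}
	if (offset != -1) {
		memset(gart_bitmap + offset, 1, size);
		next_bit = offset + size;
		if (next_bit >= IOMMU_PAGES) {
			need_flush = 1;
			next_bit = 0;
		}
	}
	return offset;
}

int main(void)
{
	int i;

	nb_count = 2;
	nb_flush_word[0] = nb_flush_word[1] = 0x4;	/* pretend cached words */

	alloc_iommu(8);
	flush_gart();			/* no wrap yet: prints nothing */
	for (i = 0; i < 16; i++)
		alloc_iommu(8);		/* fills the aperture and wraps */
	flush_gart();			/* now the cached flush words get used */
	return 0;
}

The sketch also shows why the flush words are cached at init time in the patch (the for_all_nb loop in init_k8_gatt reads 0x9c once per northbridge): the flush path then only needs config-space writes, and the fast path in flush_gart() is a single flag test.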