Merge tag 'dma-buf-for-4.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-drm-fsl-dcu.git] / drivers / iommu / intel-iommu.c
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #include "irq_remapping.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
73
74 /* IO virtual address start page frame number */
75 #define IOVA_START_PFN          (1)
76
77 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
78 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
79 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
80
81 /* page table handling */
82 #define LEVEL_STRIDE            (9)
83 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
84
85 /*
86  * This bitmap is used to advertise the page sizes our hardware support
87  * to the IOMMU core, which will then use this information to split
88  * physically contiguous memory regions it is mapping into page sizes
89  * that we support.
90  *
91  * Traditionally the IOMMU core just handed us the mappings directly,
92  * after making sure the size is an order of a 4KiB page and that the
93  * mapping has natural alignment.
94  *
95  * To retain this behavior, we currently advertise that we support
96  * all page sizes that are an order of 4KiB.
97  *
98  * If at some point we'd like to utilize the IOMMU core's new behavior,
99  * we could change this to advertise the real page sizes we support.
100  */
101 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
102
103 static inline int agaw_to_level(int agaw)
104 {
105         return agaw + 2;
106 }
107
108 static inline int agaw_to_width(int agaw)
109 {
110         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
111 }
112
113 static inline int width_to_agaw(int width)
114 {
115         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
116 }
117
118 static inline unsigned int level_to_offset_bits(int level)
119 {
120         return (level - 1) * LEVEL_STRIDE;
121 }
122
123 static inline int pfn_level_offset(unsigned long pfn, int level)
124 {
125         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
126 }
127
128 static inline unsigned long level_mask(int level)
129 {
130         return -1UL << level_to_offset_bits(level);
131 }
132
133 static inline unsigned long level_size(int level)
134 {
135         return 1UL << level_to_offset_bits(level);
136 }
137
138 static inline unsigned long align_to_level(unsigned long pfn, int level)
139 {
140         return (pfn + level_size(level) - 1) & level_mask(level);
141 }
142
143 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
144 {
145         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
146 }
147
148 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
149    are never going to work. */
150 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
151 {
152         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
153 }
154
155 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
156 {
157         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
158 }
159 static inline unsigned long page_to_dma_pfn(struct page *pg)
160 {
161         return mm_to_dma_pfn(page_to_pfn(pg));
162 }
163 static inline unsigned long virt_to_dma_pfn(void *p)
164 {
165         return page_to_dma_pfn(virt_to_page(p));
166 }
167
168 /* global iommu list, set NULL for ignored DMAR units */
169 static struct intel_iommu **g_iommus;
170
171 static void __init check_tylersburg_isoch(void);
172 static int rwbf_quirk;
173
174 /*
175  * set to 1 to panic kernel if can't successfully enable VT-d
176  * (used when kernel is launched w/ TXT)
177  */
178 static int force_on = 0;
179
180 /*
181  * 0: Present
182  * 1-11: Reserved
183  * 12-63: Context Ptr (12 - (haw-1))
184  * 64-127: Reserved
185  */
186 struct root_entry {
187         u64     val;
188         u64     rsvd1;
189 };
190 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
191 static inline bool root_present(struct root_entry *root)
192 {
193         return (root->val & 1);
194 }
195 static inline void set_root_present(struct root_entry *root)
196 {
197         root->val |= 1;
198 }
199 static inline void set_root_value(struct root_entry *root, unsigned long value)
200 {
201         root->val &= ~VTD_PAGE_MASK;
202         root->val |= value & VTD_PAGE_MASK;
203 }
204
205 static inline struct context_entry *
206 get_context_addr_from_root(struct root_entry *root)
207 {
208         return (struct context_entry *)
209                 (root_present(root)?phys_to_virt(
210                 root->val & VTD_PAGE_MASK) :
211                 NULL);
212 }
213
214 /*
215  * low 64 bits:
216  * 0: present
217  * 1: fault processing disable
218  * 2-3: translation type
219  * 12-63: address space root
220  * high 64 bits:
221  * 0-2: address width
222  * 3-6: aval
223  * 8-23: domain id
224  */
225 struct context_entry {
226         u64 lo;
227         u64 hi;
228 };
229
230 static inline bool context_present(struct context_entry *context)
231 {
232         return (context->lo & 1);
233 }
234 static inline void context_set_present(struct context_entry *context)
235 {
236         context->lo |= 1;
237 }
238
239 static inline void context_set_fault_enable(struct context_entry *context)
240 {
241         context->lo &= (((u64)-1) << 2) | 1;
242 }
243
244 static inline void context_set_translation_type(struct context_entry *context,
245                                                 unsigned long value)
246 {
247         context->lo &= (((u64)-1) << 4) | 3;
248         context->lo |= (value & 3) << 2;
249 }
250
251 static inline void context_set_address_root(struct context_entry *context,
252                                             unsigned long value)
253 {
254         context->lo &= ~VTD_PAGE_MASK;
255         context->lo |= value & VTD_PAGE_MASK;
256 }
257
258 static inline void context_set_address_width(struct context_entry *context,
259                                              unsigned long value)
260 {
261         context->hi |= value & 7;
262 }
263
264 static inline void context_set_domain_id(struct context_entry *context,
265                                          unsigned long value)
266 {
267         context->hi |= (value & ((1 << 16) - 1)) << 8;
268 }
269
270 static inline void context_clear_entry(struct context_entry *context)
271 {
272         context->lo = 0;
273         context->hi = 0;
274 }
275
276 /*
277  * 0: readable
278  * 1: writable
279  * 2-6: reserved
280  * 7: super page
281  * 8-10: available
282  * 11: snoop behavior
283  * 12-63: Host physcial address
284  */
285 struct dma_pte {
286         u64 val;
287 };
288
289 static inline void dma_clear_pte(struct dma_pte *pte)
290 {
291         pte->val = 0;
292 }
293
294 static inline u64 dma_pte_addr(struct dma_pte *pte)
295 {
296 #ifdef CONFIG_64BIT
297         return pte->val & VTD_PAGE_MASK;
298 #else
299         /* Must have a full atomic 64-bit read */
300         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
301 #endif
302 }
303
304 static inline bool dma_pte_present(struct dma_pte *pte)
305 {
306         return (pte->val & 3) != 0;
307 }
308
309 static inline bool dma_pte_superpage(struct dma_pte *pte)
310 {
311         return (pte->val & DMA_PTE_LARGE_PAGE);
312 }
313
314 static inline int first_pte_in_page(struct dma_pte *pte)
315 {
316         return !((unsigned long)pte & ~VTD_PAGE_MASK);
317 }
318
319 /*
320  * This domain is a statically identity mapping domain.
321  *      1. This domain creats a static 1:1 mapping to all usable memory.
322  *      2. It maps to each iommu if successful.
323  *      3. Each iommu mapps to this domain if successful.
324  */
325 static struct dmar_domain *si_domain;
326 static int hw_pass_through = 1;
327
328 /* domain represents a virtual machine, more than one devices
329  * across iommus may be owned in one domain, e.g. kvm guest.
330  */
331 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
332
333 /* si_domain contains mulitple devices */
334 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
335
336 struct dmar_domain {
337         int     id;                     /* domain id */
338         int     nid;                    /* node id */
339         DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
340                                         /* bitmap of iommus this domain uses*/
341
342         struct list_head devices;       /* all devices' list */
343         struct iova_domain iovad;       /* iova's that belong to this domain */
344
345         struct dma_pte  *pgd;           /* virtual address */
346         int             gaw;            /* max guest address width */
347
348         /* adjusted guest address width, 0 is level 2 30-bit */
349         int             agaw;
350
351         int             flags;          /* flags to find out type of domain */
352
353         int             iommu_coherency;/* indicate coherency of iommu access */
354         int             iommu_snooping; /* indicate snooping control feature*/
355         int             iommu_count;    /* reference count of iommu */
356         int             iommu_superpage;/* Level of superpages supported:
357                                            0 == 4KiB (no superpages), 1 == 2MiB,
358                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
359         spinlock_t      iommu_lock;     /* protect iommu set in domain */
360         u64             max_addr;       /* maximum mapped address */
361 };
362
363 /* PCI domain-device relationship */
364 struct device_domain_info {
365         struct list_head link;  /* link to domain siblings */
366         struct list_head global; /* link to global list */
367         u8 bus;                 /* PCI bus number */
368         u8 devfn;               /* PCI devfn number */
369         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
370         struct intel_iommu *iommu; /* IOMMU used by this device */
371         struct dmar_domain *domain; /* pointer to domain */
372 };
373
374 struct dmar_rmrr_unit {
375         struct list_head list;          /* list of rmrr units   */
376         struct acpi_dmar_header *hdr;   /* ACPI header          */
377         u64     base_address;           /* reserved base address*/
378         u64     end_address;            /* reserved end address */
379         struct dmar_dev_scope *devices; /* target devices */
380         int     devices_cnt;            /* target device count */
381 };
382
383 struct dmar_atsr_unit {
384         struct list_head list;          /* list of ATSR units */
385         struct acpi_dmar_header *hdr;   /* ACPI header */
386         struct dmar_dev_scope *devices; /* target devices */
387         int devices_cnt;                /* target device count */
388         u8 include_all:1;               /* include all ports */
389 };
390
391 static LIST_HEAD(dmar_atsr_units);
392 static LIST_HEAD(dmar_rmrr_units);
393
394 #define for_each_rmrr_units(rmrr) \
395         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
396
397 static void flush_unmaps_timeout(unsigned long data);
398
399 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
400
401 #define HIGH_WATER_MARK 250
402 struct deferred_flush_tables {
403         int next;
404         struct iova *iova[HIGH_WATER_MARK];
405         struct dmar_domain *domain[HIGH_WATER_MARK];
406         struct page *freelist[HIGH_WATER_MARK];
407 };
408
409 static struct deferred_flush_tables *deferred_flush;
410
411 /* bitmap for indexing intel_iommus */
412 static int g_num_of_iommus;
413
414 static DEFINE_SPINLOCK(async_umap_flush_lock);
415 static LIST_HEAD(unmaps_to_do);
416
417 static int timer_on;
418 static long list_size;
419
420 static void domain_exit(struct dmar_domain *domain);
421 static void domain_remove_dev_info(struct dmar_domain *domain);
422 static void domain_remove_one_dev_info(struct dmar_domain *domain,
423                                        struct device *dev);
424 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
425                                            struct device *dev);
426 static int domain_detach_iommu(struct dmar_domain *domain,
427                                struct intel_iommu *iommu);
428
429 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
430 int dmar_disabled = 0;
431 #else
432 int dmar_disabled = 1;
433 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
434
435 int intel_iommu_enabled = 0;
436 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
437
438 static int dmar_map_gfx = 1;
439 static int dmar_forcedac;
440 static int intel_iommu_strict;
441 static int intel_iommu_superpage = 1;
442
443 int intel_iommu_gfx_mapped;
444 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
445
446 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
447 static DEFINE_SPINLOCK(device_domain_lock);
448 static LIST_HEAD(device_domain_list);
449
450 static const struct iommu_ops intel_iommu_ops;
451
452 static int __init intel_iommu_setup(char *str)
453 {
454         if (!str)
455                 return -EINVAL;
456         while (*str) {
457                 if (!strncmp(str, "on", 2)) {
458                         dmar_disabled = 0;
459                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
460                 } else if (!strncmp(str, "off", 3)) {
461                         dmar_disabled = 1;
462                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
463                 } else if (!strncmp(str, "igfx_off", 8)) {
464                         dmar_map_gfx = 0;
465                         printk(KERN_INFO
466                                 "Intel-IOMMU: disable GFX device mapping\n");
467                 } else if (!strncmp(str, "forcedac", 8)) {
468                         printk(KERN_INFO
469                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
470                         dmar_forcedac = 1;
471                 } else if (!strncmp(str, "strict", 6)) {
472                         printk(KERN_INFO
473                                 "Intel-IOMMU: disable batched IOTLB flush\n");
474                         intel_iommu_strict = 1;
475                 } else if (!strncmp(str, "sp_off", 6)) {
476                         printk(KERN_INFO
477                                 "Intel-IOMMU: disable supported super page\n");
478                         intel_iommu_superpage = 0;
479                 }
480
481                 str += strcspn(str, ",");
482                 while (*str == ',')
483                         str++;
484         }
485         return 0;
486 }
487 __setup("intel_iommu=", intel_iommu_setup);
488
489 static struct kmem_cache *iommu_domain_cache;
490 static struct kmem_cache *iommu_devinfo_cache;
491
492 static inline void *alloc_pgtable_page(int node)
493 {
494         struct page *page;
495         void *vaddr = NULL;
496
497         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
498         if (page)
499                 vaddr = page_address(page);
500         return vaddr;
501 }
502
503 static inline void free_pgtable_page(void *vaddr)
504 {
505         free_page((unsigned long)vaddr);
506 }
507
508 static inline void *alloc_domain_mem(void)
509 {
510         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
511 }
512
513 static void free_domain_mem(void *vaddr)
514 {
515         kmem_cache_free(iommu_domain_cache, vaddr);
516 }
517
518 static inline void * alloc_devinfo_mem(void)
519 {
520         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
521 }
522
523 static inline void free_devinfo_mem(void *vaddr)
524 {
525         kmem_cache_free(iommu_devinfo_cache, vaddr);
526 }
527
528 static inline int domain_type_is_vm(struct dmar_domain *domain)
529 {
530         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
531 }
532
533 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
534 {
535         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
536                                 DOMAIN_FLAG_STATIC_IDENTITY);
537 }
538
539 static inline int domain_pfn_supported(struct dmar_domain *domain,
540                                        unsigned long pfn)
541 {
542         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
543
544         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
545 }
546
547 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
548 {
549         unsigned long sagaw;
550         int agaw = -1;
551
552         sagaw = cap_sagaw(iommu->cap);
553         for (agaw = width_to_agaw(max_gaw);
554              agaw >= 0; agaw--) {
555                 if (test_bit(agaw, &sagaw))
556                         break;
557         }
558
559         return agaw;
560 }
561
562 /*
563  * Calculate max SAGAW for each iommu.
564  */
565 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
566 {
567         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
568 }
569
570 /*
571  * calculate agaw for each iommu.
572  * "SAGAW" may be different across iommus, use a default agaw, and
573  * get a supported less agaw for iommus that don't support the default agaw.
574  */
575 int iommu_calculate_agaw(struct intel_iommu *iommu)
576 {
577         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
578 }
579
580 /* This functionin only returns single iommu in a domain */
581 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
582 {
583         int iommu_id;
584
585         /* si_domain and vm domain should not get here. */
586         BUG_ON(domain_type_is_vm_or_si(domain));
587         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
588         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
589                 return NULL;
590
591         return g_iommus[iommu_id];
592 }
593
594 static void domain_update_iommu_coherency(struct dmar_domain *domain)
595 {
596         struct dmar_drhd_unit *drhd;
597         struct intel_iommu *iommu;
598         int i, found = 0;
599
600         domain->iommu_coherency = 1;
601
602         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
603                 found = 1;
604                 if (!ecap_coherent(g_iommus[i]->ecap)) {
605                         domain->iommu_coherency = 0;
606                         break;
607                 }
608         }
609         if (found)
610                 return;
611
612         /* No hardware attached; use lowest common denominator */
613         rcu_read_lock();
614         for_each_active_iommu(iommu, drhd) {
615                 if (!ecap_coherent(iommu->ecap)) {
616                         domain->iommu_coherency = 0;
617                         break;
618                 }
619         }
620         rcu_read_unlock();
621 }
622
623 static int domain_update_iommu_snooping(struct intel_iommu *skip)
624 {
625         struct dmar_drhd_unit *drhd;
626         struct intel_iommu *iommu;
627         int ret = 1;
628
629         rcu_read_lock();
630         for_each_active_iommu(iommu, drhd) {
631                 if (iommu != skip) {
632                         if (!ecap_sc_support(iommu->ecap)) {
633                                 ret = 0;
634                                 break;
635                         }
636                 }
637         }
638         rcu_read_unlock();
639
640         return ret;
641 }
642
643 static int domain_update_iommu_superpage(struct intel_iommu *skip)
644 {
645         struct dmar_drhd_unit *drhd;
646         struct intel_iommu *iommu;
647         int mask = 0xf;
648
649         if (!intel_iommu_superpage) {
650                 return 0;
651         }
652
653         /* set iommu_superpage to the smallest common denominator */
654         rcu_read_lock();
655         for_each_active_iommu(iommu, drhd) {
656                 if (iommu != skip) {
657                         mask &= cap_super_page_val(iommu->cap);
658                         if (!mask)
659                                 break;
660                 }
661         }
662         rcu_read_unlock();
663
664         return fls(mask);
665 }
666
667 /* Some capabilities may be different across iommus */
668 static void domain_update_iommu_cap(struct dmar_domain *domain)
669 {
670         domain_update_iommu_coherency(domain);
671         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
672         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
673 }
674
675 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
676 {
677         struct dmar_drhd_unit *drhd = NULL;
678         struct intel_iommu *iommu;
679         struct device *tmp;
680         struct pci_dev *ptmp, *pdev = NULL;
681         u16 segment = 0;
682         int i;
683
684         if (dev_is_pci(dev)) {
685                 pdev = to_pci_dev(dev);
686                 segment = pci_domain_nr(pdev->bus);
687         } else if (ACPI_COMPANION(dev))
688                 dev = &ACPI_COMPANION(dev)->dev;
689
690         rcu_read_lock();
691         for_each_active_iommu(iommu, drhd) {
692                 if (pdev && segment != drhd->segment)
693                         continue;
694
695                 for_each_active_dev_scope(drhd->devices,
696                                           drhd->devices_cnt, i, tmp) {
697                         if (tmp == dev) {
698                                 *bus = drhd->devices[i].bus;
699                                 *devfn = drhd->devices[i].devfn;
700                                 goto out;
701                         }
702
703                         if (!pdev || !dev_is_pci(tmp))
704                                 continue;
705
706                         ptmp = to_pci_dev(tmp);
707                         if (ptmp->subordinate &&
708                             ptmp->subordinate->number <= pdev->bus->number &&
709                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
710                                 goto got_pdev;
711                 }
712
713                 if (pdev && drhd->include_all) {
714                 got_pdev:
715                         *bus = pdev->bus->number;
716                         *devfn = pdev->devfn;
717                         goto out;
718                 }
719         }
720         iommu = NULL;
721  out:
722         rcu_read_unlock();
723
724         return iommu;
725 }
726
727 static void domain_flush_cache(struct dmar_domain *domain,
728                                void *addr, int size)
729 {
730         if (!domain->iommu_coherency)
731                 clflush_cache_range(addr, size);
732 }
733
734 /* Gets context entry for a given bus and devfn */
735 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
736                 u8 bus, u8 devfn)
737 {
738         struct root_entry *root;
739         struct context_entry *context;
740         unsigned long phy_addr;
741         unsigned long flags;
742
743         spin_lock_irqsave(&iommu->lock, flags);
744         root = &iommu->root_entry[bus];
745         context = get_context_addr_from_root(root);
746         if (!context) {
747                 context = (struct context_entry *)
748                                 alloc_pgtable_page(iommu->node);
749                 if (!context) {
750                         spin_unlock_irqrestore(&iommu->lock, flags);
751                         return NULL;
752                 }
753                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
754                 phy_addr = virt_to_phys((void *)context);
755                 set_root_value(root, phy_addr);
756                 set_root_present(root);
757                 __iommu_flush_cache(iommu, root, sizeof(*root));
758         }
759         spin_unlock_irqrestore(&iommu->lock, flags);
760         return &context[devfn];
761 }
762
763 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
764 {
765         struct root_entry *root;
766         struct context_entry *context;
767         int ret;
768         unsigned long flags;
769
770         spin_lock_irqsave(&iommu->lock, flags);
771         root = &iommu->root_entry[bus];
772         context = get_context_addr_from_root(root);
773         if (!context) {
774                 ret = 0;
775                 goto out;
776         }
777         ret = context_present(&context[devfn]);
778 out:
779         spin_unlock_irqrestore(&iommu->lock, flags);
780         return ret;
781 }
782
783 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
784 {
785         struct root_entry *root;
786         struct context_entry *context;
787         unsigned long flags;
788
789         spin_lock_irqsave(&iommu->lock, flags);
790         root = &iommu->root_entry[bus];
791         context = get_context_addr_from_root(root);
792         if (context) {
793                 context_clear_entry(&context[devfn]);
794                 __iommu_flush_cache(iommu, &context[devfn], \
795                         sizeof(*context));
796         }
797         spin_unlock_irqrestore(&iommu->lock, flags);
798 }
799
800 static void free_context_table(struct intel_iommu *iommu)
801 {
802         struct root_entry *root;
803         int i;
804         unsigned long flags;
805         struct context_entry *context;
806
807         spin_lock_irqsave(&iommu->lock, flags);
808         if (!iommu->root_entry) {
809                 goto out;
810         }
811         for (i = 0; i < ROOT_ENTRY_NR; i++) {
812                 root = &iommu->root_entry[i];
813                 context = get_context_addr_from_root(root);
814                 if (context)
815                         free_pgtable_page(context);
816         }
817         free_pgtable_page(iommu->root_entry);
818         iommu->root_entry = NULL;
819 out:
820         spin_unlock_irqrestore(&iommu->lock, flags);
821 }
822
823 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
824                                       unsigned long pfn, int *target_level)
825 {
826         struct dma_pte *parent, *pte = NULL;
827         int level = agaw_to_level(domain->agaw);
828         int offset;
829
830         BUG_ON(!domain->pgd);
831
832         if (!domain_pfn_supported(domain, pfn))
833                 /* Address beyond IOMMU's addressing capabilities. */
834                 return NULL;
835
836         parent = domain->pgd;
837
838         while (1) {
839                 void *tmp_page;
840
841                 offset = pfn_level_offset(pfn, level);
842                 pte = &parent[offset];
843                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
844                         break;
845                 if (level == *target_level)
846                         break;
847
848                 if (!dma_pte_present(pte)) {
849                         uint64_t pteval;
850
851                         tmp_page = alloc_pgtable_page(domain->nid);
852
853                         if (!tmp_page)
854                                 return NULL;
855
856                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
857                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
858                         if (cmpxchg64(&pte->val, 0ULL, pteval))
859                                 /* Someone else set it while we were thinking; use theirs. */
860                                 free_pgtable_page(tmp_page);
861                         else
862                                 domain_flush_cache(domain, pte, sizeof(*pte));
863                 }
864                 if (level == 1)
865                         break;
866
867                 parent = phys_to_virt(dma_pte_addr(pte));
868                 level--;
869         }
870
871         if (!*target_level)
872                 *target_level = level;
873
874         return pte;
875 }
876
877
878 /* return address's pte at specific level */
879 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
880                                          unsigned long pfn,
881                                          int level, int *large_page)
882 {
883         struct dma_pte *parent, *pte = NULL;
884         int total = agaw_to_level(domain->agaw);
885         int offset;
886
887         parent = domain->pgd;
888         while (level <= total) {
889                 offset = pfn_level_offset(pfn, total);
890                 pte = &parent[offset];
891                 if (level == total)
892                         return pte;
893
894                 if (!dma_pte_present(pte)) {
895                         *large_page = total;
896                         break;
897                 }
898
899                 if (dma_pte_superpage(pte)) {
900                         *large_page = total;
901                         return pte;
902                 }
903
904                 parent = phys_to_virt(dma_pte_addr(pte));
905                 total--;
906         }
907         return NULL;
908 }
909
910 /* clear last level pte, a tlb flush should be followed */
911 static void dma_pte_clear_range(struct dmar_domain *domain,
912                                 unsigned long start_pfn,
913                                 unsigned long last_pfn)
914 {
915         unsigned int large_page = 1;
916         struct dma_pte *first_pte, *pte;
917
918         BUG_ON(!domain_pfn_supported(domain, start_pfn));
919         BUG_ON(!domain_pfn_supported(domain, last_pfn));
920         BUG_ON(start_pfn > last_pfn);
921
922         /* we don't need lock here; nobody else touches the iova range */
923         do {
924                 large_page = 1;
925                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
926                 if (!pte) {
927                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
928                         continue;
929                 }
930                 do {
931                         dma_clear_pte(pte);
932                         start_pfn += lvl_to_nr_pages(large_page);
933                         pte++;
934                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
935
936                 domain_flush_cache(domain, first_pte,
937                                    (void *)pte - (void *)first_pte);
938
939         } while (start_pfn && start_pfn <= last_pfn);
940 }
941
942 static void dma_pte_free_level(struct dmar_domain *domain, int level,
943                                struct dma_pte *pte, unsigned long pfn,
944                                unsigned long start_pfn, unsigned long last_pfn)
945 {
946         pfn = max(start_pfn, pfn);
947         pte = &pte[pfn_level_offset(pfn, level)];
948
949         do {
950                 unsigned long level_pfn;
951                 struct dma_pte *level_pte;
952
953                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
954                         goto next;
955
956                 level_pfn = pfn & level_mask(level - 1);
957                 level_pte = phys_to_virt(dma_pte_addr(pte));
958
959                 if (level > 2)
960                         dma_pte_free_level(domain, level - 1, level_pte,
961                                            level_pfn, start_pfn, last_pfn);
962
963                 /* If range covers entire pagetable, free it */
964                 if (!(start_pfn > level_pfn ||
965                       last_pfn < level_pfn + level_size(level) - 1)) {
966                         dma_clear_pte(pte);
967                         domain_flush_cache(domain, pte, sizeof(*pte));
968                         free_pgtable_page(level_pte);
969                 }
970 next:
971                 pfn += level_size(level);
972         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
973 }
974
975 /* free page table pages. last level pte should already be cleared */
976 static void dma_pte_free_pagetable(struct dmar_domain *domain,
977                                    unsigned long start_pfn,
978                                    unsigned long last_pfn)
979 {
980         BUG_ON(!domain_pfn_supported(domain, start_pfn));
981         BUG_ON(!domain_pfn_supported(domain, last_pfn));
982         BUG_ON(start_pfn > last_pfn);
983
984         dma_pte_clear_range(domain, start_pfn, last_pfn);
985
986         /* We don't need lock here; nobody else touches the iova range */
987         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
988                            domain->pgd, 0, start_pfn, last_pfn);
989
990         /* free pgd */
991         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
992                 free_pgtable_page(domain->pgd);
993                 domain->pgd = NULL;
994         }
995 }
996
997 /* When a page at a given level is being unlinked from its parent, we don't
998    need to *modify* it at all. All we need to do is make a list of all the
999    pages which can be freed just as soon as we've flushed the IOTLB and we
1000    know the hardware page-walk will no longer touch them.
1001    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1002    be freed. */
1003 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1004                                             int level, struct dma_pte *pte,
1005                                             struct page *freelist)
1006 {
1007         struct page *pg;
1008
1009         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1010         pg->freelist = freelist;
1011         freelist = pg;
1012
1013         if (level == 1)
1014                 return freelist;
1015
1016         pte = page_address(pg);
1017         do {
1018                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1019                         freelist = dma_pte_list_pagetables(domain, level - 1,
1020                                                            pte, freelist);
1021                 pte++;
1022         } while (!first_pte_in_page(pte));
1023
1024         return freelist;
1025 }
1026
1027 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1028                                         struct dma_pte *pte, unsigned long pfn,
1029                                         unsigned long start_pfn,
1030                                         unsigned long last_pfn,
1031                                         struct page *freelist)
1032 {
1033         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1034
1035         pfn = max(start_pfn, pfn);
1036         pte = &pte[pfn_level_offset(pfn, level)];
1037
1038         do {
1039                 unsigned long level_pfn;
1040
1041                 if (!dma_pte_present(pte))
1042                         goto next;
1043
1044                 level_pfn = pfn & level_mask(level);
1045
1046                 /* If range covers entire pagetable, free it */
1047                 if (start_pfn <= level_pfn &&
1048                     last_pfn >= level_pfn + level_size(level) - 1) {
1049                         /* These suborbinate page tables are going away entirely. Don't
1050                            bother to clear them; we're just going to *free* them. */
1051                         if (level > 1 && !dma_pte_superpage(pte))
1052                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1053
1054                         dma_clear_pte(pte);
1055                         if (!first_pte)
1056                                 first_pte = pte;
1057                         last_pte = pte;
1058                 } else if (level > 1) {
1059                         /* Recurse down into a level that isn't *entirely* obsolete */
1060                         freelist = dma_pte_clear_level(domain, level - 1,
1061                                                        phys_to_virt(dma_pte_addr(pte)),
1062                                                        level_pfn, start_pfn, last_pfn,
1063                                                        freelist);
1064                 }
1065 next:
1066                 pfn += level_size(level);
1067         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1068
1069         if (first_pte)
1070                 domain_flush_cache(domain, first_pte,
1071                                    (void *)++last_pte - (void *)first_pte);
1072
1073         return freelist;
1074 }
1075
1076 /* We can't just free the pages because the IOMMU may still be walking
1077    the page tables, and may have cached the intermediate levels. The
1078    pages can only be freed after the IOTLB flush has been done. */
1079 struct page *domain_unmap(struct dmar_domain *domain,
1080                           unsigned long start_pfn,
1081                           unsigned long last_pfn)
1082 {
1083         struct page *freelist = NULL;
1084
1085         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1086         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1087         BUG_ON(start_pfn > last_pfn);
1088
1089         /* we don't need lock here; nobody else touches the iova range */
1090         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1091                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1092
1093         /* free pgd */
1094         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1095                 struct page *pgd_page = virt_to_page(domain->pgd);
1096                 pgd_page->freelist = freelist;
1097                 freelist = pgd_page;
1098
1099                 domain->pgd = NULL;
1100         }
1101
1102         return freelist;
1103 }
1104
1105 void dma_free_pagelist(struct page *freelist)
1106 {
1107         struct page *pg;
1108
1109         while ((pg = freelist)) {
1110                 freelist = pg->freelist;
1111                 free_pgtable_page(page_address(pg));
1112         }
1113 }
1114
1115 /* iommu handling */
1116 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1117 {
1118         struct root_entry *root;
1119         unsigned long flags;
1120
1121         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1122         if (!root) {
1123                 pr_err("IOMMU: allocating root entry for %s failed\n",
1124                         iommu->name);
1125                 return -ENOMEM;
1126         }
1127
1128         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1129
1130         spin_lock_irqsave(&iommu->lock, flags);
1131         iommu->root_entry = root;
1132         spin_unlock_irqrestore(&iommu->lock, flags);
1133
1134         return 0;
1135 }
1136
1137 static void iommu_set_root_entry(struct intel_iommu *iommu)
1138 {
1139         void *addr;
1140         u32 sts;
1141         unsigned long flag;
1142
1143         addr = iommu->root_entry;
1144
1145         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1146         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1147
1148         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1149
1150         /* Make sure hardware complete it */
1151         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1152                       readl, (sts & DMA_GSTS_RTPS), sts);
1153
1154         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1155 }
1156
1157 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1158 {
1159         u32 val;
1160         unsigned long flag;
1161
1162         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1163                 return;
1164
1165         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1166         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1167
1168         /* Make sure hardware complete it */
1169         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1170                       readl, (!(val & DMA_GSTS_WBFS)), val);
1171
1172         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1173 }
1174
1175 /* return value determine if we need a write buffer flush */
1176 static void __iommu_flush_context(struct intel_iommu *iommu,
1177                                   u16 did, u16 source_id, u8 function_mask,
1178                                   u64 type)
1179 {
1180         u64 val = 0;
1181         unsigned long flag;
1182
1183         switch (type) {
1184         case DMA_CCMD_GLOBAL_INVL:
1185                 val = DMA_CCMD_GLOBAL_INVL;
1186                 break;
1187         case DMA_CCMD_DOMAIN_INVL:
1188                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1189                 break;
1190         case DMA_CCMD_DEVICE_INVL:
1191                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1192                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1193                 break;
1194         default:
1195                 BUG();
1196         }
1197         val |= DMA_CCMD_ICC;
1198
1199         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1200         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1201
1202         /* Make sure hardware complete it */
1203         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1204                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1205
1206         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1207 }
1208
1209 /* return value determine if we need a write buffer flush */
1210 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1211                                 u64 addr, unsigned int size_order, u64 type)
1212 {
1213         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1214         u64 val = 0, val_iva = 0;
1215         unsigned long flag;
1216
1217         switch (type) {
1218         case DMA_TLB_GLOBAL_FLUSH:
1219                 /* global flush doesn't need set IVA_REG */
1220                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1221                 break;
1222         case DMA_TLB_DSI_FLUSH:
1223                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1224                 break;
1225         case DMA_TLB_PSI_FLUSH:
1226                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1227                 /* IH bit is passed in as part of address */
1228                 val_iva = size_order | addr;
1229                 break;
1230         default:
1231                 BUG();
1232         }
1233         /* Note: set drain read/write */
1234 #if 0
1235         /*
1236          * This is probably to be super secure.. Looks like we can
1237          * ignore it without any impact.
1238          */
1239         if (cap_read_drain(iommu->cap))
1240                 val |= DMA_TLB_READ_DRAIN;
1241 #endif
1242         if (cap_write_drain(iommu->cap))
1243                 val |= DMA_TLB_WRITE_DRAIN;
1244
1245         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1246         /* Note: Only uses first TLB reg currently */
1247         if (val_iva)
1248                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1249         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1250
1251         /* Make sure hardware complete it */
1252         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1253                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1254
1255         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1256
1257         /* check IOTLB invalidation granularity */
1258         if (DMA_TLB_IAIG(val) == 0)
1259                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1260         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1261                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1262                         (unsigned long long)DMA_TLB_IIRG(type),
1263                         (unsigned long long)DMA_TLB_IAIG(val));
1264 }
1265
1266 static struct device_domain_info *
1267 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1268                          u8 bus, u8 devfn)
1269 {
1270         int found = 0;
1271         unsigned long flags;
1272         struct device_domain_info *info;
1273         struct pci_dev *pdev;
1274
1275         if (!ecap_dev_iotlb_support(iommu->ecap))
1276                 return NULL;
1277
1278         if (!iommu->qi)
1279                 return NULL;
1280
1281         spin_lock_irqsave(&device_domain_lock, flags);
1282         list_for_each_entry(info, &domain->devices, link)
1283                 if (info->iommu == iommu && info->bus == bus &&
1284                     info->devfn == devfn) {
1285                         found = 1;
1286                         break;
1287                 }
1288         spin_unlock_irqrestore(&device_domain_lock, flags);
1289
1290         if (!found || !info->dev || !dev_is_pci(info->dev))
1291                 return NULL;
1292
1293         pdev = to_pci_dev(info->dev);
1294
1295         if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1296                 return NULL;
1297
1298         if (!dmar_find_matched_atsr_unit(pdev))
1299                 return NULL;
1300
1301         return info;
1302 }
1303
1304 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1305 {
1306         if (!info || !dev_is_pci(info->dev))
1307                 return;
1308
1309         pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1310 }
1311
1312 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1313 {
1314         if (!info->dev || !dev_is_pci(info->dev) ||
1315             !pci_ats_enabled(to_pci_dev(info->dev)))
1316                 return;
1317
1318         pci_disable_ats(to_pci_dev(info->dev));
1319 }
1320
1321 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1322                                   u64 addr, unsigned mask)
1323 {
1324         u16 sid, qdep;
1325         unsigned long flags;
1326         struct device_domain_info *info;
1327
1328         spin_lock_irqsave(&device_domain_lock, flags);
1329         list_for_each_entry(info, &domain->devices, link) {
1330                 struct pci_dev *pdev;
1331                 if (!info->dev || !dev_is_pci(info->dev))
1332                         continue;
1333
1334                 pdev = to_pci_dev(info->dev);
1335                 if (!pci_ats_enabled(pdev))
1336                         continue;
1337
1338                 sid = info->bus << 8 | info->devfn;
1339                 qdep = pci_ats_queue_depth(pdev);
1340                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1341         }
1342         spin_unlock_irqrestore(&device_domain_lock, flags);
1343 }
1344
1345 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1346                                   unsigned long pfn, unsigned int pages, int ih, int map)
1347 {
1348         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1349         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1350
1351         BUG_ON(pages == 0);
1352
1353         if (ih)
1354                 ih = 1 << 6;
1355         /*
1356          * Fallback to domain selective flush if no PSI support or the size is
1357          * too big.
1358          * PSI requires page size to be 2 ^ x, and the base address is naturally
1359          * aligned to the size
1360          */
1361         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1362                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1363                                                 DMA_TLB_DSI_FLUSH);
1364         else
1365                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1366                                                 DMA_TLB_PSI_FLUSH);
1367
1368         /*
1369          * In caching mode, changes of pages from non-present to present require
1370          * flush. However, device IOTLB doesn't need to be flushed in this case.
1371          */
1372         if (!cap_caching_mode(iommu->cap) || !map)
1373                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1374 }
1375
1376 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1377 {
1378         u32 pmen;
1379         unsigned long flags;
1380
1381         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1382         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1383         pmen &= ~DMA_PMEN_EPM;
1384         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1385
1386         /* wait for the protected region status bit to clear */
1387         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1388                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1389
1390         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1391 }
1392
1393 static void iommu_enable_translation(struct intel_iommu *iommu)
1394 {
1395         u32 sts;
1396         unsigned long flags;
1397
1398         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1399         iommu->gcmd |= DMA_GCMD_TE;
1400         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1401
1402         /* Make sure hardware complete it */
1403         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1404                       readl, (sts & DMA_GSTS_TES), sts);
1405
1406         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1407 }
1408
1409 static void iommu_disable_translation(struct intel_iommu *iommu)
1410 {
1411         u32 sts;
1412         unsigned long flag;
1413
1414         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1415         iommu->gcmd &= ~DMA_GCMD_TE;
1416         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1417
1418         /* Make sure hardware complete it */
1419         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1420                       readl, (!(sts & DMA_GSTS_TES)), sts);
1421
1422         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1423 }
1424
1425
1426 static int iommu_init_domains(struct intel_iommu *iommu)
1427 {
1428         unsigned long ndomains;
1429         unsigned long nlongs;
1430
1431         ndomains = cap_ndoms(iommu->cap);
1432         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1433                  iommu->seq_id, ndomains);
1434         nlongs = BITS_TO_LONGS(ndomains);
1435
1436         spin_lock_init(&iommu->lock);
1437
1438         /* TBD: there might be 64K domains,
1439          * consider other allocation for future chip
1440          */
1441         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1442         if (!iommu->domain_ids) {
1443                 pr_err("IOMMU%d: allocating domain id array failed\n",
1444                        iommu->seq_id);
1445                 return -ENOMEM;
1446         }
1447         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1448                         GFP_KERNEL);
1449         if (!iommu->domains) {
1450                 pr_err("IOMMU%d: allocating domain array failed\n",
1451                        iommu->seq_id);
1452                 kfree(iommu->domain_ids);
1453                 iommu->domain_ids = NULL;
1454                 return -ENOMEM;
1455         }
1456
1457         /*
1458          * if Caching mode is set, then invalid translations are tagged
1459          * with domainid 0. Hence we need to pre-allocate it.
1460          */
1461         if (cap_caching_mode(iommu->cap))
1462                 set_bit(0, iommu->domain_ids);
1463         return 0;
1464 }
1465
1466 static void disable_dmar_iommu(struct intel_iommu *iommu)
1467 {
1468         struct dmar_domain *domain;
1469         int i;
1470
1471         if ((iommu->domains) && (iommu->domain_ids)) {
1472                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1473                         /*
1474                          * Domain id 0 is reserved for invalid translation
1475                          * if hardware supports caching mode.
1476                          */
1477                         if (cap_caching_mode(iommu->cap) && i == 0)
1478                                 continue;
1479
1480                         domain = iommu->domains[i];
1481                         clear_bit(i, iommu->domain_ids);
1482                         if (domain_detach_iommu(domain, iommu) == 0 &&
1483                             !domain_type_is_vm(domain))
1484                                 domain_exit(domain);
1485                 }
1486         }
1487
1488         if (iommu->gcmd & DMA_GCMD_TE)
1489                 iommu_disable_translation(iommu);
1490 }
1491
1492 static void free_dmar_iommu(struct intel_iommu *iommu)
1493 {
1494         if ((iommu->domains) && (iommu->domain_ids)) {
1495                 kfree(iommu->domains);
1496                 kfree(iommu->domain_ids);
1497                 iommu->domains = NULL;
1498                 iommu->domain_ids = NULL;
1499         }
1500
1501         g_iommus[iommu->seq_id] = NULL;
1502
1503         /* free context mapping */
1504         free_context_table(iommu);
1505 }
1506
1507 static struct dmar_domain *alloc_domain(int flags)
1508 {
1509         /* domain id for virtual machine, it won't be set in context */
1510         static atomic_t vm_domid = ATOMIC_INIT(0);
1511         struct dmar_domain *domain;
1512
1513         domain = alloc_domain_mem();
1514         if (!domain)
1515                 return NULL;
1516
1517         memset(domain, 0, sizeof(*domain));
1518         domain->nid = -1;
1519         domain->flags = flags;
1520         spin_lock_init(&domain->iommu_lock);
1521         INIT_LIST_HEAD(&domain->devices);
1522         if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1523                 domain->id = atomic_inc_return(&vm_domid);
1524
1525         return domain;
1526 }
1527
1528 static int __iommu_attach_domain(struct dmar_domain *domain,
1529                                  struct intel_iommu *iommu)
1530 {
1531         int num;
1532         unsigned long ndomains;
1533
1534         ndomains = cap_ndoms(iommu->cap);
1535         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1536         if (num < ndomains) {
1537                 set_bit(num, iommu->domain_ids);
1538                 iommu->domains[num] = domain;
1539         } else {
1540                 num = -ENOSPC;
1541         }
1542
1543         return num;
1544 }
1545
1546 static int iommu_attach_domain(struct dmar_domain *domain,
1547                                struct intel_iommu *iommu)
1548 {
1549         int num;
1550         unsigned long flags;
1551
1552         spin_lock_irqsave(&iommu->lock, flags);
1553         num = __iommu_attach_domain(domain, iommu);
1554         spin_unlock_irqrestore(&iommu->lock, flags);
1555         if (num < 0)
1556                 pr_err("IOMMU: no free domain ids\n");
1557
1558         return num;
1559 }
1560
1561 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1562                                   struct intel_iommu *iommu)
1563 {
1564         int num;
1565         unsigned long ndomains;
1566
1567         ndomains = cap_ndoms(iommu->cap);
1568         for_each_set_bit(num, iommu->domain_ids, ndomains)
1569                 if (iommu->domains[num] == domain)
1570                         return num;
1571
1572         return __iommu_attach_domain(domain, iommu);
1573 }
1574
1575 static void iommu_detach_domain(struct dmar_domain *domain,
1576                                 struct intel_iommu *iommu)
1577 {
1578         unsigned long flags;
1579         int num, ndomains;
1580
1581         spin_lock_irqsave(&iommu->lock, flags);
1582         if (domain_type_is_vm_or_si(domain)) {
1583                 ndomains = cap_ndoms(iommu->cap);
1584                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1585                         if (iommu->domains[num] == domain) {
1586                                 clear_bit(num, iommu->domain_ids);
1587                                 iommu->domains[num] = NULL;
1588                                 break;
1589                         }
1590                 }
1591         } else {
1592                 clear_bit(domain->id, iommu->domain_ids);
1593                 iommu->domains[domain->id] = NULL;
1594         }
1595         spin_unlock_irqrestore(&iommu->lock, flags);
1596 }
1597
1598 static void domain_attach_iommu(struct dmar_domain *domain,
1599                                struct intel_iommu *iommu)
1600 {
1601         unsigned long flags;
1602
1603         spin_lock_irqsave(&domain->iommu_lock, flags);
1604         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1605                 domain->iommu_count++;
1606                 if (domain->iommu_count == 1)
1607                         domain->nid = iommu->node;
1608                 domain_update_iommu_cap(domain);
1609         }
1610         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1611 }
1612
1613 static int domain_detach_iommu(struct dmar_domain *domain,
1614                                struct intel_iommu *iommu)
1615 {
1616         unsigned long flags;
1617         int count = INT_MAX;
1618
1619         spin_lock_irqsave(&domain->iommu_lock, flags);
1620         if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1621                 count = --domain->iommu_count;
1622                 domain_update_iommu_cap(domain);
1623         }
1624         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1625
1626         return count;
1627 }
1628
1629 static struct iova_domain reserved_iova_list;
1630 static struct lock_class_key reserved_rbtree_key;
1631
1632 static int dmar_init_reserved_ranges(void)
1633 {
1634         struct pci_dev *pdev = NULL;
1635         struct iova *iova;
1636         int i;
1637
1638         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1639                         DMA_32BIT_PFN);
1640
1641         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1642                 &reserved_rbtree_key);
1643
1644         /* IOAPIC ranges shouldn't be accessed by DMA */
1645         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1646                 IOVA_PFN(IOAPIC_RANGE_END));
1647         if (!iova) {
1648                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1649                 return -ENODEV;
1650         }
1651
1652         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1653         for_each_pci_dev(pdev) {
1654                 struct resource *r;
1655
1656                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1657                         r = &pdev->resource[i];
1658                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1659                                 continue;
1660                         iova = reserve_iova(&reserved_iova_list,
1661                                             IOVA_PFN(r->start),
1662                                             IOVA_PFN(r->end));
1663                         if (!iova) {
1664                                 printk(KERN_ERR "Reserve iova failed\n");
1665                                 return -ENODEV;
1666                         }
1667                 }
1668         }
1669         return 0;
1670 }
1671
1672 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1673 {
1674         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1675 }
1676
1677 static inline int guestwidth_to_adjustwidth(int gaw)
1678 {
1679         int agaw;
1680         int r = (gaw - 12) % 9;
1681
1682         if (r == 0)
1683                 agaw = gaw;
1684         else
1685                 agaw = gaw + 9 - r;
1686         if (agaw > 64)
1687                 agaw = 64;
1688         return agaw;
1689 }
1690
1691 static int domain_init(struct dmar_domain *domain, int guest_width)
1692 {
1693         struct intel_iommu *iommu;
1694         int adjust_width, agaw;
1695         unsigned long sagaw;
1696
1697         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1698                         DMA_32BIT_PFN);
1699         domain_reserve_special_ranges(domain);
1700
1701         /* calculate AGAW */
1702         iommu = domain_get_iommu(domain);
1703         if (guest_width > cap_mgaw(iommu->cap))
1704                 guest_width = cap_mgaw(iommu->cap);
1705         domain->gaw = guest_width;
1706         adjust_width = guestwidth_to_adjustwidth(guest_width);
1707         agaw = width_to_agaw(adjust_width);
1708         sagaw = cap_sagaw(iommu->cap);
1709         if (!test_bit(agaw, &sagaw)) {
1710                 /* hardware doesn't support it, choose a bigger one */
1711                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1712                 agaw = find_next_bit(&sagaw, 5, agaw);
1713                 if (agaw >= 5)
1714                         return -ENODEV;
1715         }
1716         domain->agaw = agaw;
1717
1718         if (ecap_coherent(iommu->ecap))
1719                 domain->iommu_coherency = 1;
1720         else
1721                 domain->iommu_coherency = 0;
1722
1723         if (ecap_sc_support(iommu->ecap))
1724                 domain->iommu_snooping = 1;
1725         else
1726                 domain->iommu_snooping = 0;
1727
1728         if (intel_iommu_superpage)
1729                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1730         else
1731                 domain->iommu_superpage = 0;
1732
1733         domain->nid = iommu->node;
1734
1735         /* always allocate the top pgd */
1736         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1737         if (!domain->pgd)
1738                 return -ENOMEM;
1739         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1740         return 0;
1741 }
1742
1743 static void domain_exit(struct dmar_domain *domain)
1744 {
1745         struct dmar_drhd_unit *drhd;
1746         struct intel_iommu *iommu;
1747         struct page *freelist = NULL;
1748
1749         /* Domain 0 is reserved, so dont process it */
1750         if (!domain)
1751                 return;
1752
1753         /* Flush any lazy unmaps that may reference this domain */
1754         if (!intel_iommu_strict)
1755                 flush_unmaps_timeout(0);
1756
1757         /* remove associated devices */
1758         domain_remove_dev_info(domain);
1759
1760         /* destroy iovas */
1761         put_iova_domain(&domain->iovad);
1762
1763         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1764
1765         /* clear attached or cached domains */
1766         rcu_read_lock();
1767         for_each_active_iommu(iommu, drhd)
1768                 iommu_detach_domain(domain, iommu);
1769         rcu_read_unlock();
1770
1771         dma_free_pagelist(freelist);
1772
1773         free_domain_mem(domain);
1774 }
1775
1776 static int domain_context_mapping_one(struct dmar_domain *domain,
1777                                       struct intel_iommu *iommu,
1778                                       u8 bus, u8 devfn, int translation)
1779 {
1780         struct context_entry *context;
1781         unsigned long flags;
1782         struct dma_pte *pgd;
1783         int id;
1784         int agaw;
1785         struct device_domain_info *info = NULL;
1786
1787         pr_debug("Set context mapping for %02x:%02x.%d\n",
1788                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1789
1790         BUG_ON(!domain->pgd);
1791         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1792                translation != CONTEXT_TT_MULTI_LEVEL);
1793
1794         context = device_to_context_entry(iommu, bus, devfn);
1795         if (!context)
1796                 return -ENOMEM;
1797         spin_lock_irqsave(&iommu->lock, flags);
1798         if (context_present(context)) {
1799                 spin_unlock_irqrestore(&iommu->lock, flags);
1800                 return 0;
1801         }
1802
1803         id = domain->id;
1804         pgd = domain->pgd;
1805
1806         if (domain_type_is_vm_or_si(domain)) {
1807                 if (domain_type_is_vm(domain)) {
1808                         id = iommu_attach_vm_domain(domain, iommu);
1809                         if (id < 0) {
1810                                 spin_unlock_irqrestore(&iommu->lock, flags);
1811                                 pr_err("IOMMU: no free domain ids\n");
1812                                 return -EFAULT;
1813                         }
1814                 }
1815
1816                 /* Skip top levels of page tables for
1817                  * iommu which has less agaw than default.
1818                  * Unnecessary for PT mode.
1819                  */
1820                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1821                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1822                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1823                                 if (!dma_pte_present(pgd)) {
1824                                         spin_unlock_irqrestore(&iommu->lock, flags);
1825                                         return -ENOMEM;
1826                                 }
1827                         }
1828                 }
1829         }
1830
1831         context_set_domain_id(context, id);
1832
1833         if (translation != CONTEXT_TT_PASS_THROUGH) {
1834                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1835                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1836                                      CONTEXT_TT_MULTI_LEVEL;
1837         }
1838         /*
1839          * In pass through mode, AW must be programmed to indicate the largest
1840          * AGAW value supported by hardware. And ASR is ignored by hardware.
1841          */
1842         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1843                 context_set_address_width(context, iommu->msagaw);
1844         else {
1845                 context_set_address_root(context, virt_to_phys(pgd));
1846                 context_set_address_width(context, iommu->agaw);
1847         }
1848
1849         context_set_translation_type(context, translation);
1850         context_set_fault_enable(context);
1851         context_set_present(context);
1852         domain_flush_cache(domain, context, sizeof(*context));
1853
1854         /*
1855          * It's a non-present to present mapping. If hardware doesn't cache
1856          * non-present entry we only need to flush the write-buffer. If the
1857          * _does_ cache non-present entries, then it does so in the special
1858          * domain #0, which we have to flush:
1859          */
1860         if (cap_caching_mode(iommu->cap)) {
1861                 iommu->flush.flush_context(iommu, 0,
1862                                            (((u16)bus) << 8) | devfn,
1863                                            DMA_CCMD_MASK_NOBIT,
1864                                            DMA_CCMD_DEVICE_INVL);
1865                 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1866         } else {
1867                 iommu_flush_write_buffer(iommu);
1868         }
1869         iommu_enable_dev_iotlb(info);
1870         spin_unlock_irqrestore(&iommu->lock, flags);
1871
1872         domain_attach_iommu(domain, iommu);
1873
1874         return 0;
1875 }
1876
1877 struct domain_context_mapping_data {
1878         struct dmar_domain *domain;
1879         struct intel_iommu *iommu;
1880         int translation;
1881 };
1882
1883 static int domain_context_mapping_cb(struct pci_dev *pdev,
1884                                      u16 alias, void *opaque)
1885 {
1886         struct domain_context_mapping_data *data = opaque;
1887
1888         return domain_context_mapping_one(data->domain, data->iommu,
1889                                           PCI_BUS_NUM(alias), alias & 0xff,
1890                                           data->translation);
1891 }
1892
1893 static int
1894 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1895                        int translation)
1896 {
1897         struct intel_iommu *iommu;
1898         u8 bus, devfn;
1899         struct domain_context_mapping_data data;
1900
1901         iommu = device_to_iommu(dev, &bus, &devfn);
1902         if (!iommu)
1903                 return -ENODEV;
1904
1905         if (!dev_is_pci(dev))
1906                 return domain_context_mapping_one(domain, iommu, bus, devfn,
1907                                                   translation);
1908
1909         data.domain = domain;
1910         data.iommu = iommu;
1911         data.translation = translation;
1912
1913         return pci_for_each_dma_alias(to_pci_dev(dev),
1914                                       &domain_context_mapping_cb, &data);
1915 }
1916
1917 static int domain_context_mapped_cb(struct pci_dev *pdev,
1918                                     u16 alias, void *opaque)
1919 {
1920         struct intel_iommu *iommu = opaque;
1921
1922         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1923 }
1924
1925 static int domain_context_mapped(struct device *dev)
1926 {
1927         struct intel_iommu *iommu;
1928         u8 bus, devfn;
1929
1930         iommu = device_to_iommu(dev, &bus, &devfn);
1931         if (!iommu)
1932                 return -ENODEV;
1933
1934         if (!dev_is_pci(dev))
1935                 return device_context_mapped(iommu, bus, devfn);
1936
1937         return !pci_for_each_dma_alias(to_pci_dev(dev),
1938                                        domain_context_mapped_cb, iommu);
1939 }
1940
1941 /* Returns a number of VTD pages, but aligned to MM page size */
1942 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1943                                             size_t size)
1944 {
1945         host_addr &= ~PAGE_MASK;
1946         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1947 }
1948
1949 /* Return largest possible superpage level for a given mapping */
1950 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1951                                           unsigned long iov_pfn,
1952                                           unsigned long phy_pfn,
1953                                           unsigned long pages)
1954 {
1955         int support, level = 1;
1956         unsigned long pfnmerge;
1957
1958         support = domain->iommu_superpage;
1959
1960         /* To use a large page, the virtual *and* physical addresses
1961            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1962            of them will mean we have to use smaller pages. So just
1963            merge them and check both at once. */
1964         pfnmerge = iov_pfn | phy_pfn;
1965
1966         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1967                 pages >>= VTD_STRIDE_SHIFT;
1968                 if (!pages)
1969                         break;
1970                 pfnmerge >>= VTD_STRIDE_SHIFT;
1971                 level++;
1972                 support--;
1973         }
1974         return level;
1975 }
1976
1977 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1978                             struct scatterlist *sg, unsigned long phys_pfn,
1979                             unsigned long nr_pages, int prot)
1980 {
1981         struct dma_pte *first_pte = NULL, *pte = NULL;
1982         phys_addr_t uninitialized_var(pteval);
1983         unsigned long sg_res = 0;
1984         unsigned int largepage_lvl = 0;
1985         unsigned long lvl_pages = 0;
1986
1987         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
1988
1989         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1990                 return -EINVAL;
1991
1992         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1993
1994         if (!sg) {
1995                 sg_res = nr_pages;
1996                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1997         }
1998
1999         while (nr_pages > 0) {
2000                 uint64_t tmp;
2001
2002                 if (!sg_res) {
2003                         sg_res = aligned_nrpages(sg->offset, sg->length);
2004                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2005                         sg->dma_length = sg->length;
2006                         pteval = page_to_phys(sg_page(sg)) | prot;
2007                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2008                 }
2009
2010                 if (!pte) {
2011                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2012
2013                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2014                         if (!pte)
2015                                 return -ENOMEM;
2016                         /* It is large page*/
2017                         if (largepage_lvl > 1) {
2018                                 pteval |= DMA_PTE_LARGE_PAGE;
2019                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2020                                 /*
2021                                  * Ensure that old small page tables are
2022                                  * removed to make room for superpage,
2023                                  * if they exist.
2024                                  */
2025                                 dma_pte_free_pagetable(domain, iov_pfn,
2026                                                        iov_pfn + lvl_pages - 1);
2027                         } else {
2028                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2029                         }
2030
2031                 }
2032                 /* We don't need lock here, nobody else
2033                  * touches the iova range
2034                  */
2035                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2036                 if (tmp) {
2037                         static int dumps = 5;
2038                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2039                                iov_pfn, tmp, (unsigned long long)pteval);
2040                         if (dumps) {
2041                                 dumps--;
2042                                 debug_dma_dump_mappings(NULL);
2043                         }
2044                         WARN_ON(1);
2045                 }
2046
2047                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2048
2049                 BUG_ON(nr_pages < lvl_pages);
2050                 BUG_ON(sg_res < lvl_pages);
2051
2052                 nr_pages -= lvl_pages;
2053                 iov_pfn += lvl_pages;
2054                 phys_pfn += lvl_pages;
2055                 pteval += lvl_pages * VTD_PAGE_SIZE;
2056                 sg_res -= lvl_pages;
2057
2058                 /* If the next PTE would be the first in a new page, then we
2059                    need to flush the cache on the entries we've just written.
2060                    And then we'll need to recalculate 'pte', so clear it and
2061                    let it get set again in the if (!pte) block above.
2062
2063                    If we're done (!nr_pages) we need to flush the cache too.
2064
2065                    Also if we've been setting superpages, we may need to
2066                    recalculate 'pte' and switch back to smaller pages for the
2067                    end of the mapping, if the trailing size is not enough to
2068                    use another superpage (i.e. sg_res < lvl_pages). */
2069                 pte++;
2070                 if (!nr_pages || first_pte_in_page(pte) ||
2071                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2072                         domain_flush_cache(domain, first_pte,
2073                                            (void *)pte - (void *)first_pte);
2074                         pte = NULL;
2075                 }
2076
2077                 if (!sg_res && nr_pages)
2078                         sg = sg_next(sg);
2079         }
2080         return 0;
2081 }
2082
2083 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2084                                     struct scatterlist *sg, unsigned long nr_pages,
2085                                     int prot)
2086 {
2087         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2088 }
2089
2090 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2091                                      unsigned long phys_pfn, unsigned long nr_pages,
2092                                      int prot)
2093 {
2094         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2095 }
2096
2097 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2098 {
2099         if (!iommu)
2100                 return;
2101
2102         clear_context_table(iommu, bus, devfn);
2103         iommu->flush.flush_context(iommu, 0, 0, 0,
2104                                            DMA_CCMD_GLOBAL_INVL);
2105         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2106 }
2107
2108 static inline void unlink_domain_info(struct device_domain_info *info)
2109 {
2110         assert_spin_locked(&device_domain_lock);
2111         list_del(&info->link);
2112         list_del(&info->global);
2113         if (info->dev)
2114                 info->dev->archdata.iommu = NULL;
2115 }
2116
2117 static void domain_remove_dev_info(struct dmar_domain *domain)
2118 {
2119         struct device_domain_info *info, *tmp;
2120         unsigned long flags;
2121
2122         spin_lock_irqsave(&device_domain_lock, flags);
2123         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2124                 unlink_domain_info(info);
2125                 spin_unlock_irqrestore(&device_domain_lock, flags);
2126
2127                 iommu_disable_dev_iotlb(info);
2128                 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2129
2130                 if (domain_type_is_vm(domain)) {
2131                         iommu_detach_dependent_devices(info->iommu, info->dev);
2132                         domain_detach_iommu(domain, info->iommu);
2133                 }
2134
2135                 free_devinfo_mem(info);
2136                 spin_lock_irqsave(&device_domain_lock, flags);
2137         }
2138         spin_unlock_irqrestore(&device_domain_lock, flags);
2139 }
2140
2141 /*
2142  * find_domain
2143  * Note: we use struct device->archdata.iommu stores the info
2144  */
2145 static struct dmar_domain *find_domain(struct device *dev)
2146 {
2147         struct device_domain_info *info;
2148
2149         /* No lock here, assumes no domain exit in normal case */
2150         info = dev->archdata.iommu;
2151         if (info)
2152                 return info->domain;
2153         return NULL;
2154 }
2155
2156 static inline struct device_domain_info *
2157 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2158 {
2159         struct device_domain_info *info;
2160
2161         list_for_each_entry(info, &device_domain_list, global)
2162                 if (info->iommu->segment == segment && info->bus == bus &&
2163                     info->devfn == devfn)
2164                         return info;
2165
2166         return NULL;
2167 }
2168
2169 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2170                                                 int bus, int devfn,
2171                                                 struct device *dev,
2172                                                 struct dmar_domain *domain)
2173 {
2174         struct dmar_domain *found = NULL;
2175         struct device_domain_info *info;
2176         unsigned long flags;
2177
2178         info = alloc_devinfo_mem();
2179         if (!info)
2180                 return NULL;
2181
2182         info->bus = bus;
2183         info->devfn = devfn;
2184         info->dev = dev;
2185         info->domain = domain;
2186         info->iommu = iommu;
2187
2188         spin_lock_irqsave(&device_domain_lock, flags);
2189         if (dev)
2190                 found = find_domain(dev);
2191         else {
2192                 struct device_domain_info *info2;
2193                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2194                 if (info2)
2195                         found = info2->domain;
2196         }
2197         if (found) {
2198                 spin_unlock_irqrestore(&device_domain_lock, flags);
2199                 free_devinfo_mem(info);
2200                 /* Caller must free the original domain */
2201                 return found;
2202         }
2203
2204         list_add(&info->link, &domain->devices);
2205         list_add(&info->global, &device_domain_list);
2206         if (dev)
2207                 dev->archdata.iommu = info;
2208         spin_unlock_irqrestore(&device_domain_lock, flags);
2209
2210         return domain;
2211 }
2212
2213 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2214 {
2215         *(u16 *)opaque = alias;
2216         return 0;
2217 }
2218
2219 /* domain is initialized */
2220 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2221 {
2222         struct dmar_domain *domain, *tmp;
2223         struct intel_iommu *iommu;
2224         struct device_domain_info *info;
2225         u16 dma_alias;
2226         unsigned long flags;
2227         u8 bus, devfn;
2228
2229         domain = find_domain(dev);
2230         if (domain)
2231                 return domain;
2232
2233         iommu = device_to_iommu(dev, &bus, &devfn);
2234         if (!iommu)
2235                 return NULL;
2236
2237         if (dev_is_pci(dev)) {
2238                 struct pci_dev *pdev = to_pci_dev(dev);
2239
2240                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2241
2242                 spin_lock_irqsave(&device_domain_lock, flags);
2243                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2244                                                       PCI_BUS_NUM(dma_alias),
2245                                                       dma_alias & 0xff);
2246                 if (info) {
2247                         iommu = info->iommu;
2248                         domain = info->domain;
2249                 }
2250                 spin_unlock_irqrestore(&device_domain_lock, flags);
2251
2252                 /* DMA alias already has a domain, uses it */
2253                 if (info)
2254                         goto found_domain;
2255         }
2256
2257         /* Allocate and initialize new domain for the device */
2258         domain = alloc_domain(0);
2259         if (!domain)
2260                 return NULL;
2261         domain->id = iommu_attach_domain(domain, iommu);
2262         if (domain->id < 0) {
2263                 free_domain_mem(domain);
2264                 return NULL;
2265         }
2266         domain_attach_iommu(domain, iommu);
2267         if (domain_init(domain, gaw)) {
2268                 domain_exit(domain);
2269                 return NULL;
2270         }
2271
2272         /* register PCI DMA alias device */
2273         if (dev_is_pci(dev)) {
2274                 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2275                                            dma_alias & 0xff, NULL, domain);
2276
2277                 if (!tmp || tmp != domain) {
2278                         domain_exit(domain);
2279                         domain = tmp;
2280                 }
2281
2282                 if (!domain)
2283                         return NULL;
2284         }
2285
2286 found_domain:
2287         tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2288
2289         if (!tmp || tmp != domain) {
2290                 domain_exit(domain);
2291                 domain = tmp;
2292         }
2293
2294         return domain;
2295 }
2296
2297 static int iommu_identity_mapping;
2298 #define IDENTMAP_ALL            1
2299 #define IDENTMAP_GFX            2
2300 #define IDENTMAP_AZALIA         4
2301
2302 static int iommu_domain_identity_map(struct dmar_domain *domain,
2303                                      unsigned long long start,
2304                                      unsigned long long end)
2305 {
2306         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2307         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2308
2309         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2310                           dma_to_mm_pfn(last_vpfn))) {
2311                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2312                 return -ENOMEM;
2313         }
2314
2315         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2316                  start, end, domain->id);
2317         /*
2318          * RMRR range might have overlap with physical memory range,
2319          * clear it first
2320          */
2321         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2322
2323         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2324                                   last_vpfn - first_vpfn + 1,
2325                                   DMA_PTE_READ|DMA_PTE_WRITE);
2326 }
2327
2328 static int iommu_prepare_identity_map(struct device *dev,
2329                                       unsigned long long start,
2330                                       unsigned long long end)
2331 {
2332         struct dmar_domain *domain;
2333         int ret;
2334
2335         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2336         if (!domain)
2337                 return -ENOMEM;
2338
2339         /* For _hardware_ passthrough, don't bother. But for software
2340            passthrough, we do it anyway -- it may indicate a memory
2341            range which is reserved in E820, so which didn't get set
2342            up to start with in si_domain */
2343         if (domain == si_domain && hw_pass_through) {
2344                 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2345                        dev_name(dev), start, end);
2346                 return 0;
2347         }
2348
2349         printk(KERN_INFO
2350                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2351                dev_name(dev), start, end);
2352         
2353         if (end < start) {
2354                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2355                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2356                         dmi_get_system_info(DMI_BIOS_VENDOR),
2357                         dmi_get_system_info(DMI_BIOS_VERSION),
2358                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2359                 ret = -EIO;
2360                 goto error;
2361         }
2362
2363         if (end >> agaw_to_width(domain->agaw)) {
2364                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2365                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2366                      agaw_to_width(domain->agaw),
2367                      dmi_get_system_info(DMI_BIOS_VENDOR),
2368                      dmi_get_system_info(DMI_BIOS_VERSION),
2369                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2370                 ret = -EIO;
2371                 goto error;
2372         }
2373
2374         ret = iommu_domain_identity_map(domain, start, end);
2375         if (ret)
2376                 goto error;
2377
2378         /* context entry init */
2379         ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2380         if (ret)
2381                 goto error;
2382
2383         return 0;
2384
2385  error:
2386         domain_exit(domain);
2387         return ret;
2388 }
2389
2390 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2391                                          struct device *dev)
2392 {
2393         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2394                 return 0;
2395         return iommu_prepare_identity_map(dev, rmrr->base_address,
2396                                           rmrr->end_address);
2397 }
2398
2399 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2400 static inline void iommu_prepare_isa(void)
2401 {
2402         struct pci_dev *pdev;
2403         int ret;
2404
2405         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2406         if (!pdev)
2407                 return;
2408
2409         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2410         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2411
2412         if (ret)
2413                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2414                        "floppy might not work\n");
2415
2416         pci_dev_put(pdev);
2417 }
2418 #else
2419 static inline void iommu_prepare_isa(void)
2420 {
2421         return;
2422 }
2423 #endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
2424
2425 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2426
2427 static int __init si_domain_init(int hw)
2428 {
2429         struct dmar_drhd_unit *drhd;
2430         struct intel_iommu *iommu;
2431         int nid, ret = 0;
2432         bool first = true;
2433
2434         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2435         if (!si_domain)
2436                 return -EFAULT;
2437
2438         for_each_active_iommu(iommu, drhd) {
2439                 ret = iommu_attach_domain(si_domain, iommu);
2440                 if (ret < 0) {
2441                         domain_exit(si_domain);
2442                         return -EFAULT;
2443                 } else if (first) {
2444                         si_domain->id = ret;
2445                         first = false;
2446                 } else if (si_domain->id != ret) {
2447                         domain_exit(si_domain);
2448                         return -EFAULT;
2449                 }
2450                 domain_attach_iommu(si_domain, iommu);
2451         }
2452
2453         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2454                 domain_exit(si_domain);
2455                 return -EFAULT;
2456         }
2457
2458         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2459                  si_domain->id);
2460
2461         if (hw)
2462                 return 0;
2463
2464         for_each_online_node(nid) {
2465                 unsigned long start_pfn, end_pfn;
2466                 int i;
2467
2468                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2469                         ret = iommu_domain_identity_map(si_domain,
2470                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2471                         if (ret)
2472                                 return ret;
2473                 }
2474         }
2475
2476         return 0;
2477 }
2478
2479 static int identity_mapping(struct device *dev)
2480 {
2481         struct device_domain_info *info;
2482
2483         if (likely(!iommu_identity_mapping))
2484                 return 0;
2485
2486         info = dev->archdata.iommu;
2487         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2488                 return (info->domain == si_domain);
2489
2490         return 0;
2491 }
2492
2493 static int domain_add_dev_info(struct dmar_domain *domain,
2494                                struct device *dev, int translation)
2495 {
2496         struct dmar_domain *ndomain;
2497         struct intel_iommu *iommu;
2498         u8 bus, devfn;
2499         int ret;
2500
2501         iommu = device_to_iommu(dev, &bus, &devfn);
2502         if (!iommu)
2503                 return -ENODEV;
2504
2505         ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2506         if (ndomain != domain)
2507                 return -EBUSY;
2508
2509         ret = domain_context_mapping(domain, dev, translation);
2510         if (ret) {
2511                 domain_remove_one_dev_info(domain, dev);
2512                 return ret;
2513         }
2514
2515         return 0;
2516 }
2517
2518 static bool device_has_rmrr(struct device *dev)
2519 {
2520         struct dmar_rmrr_unit *rmrr;
2521         struct device *tmp;
2522         int i;
2523
2524         rcu_read_lock();
2525         for_each_rmrr_units(rmrr) {
2526                 /*
2527                  * Return TRUE if this RMRR contains the device that
2528                  * is passed in.
2529                  */
2530                 for_each_active_dev_scope(rmrr->devices,
2531                                           rmrr->devices_cnt, i, tmp)
2532                         if (tmp == dev) {
2533                                 rcu_read_unlock();
2534                                 return true;
2535                         }
2536         }
2537         rcu_read_unlock();
2538         return false;
2539 }
2540
2541 /*
2542  * There are a couple cases where we need to restrict the functionality of
2543  * devices associated with RMRRs.  The first is when evaluating a device for
2544  * identity mapping because problems exist when devices are moved in and out
2545  * of domains and their respective RMRR information is lost.  This means that
2546  * a device with associated RMRRs will never be in a "passthrough" domain.
2547  * The second is use of the device through the IOMMU API.  This interface
2548  * expects to have full control of the IOVA space for the device.  We cannot
2549  * satisfy both the requirement that RMRR access is maintained and have an
2550  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2551  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2552  * We therefore prevent devices associated with an RMRR from participating in
2553  * the IOMMU API, which eliminates them from device assignment.
2554  *
2555  * In both cases we assume that PCI USB devices with RMRRs have them largely
2556  * for historical reasons and that the RMRR space is not actively used post
2557  * boot.  This exclusion may change if vendors begin to abuse it.
2558  */
2559 static bool device_is_rmrr_locked(struct device *dev)
2560 {
2561         if (!device_has_rmrr(dev))
2562                 return false;
2563
2564         if (dev_is_pci(dev)) {
2565                 struct pci_dev *pdev = to_pci_dev(dev);
2566
2567                 if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
2568                         return false;
2569         }
2570
2571         return true;
2572 }
2573
2574 static int iommu_should_identity_map(struct device *dev, int startup)
2575 {
2576
2577         if (dev_is_pci(dev)) {
2578                 struct pci_dev *pdev = to_pci_dev(dev);
2579
2580                 if (device_is_rmrr_locked(dev))
2581                         return 0;
2582
2583                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2584                         return 1;
2585
2586                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2587                         return 1;
2588
2589                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2590                         return 0;
2591
2592                 /*
2593                  * We want to start off with all devices in the 1:1 domain, and
2594                  * take them out later if we find they can't access all of memory.
2595                  *
2596                  * However, we can't do this for PCI devices behind bridges,
2597                  * because all PCI devices behind the same bridge will end up
2598                  * with the same source-id on their transactions.
2599                  *
2600                  * Practically speaking, we can't change things around for these
2601                  * devices at run-time, because we can't be sure there'll be no
2602                  * DMA transactions in flight for any of their siblings.
2603                  *
2604                  * So PCI devices (unless they're on the root bus) as well as
2605                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2606                  * the 1:1 domain, just in _case_ one of their siblings turns out
2607                  * not to be able to map all of memory.
2608                  */
2609                 if (!pci_is_pcie(pdev)) {
2610                         if (!pci_is_root_bus(pdev->bus))
2611                                 return 0;
2612                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2613                                 return 0;
2614                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2615                         return 0;
2616         } else {
2617                 if (device_has_rmrr(dev))
2618                         return 0;
2619         }
2620
2621         /*
2622          * At boot time, we don't yet know if devices will be 64-bit capable.
2623          * Assume that they will — if they turn out not to be, then we can
2624          * take them out of the 1:1 domain later.
2625          */
2626         if (!startup) {
2627                 /*
2628                  * If the device's dma_mask is less than the system's memory
2629                  * size then this is not a candidate for identity mapping.
2630                  */
2631                 u64 dma_mask = *dev->dma_mask;
2632
2633                 if (dev->coherent_dma_mask &&
2634                     dev->coherent_dma_mask < dma_mask)
2635                         dma_mask = dev->coherent_dma_mask;
2636
2637                 return dma_mask >= dma_get_required_mask(dev);
2638         }
2639
2640         return 1;
2641 }
2642
2643 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2644 {
2645         int ret;
2646
2647         if (!iommu_should_identity_map(dev, 1))
2648                 return 0;
2649
2650         ret = domain_add_dev_info(si_domain, dev,
2651                                   hw ? CONTEXT_TT_PASS_THROUGH :
2652                                        CONTEXT_TT_MULTI_LEVEL);
2653         if (!ret)
2654                 pr_info("IOMMU: %s identity mapping for device %s\n",
2655                         hw ? "hardware" : "software", dev_name(dev));
2656         else if (ret == -ENODEV)
2657                 /* device not associated with an iommu */
2658                 ret = 0;
2659
2660         return ret;
2661 }
2662
2663
2664 static int __init iommu_prepare_static_identity_mapping(int hw)
2665 {
2666         struct pci_dev *pdev = NULL;
2667         struct dmar_drhd_unit *drhd;
2668         struct intel_iommu *iommu;
2669         struct device *dev;
2670         int i;
2671         int ret = 0;
2672
2673         ret = si_domain_init(hw);
2674         if (ret)
2675                 return -EFAULT;
2676
2677         for_each_pci_dev(pdev) {
2678                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2679                 if (ret)
2680                         return ret;
2681         }
2682
2683         for_each_active_iommu(iommu, drhd)
2684                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2685                         struct acpi_device_physical_node *pn;
2686                         struct acpi_device *adev;
2687
2688                         if (dev->bus != &acpi_bus_type)
2689                                 continue;
2690                                 
2691                         adev= to_acpi_device(dev);
2692                         mutex_lock(&adev->physical_node_lock);
2693                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2694                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2695                                 if (ret)
2696                                         break;
2697                         }
2698                         mutex_unlock(&adev->physical_node_lock);
2699                         if (ret)
2700                                 return ret;
2701                 }
2702
2703         return 0;
2704 }
2705
2706 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2707 {
2708         /*
2709          * Start from the sane iommu hardware state.
2710          * If the queued invalidation is already initialized by us
2711          * (for example, while enabling interrupt-remapping) then
2712          * we got the things already rolling from a sane state.
2713          */
2714         if (!iommu->qi) {
2715                 /*
2716                  * Clear any previous faults.
2717                  */
2718                 dmar_fault(-1, iommu);
2719                 /*
2720                  * Disable queued invalidation if supported and already enabled
2721                  * before OS handover.
2722                  */
2723                 dmar_disable_qi(iommu);
2724         }
2725
2726         if (dmar_enable_qi(iommu)) {
2727                 /*
2728                  * Queued Invalidate not enabled, use Register Based Invalidate
2729                  */
2730                 iommu->flush.flush_context = __iommu_flush_context;
2731                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2732                 pr_info("IOMMU: %s using Register based invalidation\n",
2733                         iommu->name);
2734         } else {
2735                 iommu->flush.flush_context = qi_flush_context;
2736                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2737                 pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
2738         }
2739 }
2740
2741 static int __init init_dmars(void)
2742 {
2743         struct dmar_drhd_unit *drhd;
2744         struct dmar_rmrr_unit *rmrr;
2745         struct device *dev;
2746         struct intel_iommu *iommu;
2747         int i, ret;
2748
2749         /*
2750          * for each drhd
2751          *    allocate root
2752          *    initialize and program root entry to not present
2753          * endfor
2754          */
2755         for_each_drhd_unit(drhd) {
2756                 /*
2757                  * lock not needed as this is only incremented in the single
2758                  * threaded kernel __init code path all other access are read
2759                  * only
2760                  */
2761                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
2762                         g_num_of_iommus++;
2763                         continue;
2764                 }
2765                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2766                           DMAR_UNITS_SUPPORTED);
2767         }
2768
2769         /* Preallocate enough resources for IOMMU hot-addition */
2770         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
2771                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
2772
2773         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2774                         GFP_KERNEL);
2775         if (!g_iommus) {
2776                 printk(KERN_ERR "Allocating global iommu array failed\n");
2777                 ret = -ENOMEM;
2778                 goto error;
2779         }
2780
2781         deferred_flush = kzalloc(g_num_of_iommus *
2782                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2783         if (!deferred_flush) {
2784                 ret = -ENOMEM;
2785                 goto free_g_iommus;
2786         }
2787
2788         for_each_active_iommu(iommu, drhd) {
2789                 g_iommus[iommu->seq_id] = iommu;
2790
2791                 ret = iommu_init_domains(iommu);
2792                 if (ret)
2793                         goto free_iommu;
2794
2795                 /*
2796                  * TBD:
2797                  * we could share the same root & context tables
2798                  * among all IOMMU's. Need to Split it later.
2799                  */
2800                 ret = iommu_alloc_root_entry(iommu);
2801                 if (ret)
2802                         goto free_iommu;
2803                 if (!ecap_pass_through(iommu->ecap))
2804                         hw_pass_through = 0;
2805         }
2806
2807         for_each_active_iommu(iommu, drhd)
2808                 intel_iommu_init_qi(iommu);
2809
2810         if (iommu_pass_through)
2811                 iommu_identity_mapping |= IDENTMAP_ALL;
2812
2813 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2814         iommu_identity_mapping |= IDENTMAP_GFX;
2815 #endif
2816
2817         check_tylersburg_isoch();
2818
2819         /*
2820          * If pass through is not set or not enabled, setup context entries for
2821          * identity mappings for rmrr, gfx, and isa and may fall back to static
2822          * identity mapping if iommu_identity_mapping is set.
2823          */
2824         if (iommu_identity_mapping) {
2825                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2826                 if (ret) {
2827                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2828                         goto free_iommu;
2829                 }
2830         }
2831         /*
2832          * For each rmrr
2833          *   for each dev attached to rmrr
2834          *   do
2835          *     locate drhd for dev, alloc domain for dev
2836          *     allocate free domain
2837          *     allocate page table entries for rmrr
2838          *     if context not allocated for bus
2839          *           allocate and init context
2840          *           set present in root table for this bus
2841          *     init context with domain, translation etc
2842          *    endfor
2843          * endfor
2844          */
2845         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2846         for_each_rmrr_units(rmrr) {
2847                 /* some BIOS lists non-exist devices in DMAR table. */
2848                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2849                                           i, dev) {
2850                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
2851                         if (ret)
2852                                 printk(KERN_ERR
2853                                        "IOMMU: mapping reserved region failed\n");
2854                 }
2855         }
2856
2857         iommu_prepare_isa();
2858
2859         /*
2860          * for each drhd
2861          *   enable fault log
2862          *   global invalidate context cache
2863          *   global invalidate iotlb
2864          *   enable translation
2865          */
2866         for_each_iommu(iommu, drhd) {
2867                 if (drhd->ignored) {
2868                         /*
2869                          * we always have to disable PMRs or DMA may fail on
2870                          * this device
2871                          */
2872                         if (force_on)
2873                                 iommu_disable_protect_mem_regions(iommu);
2874                         continue;
2875                 }
2876
2877                 iommu_flush_write_buffer(iommu);
2878
2879                 ret = dmar_set_interrupt(iommu);
2880                 if (ret)
2881                         goto free_iommu;
2882
2883                 iommu_set_root_entry(iommu);
2884
2885                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2886                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2887                 iommu_enable_translation(iommu);
2888                 iommu_disable_protect_mem_regions(iommu);
2889         }
2890
2891         return 0;
2892
2893 free_iommu:
2894         for_each_active_iommu(iommu, drhd) {
2895                 disable_dmar_iommu(iommu);
2896                 free_dmar_iommu(iommu);
2897         }
2898         kfree(deferred_flush);
2899 free_g_iommus:
2900         kfree(g_iommus);
2901 error:
2902         return ret;
2903 }
2904
2905 /* This takes a number of _MM_ pages, not VTD pages */
2906 static struct iova *intel_alloc_iova(struct device *dev,
2907                                      struct dmar_domain *domain,
2908                                      unsigned long nrpages, uint64_t dma_mask)
2909 {
2910         struct iova *iova = NULL;
2911
2912         /* Restrict dma_mask to the width that the iommu can handle */
2913         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2914
2915         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2916                 /*
2917                  * First try to allocate an io virtual address in
2918                  * DMA_BIT_MASK(32) and if that fails then try allocating
2919                  * from higher range
2920                  */
2921                 iova = alloc_iova(&domain->iovad, nrpages,
2922                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2923                 if (iova)
2924                         return iova;
2925         }
2926         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2927         if (unlikely(!iova)) {
2928                 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2929                        nrpages, dev_name(dev));
2930                 return NULL;
2931         }
2932
2933         return iova;
2934 }
2935
2936 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2937 {
2938         struct dmar_domain *domain;
2939         int ret;
2940
2941         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2942         if (!domain) {
2943                 printk(KERN_ERR "Allocating domain for %s failed",
2944                        dev_name(dev));
2945                 return NULL;
2946         }
2947
2948         /* make sure context mapping is ok */
2949         if (unlikely(!domain_context_mapped(dev))) {
2950                 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2951                 if (ret) {
2952                         printk(KERN_ERR "Domain context map for %s failed",
2953                                dev_name(dev));
2954                         return NULL;
2955                 }
2956         }
2957
2958         return domain;
2959 }
2960
2961 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2962 {
2963         struct device_domain_info *info;
2964
2965         /* No lock here, assumes no domain exit in normal case */
2966         info = dev->archdata.iommu;
2967         if (likely(info))
2968                 return info->domain;
2969
2970         return __get_valid_domain_for_dev(dev);
2971 }
2972
2973 static int iommu_dummy(struct device *dev)
2974 {
2975         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2976 }
2977
2978 /* Check if the dev needs to go through non-identity map and unmap process.*/
2979 static int iommu_no_mapping(struct device *dev)
2980 {
2981         int found;
2982
2983         if (iommu_dummy(dev))
2984                 return 1;
2985
2986         if (!iommu_identity_mapping)
2987                 return 0;
2988
2989         found = identity_mapping(dev);
2990         if (found) {
2991                 if (iommu_should_identity_map(dev, 0))
2992                         return 1;
2993                 else {
2994                         /*
2995                          * 32 bit DMA is removed from si_domain and fall back
2996                          * to non-identity mapping.
2997                          */
2998                         domain_remove_one_dev_info(si_domain, dev);
2999                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
3000                                dev_name(dev));
3001                         return 0;
3002                 }
3003         } else {
3004                 /*
3005                  * In case of a detached 64 bit DMA device from vm, the device
3006                  * is put into si_domain for identity mapping.
3007                  */
3008                 if (iommu_should_identity_map(dev, 0)) {
3009                         int ret;
3010                         ret = domain_add_dev_info(si_domain, dev,
3011                                                   hw_pass_through ?
3012                                                   CONTEXT_TT_PASS_THROUGH :
3013                                                   CONTEXT_TT_MULTI_LEVEL);
3014                         if (!ret) {
3015                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
3016                                        dev_name(dev));
3017                                 return 1;
3018                         }
3019                 }
3020         }
3021
3022         return 0;
3023 }
3024
3025 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3026                                      size_t size, int dir, u64 dma_mask)
3027 {
3028         struct dmar_domain *domain;
3029         phys_addr_t start_paddr;
3030         struct iova *iova;
3031         int prot = 0;
3032         int ret;
3033         struct intel_iommu *iommu;
3034         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3035
3036         BUG_ON(dir == DMA_NONE);
3037
3038         if (iommu_no_mapping(dev))
3039                 return paddr;
3040
3041         domain = get_valid_domain_for_dev(dev);
3042         if (!domain)
3043                 return 0;
3044
3045         iommu = domain_get_iommu(domain);
3046         size = aligned_nrpages(paddr, size);
3047
3048         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3049         if (!iova)
3050                 goto error;
3051
3052         /*
3053          * Check if DMAR supports zero-length reads on write only
3054          * mappings..
3055          */
3056         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3057                         !cap_zlr(iommu->cap))
3058                 prot |= DMA_PTE_READ;
3059         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3060                 prot |= DMA_PTE_WRITE;
3061         /*
3062          * paddr - (paddr + size) might be partial page, we should map the whole
3063          * page.  Note: if two part of one page are separately mapped, we
3064          * might have two guest_addr mapping to the same host paddr, but this
3065          * is not a big problem
3066          */
3067         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3068                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3069         if (ret)
3070                 goto error;
3071
3072         /* it's a non-present to present mapping. Only flush if caching mode */
3073         if (cap_caching_mode(iommu->cap))
3074                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3075         else
3076                 iommu_flush_write_buffer(iommu);
3077
3078         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3079         start_paddr += paddr & ~PAGE_MASK;
3080         return start_paddr;
3081
3082 error:
3083         if (iova)
3084                 __free_iova(&domain->iovad, iova);
3085         printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
3086                 dev_name(dev), size, (unsigned long long)paddr, dir);
3087         return 0;
3088 }
3089
3090 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3091                                  unsigned long offset, size_t size,
3092                                  enum dma_data_direction dir,
3093                                  struct dma_attrs *attrs)
3094 {
3095         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3096                                   dir, *dev->dma_mask);
3097 }
3098
3099 static void flush_unmaps(void)
3100 {
3101         int i, j;
3102
3103         timer_on = 0;
3104
3105         /* just flush them all */
3106         for (i = 0; i < g_num_of_iommus; i++) {
3107                 struct intel_iommu *iommu = g_iommus[i];
3108                 if (!iommu)
3109                         continue;
3110
3111                 if (!deferred_flush[i].next)
3112                         continue;
3113
3114                 /* In caching mode, global flushes turn emulation expensive */
3115                 if (!cap_caching_mode(iommu->cap))
3116                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3117                                          DMA_TLB_GLOBAL_FLUSH);
3118                 for (j = 0; j < deferred_flush[i].next; j++) {
3119                         unsigned long mask;
3120                         struct iova *iova = deferred_flush[i].iova[j];
3121                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3122
3123                         /* On real hardware multiple invalidations are expensive */
3124                         if (cap_caching_mode(iommu->cap))
3125                                 iommu_flush_iotlb_psi(iommu, domain->id,
3126                                         iova->pfn_lo, iova_size(iova),
3127                                         !deferred_flush[i].freelist[j], 0);
3128                         else {
3129                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3130                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3131                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3132                         }
3133                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3134                         if (deferred_flush[i].freelist[j])
3135                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3136                 }
3137                 deferred_flush[i].next = 0;
3138         }
3139
3140         list_size = 0;
3141 }
3142
3143 static void flush_unmaps_timeout(unsigned long data)
3144 {
3145         unsigned long flags;
3146
3147         spin_lock_irqsave(&async_umap_flush_lock, flags);
3148         flush_unmaps();
3149         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3150 }
3151
3152 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3153 {
3154         unsigned long flags;
3155         int next, iommu_id;
3156         struct intel_iommu *iommu;
3157
3158         spin_lock_irqsave(&async_umap_flush_lock, flags);
3159         if (list_size == HIGH_WATER_MARK)
3160                 flush_unmaps();
3161
3162         iommu = domain_get_iommu(dom);
3163         iommu_id = iommu->seq_id;
3164
3165         next = deferred_flush[iommu_id].next;
3166         deferred_flush[iommu_id].domain[next] = dom;
3167         deferred_flush[iommu_id].iova[next] = iova;
3168         deferred_flush[iommu_id].freelist[next] = freelist;
3169         deferred_flush[iommu_id].next++;
3170
3171         if (!timer_on) {
3172                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3173                 timer_on = 1;
3174         }
3175         list_size++;
3176         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3177 }
3178
3179 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3180 {
3181         struct dmar_domain *domain;
3182         unsigned long start_pfn, last_pfn;
3183         struct iova *iova;
3184         struct intel_iommu *iommu;
3185         struct page *freelist;
3186
3187         if (iommu_no_mapping(dev))
3188                 return;
3189
3190         domain = find_domain(dev);
3191         BUG_ON(!domain);
3192
3193         iommu = domain_get_iommu(domain);
3194
3195         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3196         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3197                       (unsigned long long)dev_addr))
3198                 return;
3199
3200         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3201         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3202
3203         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3204                  dev_name(dev), start_pfn, last_pfn);
3205
3206         freelist = domain_unmap(domain, start_pfn, last_pfn);
3207
3208         if (intel_iommu_strict) {
3209                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3210                                       last_pfn - start_pfn + 1, !freelist, 0);
3211                 /* free iova */
3212                 __free_iova(&domain->iovad, iova);
3213                 dma_free_pagelist(freelist);
3214         } else {
3215                 add_unmap(domain, iova, freelist);
3216                 /*
3217                  * queue up the release of the unmap to save the 1/6th of the
3218                  * cpu used up by the iotlb flush operation...
3219                  */
3220         }
3221 }
3222
3223 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3224                              size_t size, enum dma_data_direction dir,
3225                              struct dma_attrs *attrs)
3226 {
3227         intel_unmap(dev, dev_addr);
3228 }
3229
3230 static void *intel_alloc_coherent(struct device *dev, size_t size,
3231                                   dma_addr_t *dma_handle, gfp_t flags,
3232                                   struct dma_attrs *attrs)
3233 {
3234         struct page *page = NULL;
3235         int order;
3236
3237         size = PAGE_ALIGN(size);
3238         order = get_order(size);
3239
3240         if (!iommu_no_mapping(dev))
3241                 flags &= ~(GFP_DMA | GFP_DMA32);
3242         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3243                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3244                         flags |= GFP_DMA;
3245                 else
3246                         flags |= GFP_DMA32;
3247         }
3248
3249         if (flags & __GFP_WAIT) {
3250                 unsigned int count = size >> PAGE_SHIFT;
3251
3252                 page = dma_alloc_from_contiguous(dev, count, order);
3253                 if (page && iommu_no_mapping(dev) &&
3254                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3255                         dma_release_from_contiguous(dev, page, count);
3256                         page = NULL;
3257                 }
3258         }
3259
3260         if (!page)
3261                 page = alloc_pages(flags, order);
3262         if (!page)
3263                 return NULL;
3264         memset(page_address(page), 0, size);
3265
3266         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3267                                          DMA_BIDIRECTIONAL,
3268                                          dev->coherent_dma_mask);
3269         if (*dma_handle)
3270                 return page_address(page);
3271         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3272                 __free_pages(page, order);
3273
3274         return NULL;
3275 }
3276
3277 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3278                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3279 {
3280         int order;
3281         struct page *page = virt_to_page(vaddr);
3282
3283         size = PAGE_ALIGN(size);
3284         order = get_order(size);
3285
3286         intel_unmap(dev, dma_handle);
3287         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3288                 __free_pages(page, order);
3289 }
3290
3291 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3292                            int nelems, enum dma_data_direction dir,
3293                            struct dma_attrs *attrs)
3294 {
3295         intel_unmap(dev, sglist[0].dma_address);
3296 }
3297
3298 static int intel_nontranslate_map_sg(struct device *hddev,
3299         struct scatterlist *sglist, int nelems, int dir)
3300 {
3301         int i;
3302         struct scatterlist *sg;
3303
3304         for_each_sg(sglist, sg, nelems, i) {
3305                 BUG_ON(!sg_page(sg));
3306                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3307                 sg->dma_length = sg->length;
3308         }
3309         return nelems;
3310 }
3311
3312 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3313                         enum dma_data_direction dir, struct dma_attrs *attrs)
3314 {
3315         int i;
3316         struct dmar_domain *domain;
3317         size_t size = 0;
3318         int prot = 0;
3319         struct iova *iova = NULL;
3320         int ret;
3321         struct scatterlist *sg;
3322         unsigned long start_vpfn;
3323         struct intel_iommu *iommu;
3324
3325         BUG_ON(dir == DMA_NONE);
3326         if (iommu_no_mapping(dev))
3327                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3328
3329         domain = get_valid_domain_for_dev(dev);
3330         if (!domain)
3331                 return 0;
3332
3333         iommu = domain_get_iommu(domain);
3334
3335         for_each_sg(sglist, sg, nelems, i)
3336                 size += aligned_nrpages(sg->offset, sg->length);
3337
3338         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3339                                 *dev->dma_mask);
3340         if (!iova) {
3341                 sglist->dma_length = 0;
3342                 return 0;
3343         }
3344
3345         /*
3346          * Check if DMAR supports zero-length reads on write only
3347          * mappings..
3348          */
3349         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3350                         !cap_zlr(iommu->cap))
3351                 prot |= DMA_PTE_READ;
3352         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3353                 prot |= DMA_PTE_WRITE;
3354
3355         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3356
3357         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3358         if (unlikely(ret)) {
3359                 dma_pte_free_pagetable(domain, start_vpfn,
3360                                        start_vpfn + size - 1);
3361                 __free_iova(&domain->iovad, iova);
3362                 return 0;
3363         }
3364
3365         /* it's a non-present to present mapping. Only flush if caching mode */
3366         if (cap_caching_mode(iommu->cap))
3367                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3368         else
3369                 iommu_flush_write_buffer(iommu);
3370
3371         return nelems;
3372 }
3373
3374 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3375 {
3376         return !dma_addr;
3377 }
3378
3379 struct dma_map_ops intel_dma_ops = {
3380         .alloc = intel_alloc_coherent,
3381         .free = intel_free_coherent,
3382         .map_sg = intel_map_sg,
3383         .unmap_sg = intel_unmap_sg,
3384         .map_page = intel_map_page,
3385         .unmap_page = intel_unmap_page,
3386         .mapping_error = intel_mapping_error,
3387 };
3388
3389 static inline int iommu_domain_cache_init(void)
3390 {
3391         int ret = 0;
3392
3393         iommu_domain_cache = kmem_cache_create("iommu_domain",
3394                                          sizeof(struct dmar_domain),
3395                                          0,
3396                                          SLAB_HWCACHE_ALIGN,
3397
3398                                          NULL);
3399         if (!iommu_domain_cache) {
3400                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3401                 ret = -ENOMEM;
3402         }
3403
3404         return ret;
3405 }
3406
3407 static inline int iommu_devinfo_cache_init(void)
3408 {
3409         int ret = 0;
3410
3411         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3412                                          sizeof(struct device_domain_info),
3413                                          0,
3414                                          SLAB_HWCACHE_ALIGN,
3415                                          NULL);
3416         if (!iommu_devinfo_cache) {
3417                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3418                 ret = -ENOMEM;
3419         }
3420
3421         return ret;
3422 }
3423
3424 static int __init iommu_init_mempool(void)
3425 {
3426         int ret;
3427         ret = iommu_iova_cache_init();
3428         if (ret)
3429                 return ret;
3430
3431         ret = iommu_domain_cache_init();
3432         if (ret)
3433                 goto domain_error;
3434
3435         ret = iommu_devinfo_cache_init();
3436         if (!ret)
3437                 return ret;
3438
3439         kmem_cache_destroy(iommu_domain_cache);
3440 domain_error:
3441         iommu_iova_cache_destroy();
3442
3443         return -ENOMEM;
3444 }
3445
3446 static void __init iommu_exit_mempool(void)
3447 {
3448         kmem_cache_destroy(iommu_devinfo_cache);
3449         kmem_cache_destroy(iommu_domain_cache);
3450         iommu_iova_cache_destroy();
3451 }
3452
3453 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3454 {
3455         struct dmar_drhd_unit *drhd;
3456         u32 vtbar;
3457         int rc;
3458
3459         /* We know that this device on this chipset has its own IOMMU.
3460          * If we find it under a different IOMMU, then the BIOS is lying
3461          * to us. Hope that the IOMMU for this device is actually
3462          * disabled, and it needs no translation...
3463          */
3464         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3465         if (rc) {
3466                 /* "can't" happen */
3467                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3468                 return;
3469         }
3470         vtbar &= 0xffff0000;
3471
3472         /* we know that the this iommu should be at offset 0xa000 from vtbar */
3473         drhd = dmar_find_matched_drhd_unit(pdev);
3474         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3475                             TAINT_FIRMWARE_WORKAROUND,
3476                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3477                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3478 }
3479 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3480
3481 static void __init init_no_remapping_devices(void)
3482 {
3483         struct dmar_drhd_unit *drhd;
3484         struct device *dev;
3485         int i;
3486
3487         for_each_drhd_unit(drhd) {
3488                 if (!drhd->include_all) {
3489                         for_each_active_dev_scope(drhd->devices,
3490                                                   drhd->devices_cnt, i, dev)
3491                                 break;
3492                         /* ignore DMAR unit if no devices exist */
3493                         if (i == drhd->devices_cnt)
3494                                 drhd->ignored = 1;
3495                 }
3496         }
3497
3498         for_each_active_drhd_unit(drhd) {
3499                 if (drhd->include_all)
3500                         continue;
3501
3502                 for_each_active_dev_scope(drhd->devices,
3503                                           drhd->devices_cnt, i, dev)
3504                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3505                                 break;
3506                 if (i < drhd->devices_cnt)
3507                         continue;
3508
3509                 /* This IOMMU has *only* gfx devices. Either bypass it or
3510                    set the gfx_mapped flag, as appropriate */
3511                 if (dmar_map_gfx) {
3512                         intel_iommu_gfx_mapped = 1;
3513                 } else {
3514                         drhd->ignored = 1;
3515                         for_each_active_dev_scope(drhd->devices,
3516                                                   drhd->devices_cnt, i, dev)
3517                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3518                 }
3519         }
3520 }
3521
3522 #ifdef CONFIG_SUSPEND
3523 static int init_iommu_hw(void)
3524 {
3525         struct dmar_drhd_unit *drhd;
3526         struct intel_iommu *iommu = NULL;
3527
3528         for_each_active_iommu(iommu, drhd)
3529                 if (iommu->qi)
3530                         dmar_reenable_qi(iommu);
3531
3532         for_each_iommu(iommu, drhd) {
3533                 if (drhd->ignored) {
3534                         /*
3535                          * we always have to disable PMRs or DMA may fail on
3536                          * this device
3537                          */
3538                         if (force_on)
3539                                 iommu_disable_protect_mem_regions(iommu);
3540                         continue;
3541                 }
3542         
3543                 iommu_flush_write_buffer(iommu);
3544
3545                 iommu_set_root_entry(iommu);
3546
3547                 iommu->flush.flush_context(iommu, 0, 0, 0,
3548                                            DMA_CCMD_GLOBAL_INVL);
3549                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3550                 iommu_enable_translation(iommu);
3551                 iommu_disable_protect_mem_regions(iommu);
3552         }
3553
3554         return 0;
3555 }
3556
3557 static void iommu_flush_all(void)
3558 {
3559         struct dmar_drhd_unit *drhd;
3560         struct intel_iommu *iommu;
3561
3562         for_each_active_iommu(iommu, drhd) {
3563                 iommu->flush.flush_context(iommu, 0, 0, 0,
3564                                            DMA_CCMD_GLOBAL_INVL);
3565                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3566                                          DMA_TLB_GLOBAL_FLUSH);
3567         }
3568 }
3569
3570 static int iommu_suspend(void)
3571 {
3572         struct dmar_drhd_unit *drhd;
3573         struct intel_iommu *iommu = NULL;
3574         unsigned long flag;
3575
3576         for_each_active_iommu(iommu, drhd) {
3577                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3578                                                  GFP_ATOMIC);
3579                 if (!iommu->iommu_state)
3580                         goto nomem;
3581         }
3582
3583         iommu_flush_all();
3584
3585         for_each_active_iommu(iommu, drhd) {
3586                 iommu_disable_translation(iommu);
3587
3588                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3589
3590                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3591                         readl(iommu->reg + DMAR_FECTL_REG);
3592                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3593                         readl(iommu->reg + DMAR_FEDATA_REG);
3594                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3595                         readl(iommu->reg + DMAR_FEADDR_REG);
3596                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3597                         readl(iommu->reg + DMAR_FEUADDR_REG);
3598
3599                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3600         }
3601         return 0;
3602
3603 nomem:
3604         for_each_active_iommu(iommu, drhd)
3605                 kfree(iommu->iommu_state);
3606
3607         return -ENOMEM;
3608 }
3609
3610 static void iommu_resume(void)
3611 {
3612         struct dmar_drhd_unit *drhd;
3613         struct intel_iommu *iommu = NULL;
3614         unsigned long flag;
3615
3616         if (init_iommu_hw()) {
3617                 if (force_on)
3618                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3619                 else
3620                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3621                 return;
3622         }
3623
3624         for_each_active_iommu(iommu, drhd) {
3625
3626                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3627
3628                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3629                         iommu->reg + DMAR_FECTL_REG);
3630                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3631                         iommu->reg + DMAR_FEDATA_REG);
3632                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3633                         iommu->reg + DMAR_FEADDR_REG);
3634                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3635                         iommu->reg + DMAR_FEUADDR_REG);
3636
3637                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3638         }
3639
3640         for_each_active_iommu(iommu, drhd)
3641                 kfree(iommu->iommu_state);
3642 }
3643
3644 static struct syscore_ops iommu_syscore_ops = {
3645         .resume         = iommu_resume,
3646         .suspend        = iommu_suspend,
3647 };
3648
3649 static void __init init_iommu_pm_ops(void)
3650 {
3651         register_syscore_ops(&iommu_syscore_ops);
3652 }
3653
3654 #else
3655 static inline void init_iommu_pm_ops(void) {}
3656 #endif  /* CONFIG_PM */
3657
3658
3659 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3660 {
3661         struct acpi_dmar_reserved_memory *rmrr;
3662         struct dmar_rmrr_unit *rmrru;
3663
3664         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3665         if (!rmrru)
3666                 return -ENOMEM;
3667
3668         rmrru->hdr = header;
3669         rmrr = (struct acpi_dmar_reserved_memory *)header;
3670         rmrru->base_address = rmrr->base_address;
3671         rmrru->end_address = rmrr->end_address;
3672         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3673                                 ((void *)rmrr) + rmrr->header.length,
3674                                 &rmrru->devices_cnt);
3675         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3676                 kfree(rmrru);
3677                 return -ENOMEM;
3678         }
3679
3680         list_add(&rmrru->list, &dmar_rmrr_units);
3681
3682         return 0;
3683 }
3684
3685 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3686 {
3687         struct dmar_atsr_unit *atsru;
3688         struct acpi_dmar_atsr *tmp;
3689
3690         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3691                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3692                 if (atsr->segment != tmp->segment)
3693                         continue;
3694                 if (atsr->header.length != tmp->header.length)
3695                         continue;
3696                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3697                         return atsru;
3698         }
3699
3700         return NULL;
3701 }
3702
3703 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3704 {
3705         struct acpi_dmar_atsr *atsr;
3706         struct dmar_atsr_unit *atsru;
3707
3708         if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
3709                 return 0;
3710
3711         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3712         atsru = dmar_find_atsr(atsr);
3713         if (atsru)
3714                 return 0;
3715
3716         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3717         if (!atsru)
3718                 return -ENOMEM;
3719
3720         /*
3721          * If memory is allocated from slab by ACPI _DSM method, we need to
3722          * copy the memory content because the memory buffer will be freed
3723          * on return.
3724          */
3725         atsru->hdr = (void *)(atsru + 1);
3726         memcpy(atsru->hdr, hdr, hdr->length);
3727         atsru->include_all = atsr->flags & 0x1;
3728         if (!atsru->include_all) {
3729                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3730                                 (void *)atsr + atsr->header.length,
3731                                 &atsru->devices_cnt);
3732                 if (atsru->devices_cnt && atsru->devices == NULL) {
3733                         kfree(atsru);
3734                         return -ENOMEM;
3735                 }
3736         }
3737
3738         list_add_rcu(&atsru->list, &dmar_atsr_units);
3739
3740         return 0;
3741 }
3742
3743 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3744 {
3745         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3746         kfree(atsru);
3747 }
3748
3749 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3750 {
3751         struct acpi_dmar_atsr *atsr;
3752         struct dmar_atsr_unit *atsru;
3753
3754         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3755         atsru = dmar_find_atsr(atsr);
3756         if (atsru) {
3757                 list_del_rcu(&atsru->list);
3758                 synchronize_rcu();
3759                 intel_iommu_free_atsr(atsru);
3760         }
3761
3762         return 0;
3763 }
3764
3765 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3766 {
3767         int i;
3768         struct device *dev;
3769         struct acpi_dmar_atsr *atsr;
3770         struct dmar_atsr_unit *atsru;
3771
3772         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3773         atsru = dmar_find_atsr(atsr);
3774         if (!atsru)
3775                 return 0;
3776
3777         if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
3778                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3779                                           i, dev)
3780                         return -EBUSY;
3781
3782         return 0;
3783 }
3784
3785 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3786 {
3787         int sp, ret = 0;
3788         struct intel_iommu *iommu = dmaru->iommu;
3789
3790         if (g_iommus[iommu->seq_id])
3791                 return 0;
3792
3793         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3794                 pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
3795                         iommu->name);
3796                 return -ENXIO;
3797         }
3798         if (!ecap_sc_support(iommu->ecap) &&
3799             domain_update_iommu_snooping(iommu)) {
3800                 pr_warn("IOMMU: %s doesn't support snooping.\n",
3801                         iommu->name);
3802                 return -ENXIO;
3803         }
3804         sp = domain_update_iommu_superpage(iommu) - 1;
3805         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3806                 pr_warn("IOMMU: %s doesn't support large page.\n",
3807                         iommu->name);
3808                 return -ENXIO;
3809         }
3810
3811         /*
3812          * Disable translation if already enabled prior to OS handover.
3813          */
3814         if (iommu->gcmd & DMA_GCMD_TE)
3815                 iommu_disable_translation(iommu);
3816
3817         g_iommus[iommu->seq_id] = iommu;
3818         ret = iommu_init_domains(iommu);
3819         if (ret == 0)
3820                 ret = iommu_alloc_root_entry(iommu);
3821         if (ret)
3822                 goto out;
3823
3824         if (dmaru->ignored) {
3825                 /*
3826                  * we always have to disable PMRs or DMA may fail on this device
3827                  */
3828                 if (force_on)
3829                         iommu_disable_protect_mem_regions(iommu);
3830                 return 0;
3831         }
3832
3833         intel_iommu_init_qi(iommu);
3834         iommu_flush_write_buffer(iommu);
3835         ret = dmar_set_interrupt(iommu);
3836         if (ret)
3837                 goto disable_iommu;
3838
3839         iommu_set_root_entry(iommu);
3840         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3841         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3842         iommu_enable_translation(iommu);
3843
3844         if (si_domain) {
3845                 ret = iommu_attach_domain(si_domain, iommu);
3846                 if (ret < 0 || si_domain->id != ret)
3847                         goto disable_iommu;
3848                 domain_attach_iommu(si_domain, iommu);
3849         }
3850
3851         iommu_disable_protect_mem_regions(iommu);
3852         return 0;
3853
3854 disable_iommu:
3855         disable_dmar_iommu(iommu);
3856 out:
3857         free_dmar_iommu(iommu);
3858         return ret;
3859 }
3860
3861 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3862 {
3863         int ret = 0;
3864         struct intel_iommu *iommu = dmaru->iommu;
3865
3866         if (!intel_iommu_enabled)
3867                 return 0;
3868         if (iommu == NULL)
3869                 return -EINVAL;
3870
3871         if (insert) {
3872                 ret = intel_iommu_add(dmaru);
3873         } else {
3874                 disable_dmar_iommu(iommu);
3875                 free_dmar_iommu(iommu);
3876         }
3877
3878         return ret;
3879 }
3880
3881 static void intel_iommu_free_dmars(void)
3882 {
3883         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3884         struct dmar_atsr_unit *atsru, *atsr_n;
3885
3886         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3887                 list_del(&rmrru->list);
3888                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3889                 kfree(rmrru);
3890         }
3891
3892         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3893                 list_del(&atsru->list);
3894                 intel_iommu_free_atsr(atsru);
3895         }
3896 }
3897
3898 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3899 {
3900         int i, ret = 1;
3901         struct pci_bus *bus;
3902         struct pci_dev *bridge = NULL;
3903         struct device *tmp;
3904         struct acpi_dmar_atsr *atsr;
3905         struct dmar_atsr_unit *atsru;
3906
3907         dev = pci_physfn(dev);
3908         for (bus = dev->bus; bus; bus = bus->parent) {
3909                 bridge = bus->self;
3910                 if (!bridge || !pci_is_pcie(bridge) ||
3911                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3912                         return 0;
3913                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3914                         break;
3915         }
3916         if (!bridge)
3917                 return 0;
3918
3919         rcu_read_lock();
3920         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3921                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3922                 if (atsr->segment != pci_domain_nr(dev->bus))
3923                         continue;
3924
3925                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3926                         if (tmp == &bridge->dev)
3927                                 goto out;
3928
3929                 if (atsru->include_all)
3930                         goto out;
3931         }
3932         ret = 0;
3933 out:
3934         rcu_read_unlock();
3935
3936         return ret;
3937 }
3938
3939 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3940 {
3941         int ret = 0;
3942         struct dmar_rmrr_unit *rmrru;
3943         struct dmar_atsr_unit *atsru;
3944         struct acpi_dmar_atsr *atsr;
3945         struct acpi_dmar_reserved_memory *rmrr;
3946
3947         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3948                 return 0;
3949
3950         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3951                 rmrr = container_of(rmrru->hdr,
3952                                     struct acpi_dmar_reserved_memory, header);
3953                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3954                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3955                                 ((void *)rmrr) + rmrr->header.length,
3956                                 rmrr->segment, rmrru->devices,
3957                                 rmrru->devices_cnt);
3958                         if(ret < 0)
3959                                 return ret;
3960                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3961                         dmar_remove_dev_scope(info, rmrr->segment,
3962                                 rmrru->devices, rmrru->devices_cnt);
3963                 }
3964         }
3965
3966         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3967                 if (atsru->include_all)
3968                         continue;
3969
3970                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3971                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3972                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3973                                         (void *)atsr + atsr->header.length,
3974                                         atsr->segment, atsru->devices,
3975                                         atsru->devices_cnt);
3976                         if (ret > 0)
3977                                 break;
3978                         else if(ret < 0)
3979                                 return ret;
3980                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3981                         if (dmar_remove_dev_scope(info, atsr->segment,
3982                                         atsru->devices, atsru->devices_cnt))
3983                                 break;
3984                 }
3985         }
3986
3987         return 0;
3988 }
3989
3990 /*
3991  * Here we only respond to action of unbound device from driver.
3992  *
3993  * Added device is not attached to its DMAR domain here yet. That will happen
3994  * when mapping the device to iova.
3995  */
3996 static int device_notifier(struct notifier_block *nb,
3997                                   unsigned long action, void *data)
3998 {
3999         struct device *dev = data;
4000         struct dmar_domain *domain;
4001
4002         if (iommu_dummy(dev))
4003                 return 0;
4004
4005         if (action != BUS_NOTIFY_REMOVED_DEVICE)
4006                 return 0;
4007
4008         domain = find_domain(dev);
4009         if (!domain)
4010                 return 0;
4011
4012         down_read(&dmar_global_lock);
4013         domain_remove_one_dev_info(domain, dev);
4014         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4015                 domain_exit(domain);
4016         up_read(&dmar_global_lock);
4017
4018         return 0;
4019 }
4020
4021 static struct notifier_block device_nb = {
4022         .notifier_call = device_notifier,
4023 };
4024
4025 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4026                                        unsigned long val, void *v)
4027 {
4028         struct memory_notify *mhp = v;
4029         unsigned long long start, end;
4030         unsigned long start_vpfn, last_vpfn;
4031
4032         switch (val) {
4033         case MEM_GOING_ONLINE:
4034                 start = mhp->start_pfn << PAGE_SHIFT;
4035                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4036                 if (iommu_domain_identity_map(si_domain, start, end)) {
4037                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
4038                                 start, end);
4039                         return NOTIFY_BAD;
4040                 }
4041                 break;
4042
4043         case MEM_OFFLINE:
4044         case MEM_CANCEL_ONLINE:
4045                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4046                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4047                 while (start_vpfn <= last_vpfn) {
4048                         struct iova *iova;
4049                         struct dmar_drhd_unit *drhd;
4050                         struct intel_iommu *iommu;
4051                         struct page *freelist;
4052
4053                         iova = find_iova(&si_domain->iovad, start_vpfn);
4054                         if (iova == NULL) {
4055                                 pr_debug("dmar: failed get IOVA for PFN %lx\n",
4056                                          start_vpfn);
4057                                 break;
4058                         }
4059
4060                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4061                                                      start_vpfn, last_vpfn);
4062                         if (iova == NULL) {
4063                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
4064                                         start_vpfn, last_vpfn);
4065                                 return NOTIFY_BAD;
4066                         }
4067
4068                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4069                                                iova->pfn_hi);
4070
4071                         rcu_read_lock();
4072                         for_each_active_iommu(iommu, drhd)
4073                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
4074                                         iova->pfn_lo, iova_size(iova),
4075                                         !freelist, 0);
4076                         rcu_read_unlock();
4077                         dma_free_pagelist(freelist);
4078
4079                         start_vpfn = iova->pfn_hi + 1;
4080                         free_iova_mem(iova);
4081                 }
4082                 break;
4083         }
4084
4085         return NOTIFY_OK;
4086 }
4087
4088 static struct notifier_block intel_iommu_memory_nb = {
4089         .notifier_call = intel_iommu_memory_notifier,
4090         .priority = 0
4091 };
4092
4093
4094 static ssize_t intel_iommu_show_version(struct device *dev,
4095                                         struct device_attribute *attr,
4096                                         char *buf)
4097 {
4098         struct intel_iommu *iommu = dev_get_drvdata(dev);
4099         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4100         return sprintf(buf, "%d:%d\n",
4101                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4102 }
4103 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4104
4105 static ssize_t intel_iommu_show_address(struct device *dev,
4106                                         struct device_attribute *attr,
4107                                         char *buf)
4108 {
4109         struct intel_iommu *iommu = dev_get_drvdata(dev);
4110         return sprintf(buf, "%llx\n", iommu->reg_phys);
4111 }
4112 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4113
4114 static ssize_t intel_iommu_show_cap(struct device *dev,
4115                                     struct device_attribute *attr,
4116                                     char *buf)
4117 {
4118         struct intel_iommu *iommu = dev_get_drvdata(dev);
4119         return sprintf(buf, "%llx\n", iommu->cap);
4120 }
4121 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4122
4123 static ssize_t intel_iommu_show_ecap(struct device *dev,
4124                                     struct device_attribute *attr,
4125                                     char *buf)
4126 {
4127         struct intel_iommu *iommu = dev_get_drvdata(dev);
4128         return sprintf(buf, "%llx\n", iommu->ecap);
4129 }
4130 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4131
4132 static struct attribute *intel_iommu_attrs[] = {
4133         &dev_attr_version.attr,
4134         &dev_attr_address.attr,
4135         &dev_attr_cap.attr,
4136         &dev_attr_ecap.attr,
4137         NULL,
4138 };
4139
4140 static struct attribute_group intel_iommu_group = {
4141         .name = "intel-iommu",
4142         .attrs = intel_iommu_attrs,
4143 };
4144
4145 const struct attribute_group *intel_iommu_groups[] = {
4146         &intel_iommu_group,
4147         NULL,
4148 };
4149
4150 int __init intel_iommu_init(void)
4151 {
4152         int ret = -ENODEV;
4153         struct dmar_drhd_unit *drhd;
4154         struct intel_iommu *iommu;
4155
4156         /* VT-d is required for a TXT/tboot launch, so enforce that */
4157         force_on = tboot_force_iommu();
4158
4159         if (iommu_init_mempool()) {
4160                 if (force_on)
4161                         panic("tboot: Failed to initialize iommu memory\n");
4162                 return -ENOMEM;
4163         }
4164
4165         down_write(&dmar_global_lock);
4166         if (dmar_table_init()) {
4167                 if (force_on)
4168                         panic("tboot: Failed to initialize DMAR table\n");
4169                 goto out_free_dmar;
4170         }
4171
4172         /*
4173          * Disable translation if already enabled prior to OS handover.
4174          */
4175         for_each_active_iommu(iommu, drhd)
4176                 if (iommu->gcmd & DMA_GCMD_TE)
4177                         iommu_disable_translation(iommu);
4178
4179         if (dmar_dev_scope_init() < 0) {
4180                 if (force_on)
4181                         panic("tboot: Failed to initialize DMAR device scope\n");
4182                 goto out_free_dmar;
4183         }
4184
4185         if (no_iommu || dmar_disabled)
4186                 goto out_free_dmar;
4187
4188         if (list_empty(&dmar_rmrr_units))
4189                 printk(KERN_INFO "DMAR: No RMRR found\n");
4190
4191         if (list_empty(&dmar_atsr_units))
4192                 printk(KERN_INFO "DMAR: No ATSR found\n");
4193
4194         if (dmar_init_reserved_ranges()) {
4195                 if (force_on)
4196                         panic("tboot: Failed to reserve iommu ranges\n");
4197                 goto out_free_reserved_range;
4198         }
4199
4200         init_no_remapping_devices();
4201
4202         ret = init_dmars();
4203         if (ret) {
4204                 if (force_on)
4205                         panic("tboot: Failed to initialize DMARs\n");
4206                 printk(KERN_ERR "IOMMU: dmar init failed\n");
4207                 goto out_free_reserved_range;
4208         }
4209         up_write(&dmar_global_lock);
4210         printk(KERN_INFO
4211         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4212
4213         init_timer(&unmap_timer);
4214 #ifdef CONFIG_SWIOTLB
4215         swiotlb = 0;
4216 #endif
4217         dma_ops = &intel_dma_ops;
4218
4219         init_iommu_pm_ops();
4220
4221         for_each_active_iommu(iommu, drhd)
4222                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4223                                                        intel_iommu_groups,
4224                                                        iommu->name);
4225
4226         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4227         bus_register_notifier(&pci_bus_type, &device_nb);
4228         if (si_domain && !hw_pass_through)
4229                 register_memory_notifier(&intel_iommu_memory_nb);
4230
4231         intel_iommu_enabled = 1;
4232
4233         return 0;
4234
4235 out_free_reserved_range:
4236         put_iova_domain(&reserved_iova_list);
4237 out_free_dmar:
4238         intel_iommu_free_dmars();
4239         up_write(&dmar_global_lock);
4240         iommu_exit_mempool();
4241         return ret;
4242 }
4243
4244 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4245 {
4246         struct intel_iommu *iommu = opaque;
4247
4248         iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4249         return 0;
4250 }
4251
4252 /*
4253  * NB - intel-iommu lacks any sort of reference counting for the users of
4254  * dependent devices.  If multiple endpoints have intersecting dependent
4255  * devices, unbinding the driver from any one of them will possibly leave
4256  * the others unable to operate.
4257  */
4258 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4259                                            struct device *dev)
4260 {
4261         if (!iommu || !dev || !dev_is_pci(dev))
4262                 return;
4263
4264         pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4265 }
4266
4267 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4268                                        struct device *dev)
4269 {
4270         struct device_domain_info *info, *tmp;
4271         struct intel_iommu *iommu;
4272         unsigned long flags;
4273         int found = 0;
4274         u8 bus, devfn;
4275
4276         iommu = device_to_iommu(dev, &bus, &devfn);
4277         if (!iommu)
4278                 return;
4279
4280         spin_lock_irqsave(&device_domain_lock, flags);
4281         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4282                 if (info->iommu == iommu && info->bus == bus &&
4283                     info->devfn == devfn) {
4284                         unlink_domain_info(info);
4285                         spin_unlock_irqrestore(&device_domain_lock, flags);
4286
4287                         iommu_disable_dev_iotlb(info);
4288                         iommu_detach_dev(iommu, info->bus, info->devfn);
4289                         iommu_detach_dependent_devices(iommu, dev);
4290                         free_devinfo_mem(info);
4291
4292                         spin_lock_irqsave(&device_domain_lock, flags);
4293
4294                         if (found)
4295                                 break;
4296                         else
4297                                 continue;
4298                 }
4299
4300                 /* if there is no other devices under the same iommu
4301                  * owned by this domain, clear this iommu in iommu_bmp
4302                  * update iommu count and coherency
4303                  */
4304                 if (info->iommu == iommu)
4305                         found = 1;
4306         }
4307
4308         spin_unlock_irqrestore(&device_domain_lock, flags);
4309
4310         if (found == 0) {
4311                 domain_detach_iommu(domain, iommu);
4312                 if (!domain_type_is_vm_or_si(domain))
4313                         iommu_detach_domain(domain, iommu);
4314         }
4315 }
4316
4317 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4318 {
4319         int adjust_width;
4320
4321         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4322                         DMA_32BIT_PFN);
4323         domain_reserve_special_ranges(domain);
4324
4325         /* calculate AGAW */
4326         domain->gaw = guest_width;
4327         adjust_width = guestwidth_to_adjustwidth(guest_width);
4328         domain->agaw = width_to_agaw(adjust_width);
4329
4330         domain->iommu_coherency = 0;
4331         domain->iommu_snooping = 0;
4332         domain->iommu_superpage = 0;
4333         domain->max_addr = 0;
4334
4335         /* always allocate the top pgd */
4336         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4337         if (!domain->pgd)
4338                 return -ENOMEM;
4339         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4340         return 0;
4341 }
4342
4343 static int intel_iommu_domain_init(struct iommu_domain *domain)
4344 {
4345         struct dmar_domain *dmar_domain;
4346
4347         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4348         if (!dmar_domain) {
4349                 printk(KERN_ERR
4350                         "intel_iommu_domain_init: dmar_domain == NULL\n");
4351                 return -ENOMEM;
4352         }
4353         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4354                 printk(KERN_ERR
4355                         "intel_iommu_domain_init() failed\n");
4356                 domain_exit(dmar_domain);
4357                 return -ENOMEM;
4358         }
4359         domain_update_iommu_cap(dmar_domain);
4360         domain->priv = dmar_domain;
4361
4362         domain->geometry.aperture_start = 0;
4363         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4364         domain->geometry.force_aperture = true;
4365
4366         return 0;
4367 }
4368
4369 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4370 {
4371         struct dmar_domain *dmar_domain = domain->priv;
4372
4373         domain->priv = NULL;
4374         domain_exit(dmar_domain);
4375 }
4376
4377 static int intel_iommu_attach_device(struct iommu_domain *domain,
4378                                      struct device *dev)
4379 {
4380         struct dmar_domain *dmar_domain = domain->priv;
4381         struct intel_iommu *iommu;
4382         int addr_width;
4383         u8 bus, devfn;
4384
4385         if (device_is_rmrr_locked(dev)) {
4386                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4387                 return -EPERM;
4388         }
4389
4390         /* normally dev is not mapped */
4391         if (unlikely(domain_context_mapped(dev))) {
4392                 struct dmar_domain *old_domain;
4393
4394                 old_domain = find_domain(dev);
4395                 if (old_domain) {
4396                         if (domain_type_is_vm_or_si(dmar_domain))
4397                                 domain_remove_one_dev_info(old_domain, dev);
4398                         else
4399                                 domain_remove_dev_info(old_domain);
4400
4401                         if (!domain_type_is_vm_or_si(old_domain) &&
4402                              list_empty(&old_domain->devices))
4403                                 domain_exit(old_domain);
4404                 }
4405         }
4406
4407         iommu = device_to_iommu(dev, &bus, &devfn);
4408         if (!iommu)
4409                 return -ENODEV;
4410
4411         /* check if this iommu agaw is sufficient for max mapped address */
4412         addr_width = agaw_to_width(iommu->agaw);
4413         if (addr_width > cap_mgaw(iommu->cap))
4414                 addr_width = cap_mgaw(iommu->cap);
4415
4416         if (dmar_domain->max_addr > (1LL << addr_width)) {
4417                 printk(KERN_ERR "%s: iommu width (%d) is not "
4418                        "sufficient for the mapped address (%llx)\n",
4419                        __func__, addr_width, dmar_domain->max_addr);
4420                 return -EFAULT;
4421         }
4422         dmar_domain->gaw = addr_width;
4423
4424         /*
4425          * Knock out extra levels of page tables if necessary
4426          */
4427         while (iommu->agaw < dmar_domain->agaw) {
4428                 struct dma_pte *pte;
4429
4430                 pte = dmar_domain->pgd;
4431                 if (dma_pte_present(pte)) {
4432                         dmar_domain->pgd = (struct dma_pte *)
4433                                 phys_to_virt(dma_pte_addr(pte));
4434                         free_pgtable_page(pte);
4435                 }
4436                 dmar_domain->agaw--;
4437         }
4438
4439         return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4440 }
4441
4442 static void intel_iommu_detach_device(struct iommu_domain *domain,
4443                                       struct device *dev)
4444 {
4445         struct dmar_domain *dmar_domain = domain->priv;
4446
4447         domain_remove_one_dev_info(dmar_domain, dev);
4448 }
4449
4450 static int intel_iommu_map(struct iommu_domain *domain,
4451                            unsigned long iova, phys_addr_t hpa,
4452                            size_t size, int iommu_prot)
4453 {
4454         struct dmar_domain *dmar_domain = domain->priv;
4455         u64 max_addr;
4456         int prot = 0;
4457         int ret;
4458
4459         if (iommu_prot & IOMMU_READ)
4460                 prot |= DMA_PTE_READ;
4461         if (iommu_prot & IOMMU_WRITE)
4462                 prot |= DMA_PTE_WRITE;
4463         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4464                 prot |= DMA_PTE_SNP;
4465
4466         max_addr = iova + size;
4467         if (dmar_domain->max_addr < max_addr) {
4468                 u64 end;
4469
4470                 /* check if minimum agaw is sufficient for mapped address */
4471                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4472                 if (end < max_addr) {
4473                         printk(KERN_ERR "%s: iommu width (%d) is not "
4474                                "sufficient for the mapped address (%llx)\n",
4475                                __func__, dmar_domain->gaw, max_addr);
4476                         return -EFAULT;
4477                 }
4478                 dmar_domain->max_addr = max_addr;
4479         }
4480         /* Round up size to next multiple of PAGE_SIZE, if it and
4481            the low bits of hpa would take us onto the next page */
4482         size = aligned_nrpages(hpa, size);
4483         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4484                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4485         return ret;
4486 }
4487
4488 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4489                                 unsigned long iova, size_t size)
4490 {
4491         struct dmar_domain *dmar_domain = domain->priv;
4492         struct page *freelist = NULL;
4493         struct intel_iommu *iommu;
4494         unsigned long start_pfn, last_pfn;
4495         unsigned int npages;
4496         int iommu_id, num, ndomains, level = 0;
4497
4498         /* Cope with horrid API which requires us to unmap more than the
4499            size argument if it happens to be a large-page mapping. */
4500         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4501                 BUG();
4502
4503         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4504                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4505
4506         start_pfn = iova >> VTD_PAGE_SHIFT;
4507         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4508
4509         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4510
4511         npages = last_pfn - start_pfn + 1;
4512
4513         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4514                iommu = g_iommus[iommu_id];
4515
4516                /*
4517                 * find bit position of dmar_domain
4518                 */
4519                ndomains = cap_ndoms(iommu->cap);
4520                for_each_set_bit(num, iommu->domain_ids, ndomains) {
4521                        if (iommu->domains[num] == dmar_domain)
4522                                iommu_flush_iotlb_psi(iommu, num, start_pfn,
4523                                                      npages, !freelist, 0);
4524                }
4525
4526         }
4527
4528         dma_free_pagelist(freelist);
4529
4530         if (dmar_domain->max_addr == iova + size)
4531                 dmar_domain->max_addr = iova;
4532
4533         return size;
4534 }
4535
4536 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4537                                             dma_addr_t iova)
4538 {
4539         struct dmar_domain *dmar_domain = domain->priv;
4540         struct dma_pte *pte;
4541         int level = 0;
4542         u64 phys = 0;
4543
4544         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4545         if (pte)
4546                 phys = dma_pte_addr(pte);
4547
4548         return phys;
4549 }
4550
4551 static bool intel_iommu_capable(enum iommu_cap cap)
4552 {
4553         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4554                 return domain_update_iommu_snooping(NULL) == 1;
4555         if (cap == IOMMU_CAP_INTR_REMAP)
4556                 return irq_remapping_enabled == 1;
4557
4558         return false;
4559 }
4560
4561 static int intel_iommu_add_device(struct device *dev)
4562 {
4563         struct intel_iommu *iommu;
4564         struct iommu_group *group;
4565         u8 bus, devfn;
4566
4567         iommu = device_to_iommu(dev, &bus, &devfn);
4568         if (!iommu)
4569                 return -ENODEV;
4570
4571         iommu_device_link(iommu->iommu_dev, dev);
4572
4573         group = iommu_group_get_for_dev(dev);
4574
4575         if (IS_ERR(group))
4576                 return PTR_ERR(group);
4577
4578         iommu_group_put(group);
4579         return 0;
4580 }
4581
4582 static void intel_iommu_remove_device(struct device *dev)
4583 {
4584         struct intel_iommu *iommu;
4585         u8 bus, devfn;
4586
4587         iommu = device_to_iommu(dev, &bus, &devfn);
4588         if (!iommu)
4589                 return;
4590
4591         iommu_group_remove_device(dev);
4592
4593         iommu_device_unlink(iommu->iommu_dev, dev);
4594 }
4595
4596 static const struct iommu_ops intel_iommu_ops = {
4597         .capable        = intel_iommu_capable,
4598         .domain_init    = intel_iommu_domain_init,
4599         .domain_destroy = intel_iommu_domain_destroy,
4600         .attach_dev     = intel_iommu_attach_device,
4601         .detach_dev     = intel_iommu_detach_device,
4602         .map            = intel_iommu_map,
4603         .unmap          = intel_iommu_unmap,
4604         .map_sg         = default_iommu_map_sg,
4605         .iova_to_phys   = intel_iommu_iova_to_phys,
4606         .add_device     = intel_iommu_add_device,
4607         .remove_device  = intel_iommu_remove_device,
4608         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4609 };
4610
4611 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4612 {
4613         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4614         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4615         dmar_map_gfx = 0;
4616 }
4617
4618 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4619 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4620 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4621 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4622 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4623 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4624 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4625
4626 static void quirk_iommu_rwbf(struct pci_dev *dev)
4627 {
4628         /*
4629          * Mobile 4 Series Chipset neglects to set RWBF capability,
4630          * but needs it. Same seems to hold for the desktop versions.
4631          */
4632         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4633         rwbf_quirk = 1;
4634 }
4635
4636 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4637 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4638 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4639 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4640 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4641 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4642 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4643
4644 #define GGC 0x52
4645 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4646 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4647 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4648 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4649 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4650 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4651 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4652 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4653
4654 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4655 {
4656         unsigned short ggc;
4657
4658         if (pci_read_config_word(dev, GGC, &ggc))
4659                 return;
4660
4661         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4662                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4663                 dmar_map_gfx = 0;
4664         } else if (dmar_map_gfx) {
4665                 /* we have to ensure the gfx device is idle before we flush */
4666                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4667                 intel_iommu_strict = 1;
4668        }
4669 }
4670 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4671 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4672 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4673 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4674
4675 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4676    ISOCH DMAR unit for the Azalia sound device, but not give it any
4677    TLB entries, which causes it to deadlock. Check for that.  We do
4678    this in a function called from init_dmars(), instead of in a PCI
4679    quirk, because we don't want to print the obnoxious "BIOS broken"
4680    message if VT-d is actually disabled.
4681 */
4682 static void __init check_tylersburg_isoch(void)
4683 {
4684         struct pci_dev *pdev;
4685         uint32_t vtisochctrl;
4686
4687         /* If there's no Azalia in the system anyway, forget it. */
4688         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4689         if (!pdev)
4690                 return;
4691         pci_dev_put(pdev);
4692
4693         /* System Management Registers. Might be hidden, in which case
4694            we can't do the sanity check. But that's OK, because the
4695            known-broken BIOSes _don't_ actually hide it, so far. */
4696         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4697         if (!pdev)
4698                 return;
4699
4700         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4701                 pci_dev_put(pdev);
4702                 return;
4703         }
4704
4705         pci_dev_put(pdev);
4706
4707         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4708         if (vtisochctrl & 1)
4709                 return;
4710
4711         /* Drop all bits other than the number of TLB entries */
4712         vtisochctrl &= 0x1c;
4713
4714         /* If we have the recommended number of TLB entries (16), fine. */
4715         if (vtisochctrl == 0x10)
4716                 return;
4717
4718         /* Zero TLB entries? You get to ride the short bus to school. */
4719         if (!vtisochctrl) {
4720                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4721                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4722                      dmi_get_system_info(DMI_BIOS_VENDOR),
4723                      dmi_get_system_info(DMI_BIOS_VERSION),
4724                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4725                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4726                 return;
4727         }
4728         
4729         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4730                vtisochctrl);
4731 }