1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #include "irq_remapping.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
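/*
 * Worked example (illustrative, not part of the driver): with gaw = 48 and
 * VTD_PAGE_SHIFT = 12, __DOMAIN_MAX_PFN(48) is (1ULL << 36) - 1 and
 * __DOMAIN_MAX_ADDR(48) is (1ULL << 48) - 1.  On a 64-bit kernel the min_t()
 * clamp in DOMAIN_MAX_PFN() changes nothing; on 32-bit it caps the result at
 * ULONG_MAX so that PFNs still fit in an unsigned long, as the comment above
 * intends.
 */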
73
74 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
75 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
76 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
77
78 /* page table handling */
79 #define LEVEL_STRIDE            (9)
80 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
81
82 /*
83  * This bitmap is used to advertise the page sizes our hardware supports
84  * to the IOMMU core, which will then use this information to split
85  * physically contiguous memory regions it is mapping into page sizes
86  * that we support.
87  *
88  * Traditionally the IOMMU core just handed us the mappings directly,
89  * after making sure the size is a power-of-two multiple of 4KiB and that the
90  * mapping has natural alignment.
91  *
92  * To retain this behavior, we currently advertise that we support
93  * all page sizes that are a power-of-two multiple of 4KiB.
94  *
95  * If at some point we'd like to utilize the IOMMU core's new behavior,
96  * we could change this to advertise the real page sizes we support.
97  */
98 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
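/*
 * Illustrative note (assumption about the core's bitmap encoding): bit k set
 * here advertises support for a page size of 2^k bytes, so ~0xFFFUL (every
 * bit at or above 12 set) advertises all power-of-two sizes from 4KiB up,
 * which preserves the traditional behaviour described above.
 */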
99
100 static inline int agaw_to_level(int agaw)
101 {
102         return agaw + 2;
103 }
104
105 static inline int agaw_to_width(int agaw)
106 {
107         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
108 }
109
110 static inline int width_to_agaw(int width)
111 {
112         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
113 }
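/*
 * Worked example (illustrative): a 48-bit address width gives
 * width_to_agaw(48) = DIV_ROUND_UP(18, 9) = 2, agaw_to_level(2) = 4
 * page-table levels, and agaw_to_width(2) = 30 + 2 * 9 = 48 again; a 39-bit
 * width maps to agaw 1 and a 3-level table.
 */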
114
115 static inline unsigned int level_to_offset_bits(int level)
116 {
117         return (level - 1) * LEVEL_STRIDE;
118 }
119
120 static inline int pfn_level_offset(unsigned long pfn, int level)
121 {
122         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123 }
124
125 static inline unsigned long level_mask(int level)
126 {
127         return -1UL << level_to_offset_bits(level);
128 }
129
130 static inline unsigned long level_size(int level)
131 {
132         return 1UL << level_to_offset_bits(level);
133 }
134
135 static inline unsigned long align_to_level(unsigned long pfn, int level)
136 {
137         return (pfn + level_size(level) - 1) & level_mask(level);
138 }
139
140 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141 {
142         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
143 }
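/*
 * Worked example (illustrative): with a 4-level table a DMA pfn is split into
 * four 9-bit indices: pfn_level_offset(pfn, 4) uses pfn bits 27-35, level 3
 * uses bits 18-26, level 2 bits 9-17 and level 1 bits 0-8.  level_size(2) is
 * 512 pfns (one 2MiB superpage) and level_size(3) is 262144 pfns (1GiB).
 */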
144
145 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146    are never going to work. */
147 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148 {
149         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150 }
151
152 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153 {
154         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155 }
156 static inline unsigned long page_to_dma_pfn(struct page *pg)
157 {
158         return mm_to_dma_pfn(page_to_pfn(pg));
159 }
160 static inline unsigned long virt_to_dma_pfn(void *p)
161 {
162         return page_to_dma_pfn(virt_to_page(p));
163 }
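/*
 * Illustrative note: with 4KiB MM pages (PAGE_SHIFT == VTD_PAGE_SHIFT == 12)
 * the conversions above are identity operations.  With, say, 64KiB MM pages
 * one MM pfn corresponds to 16 VT-d pfns, so mm_to_dma_pfn() shifts left by 4
 * and dma_to_mm_pfn() shifts right by 4.
 */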
164
165 /* global iommu list, set NULL for ignored DMAR units */
166 static struct intel_iommu **g_iommus;
167
168 static void __init check_tylersburg_isoch(void);
169 static int rwbf_quirk;
170
171 /*
172  * set to 1 to panic the kernel if VT-d cannot be enabled successfully
173  * (used when the kernel is launched with TXT)
174  */
175 static int force_on = 0;
176
177 /*
178  * 0: Present
179  * 1-11: Reserved
180  * 12-63: Context Ptr (12 - (haw-1))
181  * 64-127: Reserved
182  */
183 struct root_entry {
184         u64     val;
185         u64     rsvd1;
186 };
187 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188 static inline bool root_present(struct root_entry *root)
189 {
190         return (root->val & 1);
191 }
192 static inline void set_root_present(struct root_entry *root)
193 {
194         root->val |= 1;
195 }
196 static inline void set_root_value(struct root_entry *root, unsigned long value)
197 {
198         root->val |= value & VTD_PAGE_MASK;
199 }
200
201 static inline struct context_entry *
202 get_context_addr_from_root(struct root_entry *root)
203 {
204         return (struct context_entry *)
205                 (root_present(root)?phys_to_virt(
206                 root->val & VTD_PAGE_MASK) :
207                 NULL);
208 }
209
210 /*
211  * low 64 bits:
212  * 0: present
213  * 1: fault processing disable
214  * 2-3: translation type
215  * 12-63: address space root
216  * high 64 bits:
217  * 0-2: address width
218  * 3-6: available
219  * 8-23: domain id
220  */
221 struct context_entry {
222         u64 lo;
223         u64 hi;
224 };
225
226 static inline bool context_present(struct context_entry *context)
227 {
228         return (context->lo & 1);
229 }
230 static inline void context_set_present(struct context_entry *context)
231 {
232         context->lo |= 1;
233 }
234
235 static inline void context_set_fault_enable(struct context_entry *context)
236 {
237         context->lo &= (((u64)-1) << 2) | 1;
238 }
239
240 static inline void context_set_translation_type(struct context_entry *context,
241                                                 unsigned long value)
242 {
243         context->lo &= (((u64)-1) << 4) | 3;
244         context->lo |= (value & 3) << 2;
245 }
246
247 static inline void context_set_address_root(struct context_entry *context,
248                                             unsigned long value)
249 {
250         context->lo |= value & VTD_PAGE_MASK;
251 }
252
253 static inline void context_set_address_width(struct context_entry *context,
254                                              unsigned long value)
255 {
256         context->hi |= value & 7;
257 }
258
259 static inline void context_set_domain_id(struct context_entry *context,
260                                          unsigned long value)
261 {
262         context->hi |= (value & ((1 << 16) - 1)) << 8;
263 }
264
265 static inline void context_clear_entry(struct context_entry *context)
266 {
267         context->lo = 0;
268         context->hi = 0;
269 }
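/*
 * Minimal usage sketch (illustrative; 'ce', the domain id 5 and agaw 2 are
 * made-up values, and 'pgd_phys' stands for the physical address of a page
 * table allocated elsewhere):
 *
 *      context_clear_entry(ce);
 *      context_set_domain_id(ce, 5);            // hi bits 8-23
 *      context_set_address_width(ce, 2);        // hi bits 0-2 (agaw)
 *      context_set_address_root(ce, pgd_phys);  // lo bits 12-63
 *      context_set_present(ce);                 // lo bit 0
 *
 * which composes a context entry with the layout described above.
 */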
270
271 /*
272  * 0: readable
273  * 1: writable
274  * 2-6: reserved
275  * 7: super page
276  * 8-10: available
277  * 11: snoop behavior
278  * 12-63: Host physical address
279  */
280 struct dma_pte {
281         u64 val;
282 };
283
284 static inline void dma_clear_pte(struct dma_pte *pte)
285 {
286         pte->val = 0;
287 }
288
289 static inline u64 dma_pte_addr(struct dma_pte *pte)
290 {
291 #ifdef CONFIG_64BIT
292         return pte->val & VTD_PAGE_MASK;
293 #else
294         /* Must have a full atomic 64-bit read */
295         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
296 #endif
297 }
298
299 static inline bool dma_pte_present(struct dma_pte *pte)
300 {
301         return (pte->val & 3) != 0;
302 }
303
304 static inline bool dma_pte_superpage(struct dma_pte *pte)
305 {
306         return (pte->val & DMA_PTE_LARGE_PAGE);
307 }
308
309 static inline int first_pte_in_page(struct dma_pte *pte)
310 {
311         return !((unsigned long)pte & ~VTD_PAGE_MASK);
312 }
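/*
 * Illustrative note: a leaf PTE mapping host physical page 0x12345 read/write
 * would hold roughly
 *
 *      ((u64)0x12345 << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE
 *
 * i.e. the page frame in bits 12-63 and the R/W bits at the bottom, matching
 * the layout above; dma_pte_present() tests those low bits and dma_pte_addr()
 * masks them off with VTD_PAGE_MASK.
 */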
313
314 /*
315  * This domain is a static identity mapping domain.
316  *      1. This domain creates a static 1:1 mapping to all usable memory.
317  *      2. It maps to each iommu if successful.
318  *      3. Each iommu maps to this domain if successful.
319  */
320 static struct dmar_domain *si_domain;
321 static int hw_pass_through = 1;
322
323 /* The domain represents a virtual machine; more than one device
324  * across iommus may be owned by one domain, e.g. a kvm guest.
325  */
326 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
327
328 /* si_domain contains multiple devices */
329 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
330
331 /* define the limit of IOMMUs supported in each domain */
332 #ifdef  CONFIG_X86
333 # define        IOMMU_UNITS_SUPPORTED   MAX_IO_APICS
334 #else
335 # define        IOMMU_UNITS_SUPPORTED   64
336 #endif
337
338 struct dmar_domain {
339         int     id;                     /* domain id */
340         int     nid;                    /* node id */
341         DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
342                                         /* bitmap of iommus this domain uses*/
343
344         struct list_head devices;       /* all devices' list */
345         struct iova_domain iovad;       /* iova's that belong to this domain */
346
347         struct dma_pte  *pgd;           /* virtual address */
348         int             gaw;            /* max guest address width */
349
350         /* adjusted guest address width, 0 is level 2 30-bit */
351         int             agaw;
352
353         int             flags;          /* flags to find out type of domain */
354
355         int             iommu_coherency;/* indicate coherency of iommu access */
356         int             iommu_snooping; /* indicate snooping control feature*/
357         int             iommu_count;    /* reference count of iommu */
358         int             iommu_superpage;/* Level of superpages supported:
359                                            0 == 4KiB (no superpages), 1 == 2MiB,
360                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
361         spinlock_t      iommu_lock;     /* protect iommu set in domain */
362         u64             max_addr;       /* maximum mapped address */
363 };
364
365 /* PCI domain-device relationship */
366 struct device_domain_info {
367         struct list_head link;  /* link to domain siblings */
368         struct list_head global; /* link to global list */
369         u8 bus;                 /* PCI bus number */
370         u8 devfn;               /* PCI devfn number */
371         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
372         struct intel_iommu *iommu; /* IOMMU used by this device */
373         struct dmar_domain *domain; /* pointer to domain */
374 };
375
376 struct dmar_rmrr_unit {
377         struct list_head list;          /* list of rmrr units   */
378         struct acpi_dmar_header *hdr;   /* ACPI header          */
379         u64     base_address;           /* reserved base address*/
380         u64     end_address;            /* reserved end address */
381         struct dmar_dev_scope *devices; /* target devices */
382         int     devices_cnt;            /* target device count */
383 };
384
385 struct dmar_atsr_unit {
386         struct list_head list;          /* list of ATSR units */
387         struct acpi_dmar_header *hdr;   /* ACPI header */
388         struct dmar_dev_scope *devices; /* target devices */
389         int devices_cnt;                /* target device count */
390         u8 include_all:1;               /* include all ports */
391 };
392
393 static LIST_HEAD(dmar_atsr_units);
394 static LIST_HEAD(dmar_rmrr_units);
395
396 #define for_each_rmrr_units(rmrr) \
397         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
398
399 static void flush_unmaps_timeout(unsigned long data);
400
401 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
402
403 #define HIGH_WATER_MARK 250
404 struct deferred_flush_tables {
405         int next;
406         struct iova *iova[HIGH_WATER_MARK];
407         struct dmar_domain *domain[HIGH_WATER_MARK];
408         struct page *freelist[HIGH_WATER_MARK];
409 };
410
411 static struct deferred_flush_tables *deferred_flush;
412
413 /* number of IOMMUs, used to size and index g_iommus */
414 static int g_num_of_iommus;
415
416 static DEFINE_SPINLOCK(async_umap_flush_lock);
417 static LIST_HEAD(unmaps_to_do);
418
419 static int timer_on;
420 static long list_size;
421
422 static void domain_exit(struct dmar_domain *domain);
423 static void domain_remove_dev_info(struct dmar_domain *domain);
424 static void domain_remove_one_dev_info(struct dmar_domain *domain,
425                                        struct device *dev);
426 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
427                                            struct device *dev);
428 static int domain_detach_iommu(struct dmar_domain *domain,
429                                struct intel_iommu *iommu);
430
431 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
432 int dmar_disabled = 0;
433 #else
434 int dmar_disabled = 1;
435 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
436
437 int intel_iommu_enabled = 0;
438 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
439
440 static int dmar_map_gfx = 1;
441 static int dmar_forcedac;
442 static int intel_iommu_strict;
443 static int intel_iommu_superpage = 1;
444
445 int intel_iommu_gfx_mapped;
446 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
447
448 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
449 static DEFINE_SPINLOCK(device_domain_lock);
450 static LIST_HEAD(device_domain_list);
451
452 static const struct iommu_ops intel_iommu_ops;
453
454 static int __init intel_iommu_setup(char *str)
455 {
456         if (!str)
457                 return -EINVAL;
458         while (*str) {
459                 if (!strncmp(str, "on", 2)) {
460                         dmar_disabled = 0;
461                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
462                 } else if (!strncmp(str, "off", 3)) {
463                         dmar_disabled = 1;
464                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
465                 } else if (!strncmp(str, "igfx_off", 8)) {
466                         dmar_map_gfx = 0;
467                         printk(KERN_INFO
468                                 "Intel-IOMMU: disable GFX device mapping\n");
469                 } else if (!strncmp(str, "forcedac", 8)) {
470                         printk(KERN_INFO
471                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
472                         dmar_forcedac = 1;
473                 } else if (!strncmp(str, "strict", 6)) {
474                         printk(KERN_INFO
475                                 "Intel-IOMMU: disable batched IOTLB flush\n");
476                         intel_iommu_strict = 1;
477                 } else if (!strncmp(str, "sp_off", 6)) {
478                         printk(KERN_INFO
479                                 "Intel-IOMMU: disable supported super page\n");
480                         intel_iommu_superpage = 0;
481                 }
482
483                 str += strcspn(str, ",");
484                 while (*str == ',')
485                         str++;
486         }
487         return 0;
488 }
489 __setup("intel_iommu=", intel_iommu_setup);
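/*
 * Illustrative usage (kernel command line): the parser above accepts a
 * comma-separated list, so booting with
 *
 *      intel_iommu=on,strict,sp_off
 *
 * enables the IOMMU, disables batched IOTLB flushing and disables superpage
 * support.
 */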
490
491 static struct kmem_cache *iommu_domain_cache;
492 static struct kmem_cache *iommu_devinfo_cache;
493 static struct kmem_cache *iommu_iova_cache;
494
495 static inline void *alloc_pgtable_page(int node)
496 {
497         struct page *page;
498         void *vaddr = NULL;
499
500         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
501         if (page)
502                 vaddr = page_address(page);
503         return vaddr;
504 }
505
506 static inline void free_pgtable_page(void *vaddr)
507 {
508         free_page((unsigned long)vaddr);
509 }
510
511 static inline void *alloc_domain_mem(void)
512 {
513         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
514 }
515
516 static void free_domain_mem(void *vaddr)
517 {
518         kmem_cache_free(iommu_domain_cache, vaddr);
519 }
520
521 static inline void * alloc_devinfo_mem(void)
522 {
523         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
524 }
525
526 static inline void free_devinfo_mem(void *vaddr)
527 {
528         kmem_cache_free(iommu_devinfo_cache, vaddr);
529 }
530
531 struct iova *alloc_iova_mem(void)
532 {
533         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
534 }
535
536 void free_iova_mem(struct iova *iova)
537 {
538         kmem_cache_free(iommu_iova_cache, iova);
539 }
540
541 static inline int domain_type_is_vm(struct dmar_domain *domain)
542 {
543         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
544 }
545
546 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
547 {
548         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
549                                 DOMAIN_FLAG_STATIC_IDENTITY);
550 }
551
552 static inline int domain_pfn_supported(struct dmar_domain *domain,
553                                        unsigned long pfn)
554 {
555         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
556
557         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
558 }
559
560 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
561 {
562         unsigned long sagaw;
563         int agaw = -1;
564
565         sagaw = cap_sagaw(iommu->cap);
566         for (agaw = width_to_agaw(max_gaw);
567              agaw >= 0; agaw--) {
568                 if (test_bit(agaw, &sagaw))
569                         break;
570         }
571
572         return agaw;
573 }
574
575 /*
576  * Calculate max SAGAW for each iommu.
577  */
578 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
579 {
580         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
581 }
582
583 /*
584  * Calculate agaw for each iommu.
585  * "SAGAW" may be different across iommus; use a default agaw, and fall
586  * back to a smaller supported agaw for iommus that don't support it.
587  */
588 int iommu_calculate_agaw(struct intel_iommu *iommu)
589 {
590         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
591 }
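/*
 * Illustrative example: if cap_sagaw() reports bits 1 and 2 set (39- and
 * 48-bit widths in this encoding), iommu_calculate_agaw() starts at
 * width_to_agaw(48) = 2, finds bit 2 set and keeps the default 48-bit width;
 * if only bit 1 were set it would fall back to agaw 1 (39 bits).
 */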
592
593 /* This function only returns a single iommu in a domain */
594 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
595 {
596         int iommu_id;
597
598         /* si_domain and vm domain should not get here. */
599         BUG_ON(domain_type_is_vm_or_si(domain));
600         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
601         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
602                 return NULL;
603
604         return g_iommus[iommu_id];
605 }
606
607 static void domain_update_iommu_coherency(struct dmar_domain *domain)
608 {
609         struct dmar_drhd_unit *drhd;
610         struct intel_iommu *iommu;
611         int i, found = 0;
612
613         domain->iommu_coherency = 1;
614
615         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
616                 found = 1;
617                 if (!ecap_coherent(g_iommus[i]->ecap)) {
618                         domain->iommu_coherency = 0;
619                         break;
620                 }
621         }
622         if (found)
623                 return;
624
625         /* No hardware attached; use lowest common denominator */
626         rcu_read_lock();
627         for_each_active_iommu(iommu, drhd) {
628                 if (!ecap_coherent(iommu->ecap)) {
629                         domain->iommu_coherency = 0;
630                         break;
631                 }
632         }
633         rcu_read_unlock();
634 }
635
636 static void domain_update_iommu_snooping(struct dmar_domain *domain)
637 {
638         int i;
639
640         domain->iommu_snooping = 1;
641
642         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
643                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
644                         domain->iommu_snooping = 0;
645                         break;
646                 }
647         }
648 }
649
650 static void domain_update_iommu_superpage(struct dmar_domain *domain)
651 {
652         struct dmar_drhd_unit *drhd;
653         struct intel_iommu *iommu = NULL;
654         int mask = 0xf;
655
656         if (!intel_iommu_superpage) {
657                 domain->iommu_superpage = 0;
658                 return;
659         }
660
661         /* set iommu_superpage to the smallest common denominator */
662         rcu_read_lock();
663         for_each_active_iommu(iommu, drhd) {
664                 mask &= cap_super_page_val(iommu->cap);
665                 if (!mask) {
666                         break;
667                 }
668         }
669         rcu_read_unlock();
670
671         domain->iommu_superpage = fls(mask);
672 }
673
674 /* Some capabilities may be different across iommus */
675 static void domain_update_iommu_cap(struct dmar_domain *domain)
676 {
677         domain_update_iommu_coherency(domain);
678         domain_update_iommu_snooping(domain);
679         domain_update_iommu_superpage(domain);
680 }
681
682 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
683 {
684         struct dmar_drhd_unit *drhd = NULL;
685         struct intel_iommu *iommu;
686         struct device *tmp;
687         struct pci_dev *ptmp, *pdev = NULL;
688         u16 segment = 0;
689         int i;
690
691         if (dev_is_pci(dev)) {
692                 pdev = to_pci_dev(dev);
693                 segment = pci_domain_nr(pdev->bus);
694         } else if (ACPI_COMPANION(dev))
695                 dev = &ACPI_COMPANION(dev)->dev;
696
697         rcu_read_lock();
698         for_each_active_iommu(iommu, drhd) {
699                 if (pdev && segment != drhd->segment)
700                         continue;
701
702                 for_each_active_dev_scope(drhd->devices,
703                                           drhd->devices_cnt, i, tmp) {
704                         if (tmp == dev) {
705                                 *bus = drhd->devices[i].bus;
706                                 *devfn = drhd->devices[i].devfn;
707                                 goto out;
708                         }
709
710                         if (!pdev || !dev_is_pci(tmp))
711                                 continue;
712
713                         ptmp = to_pci_dev(tmp);
714                         if (ptmp->subordinate &&
715                             ptmp->subordinate->number <= pdev->bus->number &&
716                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
717                                 goto got_pdev;
718                 }
719
720                 if (pdev && drhd->include_all) {
721                 got_pdev:
722                         *bus = pdev->bus->number;
723                         *devfn = pdev->devfn;
724                         goto out;
725                 }
726         }
727         iommu = NULL;
728  out:
729         rcu_read_unlock();
730
731         return iommu;
732 }
733
734 static void domain_flush_cache(struct dmar_domain *domain,
735                                void *addr, int size)
736 {
737         if (!domain->iommu_coherency)
738                 clflush_cache_range(addr, size);
739 }
740
741 /* Gets context entry for a given bus and devfn */
742 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
743                 u8 bus, u8 devfn)
744 {
745         struct root_entry *root;
746         struct context_entry *context;
747         unsigned long phy_addr;
748         unsigned long flags;
749
750         spin_lock_irqsave(&iommu->lock, flags);
751         root = &iommu->root_entry[bus];
752         context = get_context_addr_from_root(root);
753         if (!context) {
754                 context = (struct context_entry *)
755                                 alloc_pgtable_page(iommu->node);
756                 if (!context) {
757                         spin_unlock_irqrestore(&iommu->lock, flags);
758                         return NULL;
759                 }
760                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
761                 phy_addr = virt_to_phys((void *)context);
762                 set_root_value(root, phy_addr);
763                 set_root_present(root);
764                 __iommu_flush_cache(iommu, root, sizeof(*root));
765         }
766         spin_unlock_irqrestore(&iommu->lock, flags);
767         return &context[devfn];
768 }
769
770 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
771 {
772         struct root_entry *root;
773         struct context_entry *context;
774         int ret;
775         unsigned long flags;
776
777         spin_lock_irqsave(&iommu->lock, flags);
778         root = &iommu->root_entry[bus];
779         context = get_context_addr_from_root(root);
780         if (!context) {
781                 ret = 0;
782                 goto out;
783         }
784         ret = context_present(&context[devfn]);
785 out:
786         spin_unlock_irqrestore(&iommu->lock, flags);
787         return ret;
788 }
789
790 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
791 {
792         struct root_entry *root;
793         struct context_entry *context;
794         unsigned long flags;
795
796         spin_lock_irqsave(&iommu->lock, flags);
797         root = &iommu->root_entry[bus];
798         context = get_context_addr_from_root(root);
799         if (context) {
800                 context_clear_entry(&context[devfn]);
801                 __iommu_flush_cache(iommu, &context[devfn], \
802                         sizeof(*context));
803         }
804         spin_unlock_irqrestore(&iommu->lock, flags);
805 }
806
807 static void free_context_table(struct intel_iommu *iommu)
808 {
809         struct root_entry *root;
810         int i;
811         unsigned long flags;
812         struct context_entry *context;
813
814         spin_lock_irqsave(&iommu->lock, flags);
815         if (!iommu->root_entry) {
816                 goto out;
817         }
818         for (i = 0; i < ROOT_ENTRY_NR; i++) {
819                 root = &iommu->root_entry[i];
820                 context = get_context_addr_from_root(root);
821                 if (context)
822                         free_pgtable_page(context);
823         }
824         free_pgtable_page(iommu->root_entry);
825         iommu->root_entry = NULL;
826 out:
827         spin_unlock_irqrestore(&iommu->lock, flags);
828 }
829
830 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
831                                       unsigned long pfn, int *target_level)
832 {
833         struct dma_pte *parent, *pte = NULL;
834         int level = agaw_to_level(domain->agaw);
835         int offset;
836
837         BUG_ON(!domain->pgd);
838
839         if (!domain_pfn_supported(domain, pfn))
840                 /* Address beyond IOMMU's addressing capabilities. */
841                 return NULL;
842
843         parent = domain->pgd;
844
845         while (1) {
846                 void *tmp_page;
847
848                 offset = pfn_level_offset(pfn, level);
849                 pte = &parent[offset];
850                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
851                         break;
852                 if (level == *target_level)
853                         break;
854
855                 if (!dma_pte_present(pte)) {
856                         uint64_t pteval;
857
858                         tmp_page = alloc_pgtable_page(domain->nid);
859
860                         if (!tmp_page)
861                                 return NULL;
862
863                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
864                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
865                         if (cmpxchg64(&pte->val, 0ULL, pteval))
866                                 /* Someone else set it while we were thinking; use theirs. */
867                                 free_pgtable_page(tmp_page);
868                         else
869                                 domain_flush_cache(domain, pte, sizeof(*pte));
870                 }
871                 if (level == 1)
872                         break;
873
874                 parent = phys_to_virt(dma_pte_addr(pte));
875                 level--;
876         }
877
878         if (!*target_level)
879                 *target_level = level;
880
881         return pte;
882 }
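/*
 * Illustrative note: pfn_to_dma_pte() walks from the top level down,
 * allocating missing intermediate tables and installing them with cmpxchg64()
 * so that a racing walker which plants the same entry first simply wins and
 * our freshly allocated page is freed.  With *target_level == 1 it yields the
 * 4KiB leaf PTE for the pfn; with *target_level == 0 it stops at the first
 * superpage or non-present entry and reports back the level it stopped at.
 */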
883
884
885 /* return address's pte at specific level */
886 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
887                                          unsigned long pfn,
888                                          int level, int *large_page)
889 {
890         struct dma_pte *parent, *pte = NULL;
891         int total = agaw_to_level(domain->agaw);
892         int offset;
893
894         parent = domain->pgd;
895         while (level <= total) {
896                 offset = pfn_level_offset(pfn, total);
897                 pte = &parent[offset];
898                 if (level == total)
899                         return pte;
900
901                 if (!dma_pte_present(pte)) {
902                         *large_page = total;
903                         break;
904                 }
905
906                 if (dma_pte_superpage(pte)) {
907                         *large_page = total;
908                         return pte;
909                 }
910
911                 parent = phys_to_virt(dma_pte_addr(pte));
912                 total--;
913         }
914         return NULL;
915 }
916
917 /* clear last level pte; a TLB flush should follow */
918 static void dma_pte_clear_range(struct dmar_domain *domain,
919                                 unsigned long start_pfn,
920                                 unsigned long last_pfn)
921 {
922         unsigned int large_page = 1;
923         struct dma_pte *first_pte, *pte;
924
925         BUG_ON(!domain_pfn_supported(domain, start_pfn));
926         BUG_ON(!domain_pfn_supported(domain, last_pfn));
927         BUG_ON(start_pfn > last_pfn);
928
929         /* we don't need lock here; nobody else touches the iova range */
930         do {
931                 large_page = 1;
932                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
933                 if (!pte) {
934                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
935                         continue;
936                 }
937                 do {
938                         dma_clear_pte(pte);
939                         start_pfn += lvl_to_nr_pages(large_page);
940                         pte++;
941                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
942
943                 domain_flush_cache(domain, first_pte,
944                                    (void *)pte - (void *)first_pte);
945
946         } while (start_pfn && start_pfn <= last_pfn);
947 }
948
949 static void dma_pte_free_level(struct dmar_domain *domain, int level,
950                                struct dma_pte *pte, unsigned long pfn,
951                                unsigned long start_pfn, unsigned long last_pfn)
952 {
953         pfn = max(start_pfn, pfn);
954         pte = &pte[pfn_level_offset(pfn, level)];
955
956         do {
957                 unsigned long level_pfn;
958                 struct dma_pte *level_pte;
959
960                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
961                         goto next;
962
963                 level_pfn = pfn & level_mask(level - 1);
964                 level_pte = phys_to_virt(dma_pte_addr(pte));
965
966                 if (level > 2)
967                         dma_pte_free_level(domain, level - 1, level_pte,
968                                            level_pfn, start_pfn, last_pfn);
969
970                 /* If range covers entire pagetable, free it */
971                 if (!(start_pfn > level_pfn ||
972                       last_pfn < level_pfn + level_size(level) - 1)) {
973                         dma_clear_pte(pte);
974                         domain_flush_cache(domain, pte, sizeof(*pte));
975                         free_pgtable_page(level_pte);
976                 }
977 next:
978                 pfn += level_size(level);
979         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
980 }
981
982 /* free page table pages. last level pte should already be cleared */
983 static void dma_pte_free_pagetable(struct dmar_domain *domain,
984                                    unsigned long start_pfn,
985                                    unsigned long last_pfn)
986 {
987         BUG_ON(!domain_pfn_supported(domain, start_pfn));
988         BUG_ON(!domain_pfn_supported(domain, last_pfn));
989         BUG_ON(start_pfn > last_pfn);
990
991         dma_pte_clear_range(domain, start_pfn, last_pfn);
992
993         /* We don't need lock here; nobody else touches the iova range */
994         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
995                            domain->pgd, 0, start_pfn, last_pfn);
996
997         /* free pgd */
998         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
999                 free_pgtable_page(domain->pgd);
1000                 domain->pgd = NULL;
1001         }
1002 }
1003
1004 /* When a page at a given level is being unlinked from its parent, we don't
1005    need to *modify* it at all. All we need to do is make a list of all the
1006    pages which can be freed just as soon as we've flushed the IOTLB and we
1007    know the hardware page-walk will no longer touch them.
1008    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1009    be freed. */
1010 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1011                                             int level, struct dma_pte *pte,
1012                                             struct page *freelist)
1013 {
1014         struct page *pg;
1015
1016         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1017         pg->freelist = freelist;
1018         freelist = pg;
1019
1020         if (level == 1)
1021                 return freelist;
1022
1023         pte = page_address(pg);
1024         do {
1025                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1026                         freelist = dma_pte_list_pagetables(domain, level - 1,
1027                                                            pte, freelist);
1028                 pte++;
1029         } while (!first_pte_in_page(pte));
1030
1031         return freelist;
1032 }
1033
1034 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1035                                         struct dma_pte *pte, unsigned long pfn,
1036                                         unsigned long start_pfn,
1037                                         unsigned long last_pfn,
1038                                         struct page *freelist)
1039 {
1040         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1041
1042         pfn = max(start_pfn, pfn);
1043         pte = &pte[pfn_level_offset(pfn, level)];
1044
1045         do {
1046                 unsigned long level_pfn;
1047
1048                 if (!dma_pte_present(pte))
1049                         goto next;
1050
1051                 level_pfn = pfn & level_mask(level);
1052
1053                 /* If range covers entire pagetable, free it */
1054                 if (start_pfn <= level_pfn &&
1055                     last_pfn >= level_pfn + level_size(level) - 1) {
1056                         /* These subordinate page tables are going away entirely. Don't
1057                            bother to clear them; we're just going to *free* them. */
1058                         if (level > 1 && !dma_pte_superpage(pte))
1059                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1060
1061                         dma_clear_pte(pte);
1062                         if (!first_pte)
1063                                 first_pte = pte;
1064                         last_pte = pte;
1065                 } else if (level > 1) {
1066                         /* Recurse down into a level that isn't *entirely* obsolete */
1067                         freelist = dma_pte_clear_level(domain, level - 1,
1068                                                        phys_to_virt(dma_pte_addr(pte)),
1069                                                        level_pfn, start_pfn, last_pfn,
1070                                                        freelist);
1071                 }
1072 next:
1073                 pfn += level_size(level);
1074         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1075
1076         if (first_pte)
1077                 domain_flush_cache(domain, first_pte,
1078                                    (void *)++last_pte - (void *)first_pte);
1079
1080         return freelist;
1081 }
1082
1083 /* We can't just free the pages because the IOMMU may still be walking
1084    the page tables, and may have cached the intermediate levels. The
1085    pages can only be freed after the IOTLB flush has been done. */
1086 struct page *domain_unmap(struct dmar_domain *domain,
1087                           unsigned long start_pfn,
1088                           unsigned long last_pfn)
1089 {
1090         struct page *freelist = NULL;
1091
1092         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1093         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1094         BUG_ON(start_pfn > last_pfn);
1095
1096         /* we don't need lock here; nobody else touches the iova range */
1097         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1098                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1099
1100         /* free pgd */
1101         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1102                 struct page *pgd_page = virt_to_page(domain->pgd);
1103                 pgd_page->freelist = freelist;
1104                 freelist = pgd_page;
1105
1106                 domain->pgd = NULL;
1107         }
1108
1109         return freelist;
1110 }
1111
1112 void dma_free_pagelist(struct page *freelist)
1113 {
1114         struct page *pg;
1115
1116         while ((pg = freelist)) {
1117                 freelist = pg->freelist;
1118                 free_pgtable_page(page_address(pg));
1119         }
1120 }
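/*
 * Minimal sketch of the intended calling order (illustrative; the flush in
 * the middle is whatever IOTLB flush the caller performs for the range):
 *
 *      freelist = domain_unmap(domain, start_pfn, last_pfn);
 *      ... flush the IOTLB for the unmapped range ...
 *      dma_free_pagelist(freelist);
 *
 * so that no page-table page is handed back to the allocator while the
 * hardware might still be walking it, as the comment above domain_unmap()
 * requires.
 */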
1121
1122 /* iommu handling */
1123 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1124 {
1125         struct root_entry *root;
1126         unsigned long flags;
1127
1128         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1129         if (!root)
1130                 return -ENOMEM;
1131
1132         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1133
1134         spin_lock_irqsave(&iommu->lock, flags);
1135         iommu->root_entry = root;
1136         spin_unlock_irqrestore(&iommu->lock, flags);
1137
1138         return 0;
1139 }
1140
1141 static void iommu_set_root_entry(struct intel_iommu *iommu)
1142 {
1143         void *addr;
1144         u32 sts;
1145         unsigned long flag;
1146
1147         addr = iommu->root_entry;
1148
1149         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1150         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1151
1152         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1153
1154         /* Make sure hardware completes it */
1155         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1156                       readl, (sts & DMA_GSTS_RTPS), sts);
1157
1158         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1159 }
1160
1161 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1162 {
1163         u32 val;
1164         unsigned long flag;
1165
1166         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1167                 return;
1168
1169         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1170         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1171
1172         /* Make sure hardware completes it */
1173         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1174                       readl, (!(val & DMA_GSTS_WBFS)), val);
1175
1176         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1177 }
1178
1179 /* return value determines if we need a write buffer flush */
1180 static void __iommu_flush_context(struct intel_iommu *iommu,
1181                                   u16 did, u16 source_id, u8 function_mask,
1182                                   u64 type)
1183 {
1184         u64 val = 0;
1185         unsigned long flag;
1186
1187         switch (type) {
1188         case DMA_CCMD_GLOBAL_INVL:
1189                 val = DMA_CCMD_GLOBAL_INVL;
1190                 break;
1191         case DMA_CCMD_DOMAIN_INVL:
1192                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1193                 break;
1194         case DMA_CCMD_DEVICE_INVL:
1195                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1196                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1197                 break;
1198         default:
1199                 BUG();
1200         }
1201         val |= DMA_CCMD_ICC;
1202
1203         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1204         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1205
1206         /* Make sure hardware completes it */
1207         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1208                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1209
1210         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1211 }
1212
1213 /* return value determines if we need a write buffer flush */
1214 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1215                                 u64 addr, unsigned int size_order, u64 type)
1216 {
1217         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1218         u64 val = 0, val_iva = 0;
1219         unsigned long flag;
1220
1221         switch (type) {
1222         case DMA_TLB_GLOBAL_FLUSH:
1223                 /* global flush doesn't need set IVA_REG */
1224                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1225                 break;
1226         case DMA_TLB_DSI_FLUSH:
1227                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1228                 break;
1229         case DMA_TLB_PSI_FLUSH:
1230                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1231                 /* IH bit is passed in as part of address */
1232                 val_iva = size_order | addr;
1233                 break;
1234         default:
1235                 BUG();
1236         }
1237         /* Note: set drain read/write */
1238 #if 0
1239         /*
1240          * This is probably meant to be extra safe; it looks like we can
1241          * ignore it without any impact.
1242          */
1243         if (cap_read_drain(iommu->cap))
1244                 val |= DMA_TLB_READ_DRAIN;
1245 #endif
1246         if (cap_write_drain(iommu->cap))
1247                 val |= DMA_TLB_WRITE_DRAIN;
1248
1249         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1250         /* Note: Only uses first TLB reg currently */
1251         if (val_iva)
1252                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1253         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1254
1255         /* Make sure hardware completes it */
1256         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1257                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1258
1259         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1260
1261         /* check IOTLB invalidation granularity */
1262         if (DMA_TLB_IAIG(val) == 0)
1263                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1264         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1265                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1266                         (unsigned long long)DMA_TLB_IIRG(type),
1267                         (unsigned long long)DMA_TLB_IAIG(val));
1268 }
1269
1270 static struct device_domain_info *
1271 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1272                          u8 bus, u8 devfn)
1273 {
1274         int found = 0;
1275         unsigned long flags;
1276         struct device_domain_info *info;
1277         struct pci_dev *pdev;
1278
1279         if (!ecap_dev_iotlb_support(iommu->ecap))
1280                 return NULL;
1281
1282         if (!iommu->qi)
1283                 return NULL;
1284
1285         spin_lock_irqsave(&device_domain_lock, flags);
1286         list_for_each_entry(info, &domain->devices, link)
1287                 if (info->iommu == iommu && info->bus == bus &&
1288                     info->devfn == devfn) {
1289                         found = 1;
1290                         break;
1291                 }
1292         spin_unlock_irqrestore(&device_domain_lock, flags);
1293
1294         if (!found || !info->dev || !dev_is_pci(info->dev))
1295                 return NULL;
1296
1297         pdev = to_pci_dev(info->dev);
1298
1299         if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1300                 return NULL;
1301
1302         if (!dmar_find_matched_atsr_unit(pdev))
1303                 return NULL;
1304
1305         return info;
1306 }
1307
1308 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1309 {
1310         if (!info || !dev_is_pci(info->dev))
1311                 return;
1312
1313         pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1314 }
1315
1316 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1317 {
1318         if (!info->dev || !dev_is_pci(info->dev) ||
1319             !pci_ats_enabled(to_pci_dev(info->dev)))
1320                 return;
1321
1322         pci_disable_ats(to_pci_dev(info->dev));
1323 }
1324
1325 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1326                                   u64 addr, unsigned mask)
1327 {
1328         u16 sid, qdep;
1329         unsigned long flags;
1330         struct device_domain_info *info;
1331
1332         spin_lock_irqsave(&device_domain_lock, flags);
1333         list_for_each_entry(info, &domain->devices, link) {
1334                 struct pci_dev *pdev;
1335                 if (!info->dev || !dev_is_pci(info->dev))
1336                         continue;
1337
1338                 pdev = to_pci_dev(info->dev);
1339                 if (!pci_ats_enabled(pdev))
1340                         continue;
1341
1342                 sid = info->bus << 8 | info->devfn;
1343                 qdep = pci_ats_queue_depth(pdev);
1344                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1345         }
1346         spin_unlock_irqrestore(&device_domain_lock, flags);
1347 }
1348
1349 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1350                                   unsigned long pfn, unsigned int pages, int ih, int map)
1351 {
1352         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1353         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1354
1355         BUG_ON(pages == 0);
1356
1357         if (ih)
1358                 ih = 1 << 6;
1359         /*
1360          * Fall back to domain-selective flush if there is no PSI support or
1361          * the size is too big.
1362          * PSI requires the page size to be 2^x and the base address to be
1363          * naturally aligned to that size.
1364          */
1365         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1366                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1367                                                 DMA_TLB_DSI_FLUSH);
1368         else
1369                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1370                                                 DMA_TLB_PSI_FLUSH);
1371
1372         /*
1373          * In caching mode, changes of pages from non-present to present require
1374          * flush. However, device IOTLB doesn't need to be flushed in this case.
1375          */
1376         if (!cap_caching_mode(iommu->cap) || !map)
1377                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1378 }
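/*
 * Worked example (illustrative): flushing 5 pages gives
 * mask = ilog2(__roundup_pow_of_two(5)) = 3, i.e. a page-selective
 * invalidation covering 8 pages whose base address must be 8-page aligned;
 * if the IOMMU lacks PSI or cannot handle that mask, the code above falls
 * back to a domain-selective flush instead.
 */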
1379
1380 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1381 {
1382         u32 pmen;
1383         unsigned long flags;
1384
1385         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1386         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1387         pmen &= ~DMA_PMEN_EPM;
1388         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1389
1390         /* wait for the protected region status bit to clear */
1391         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1392                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1393
1394         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1395 }
1396
1397 static void iommu_enable_translation(struct intel_iommu *iommu)
1398 {
1399         u32 sts;
1400         unsigned long flags;
1401
1402         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1403         iommu->gcmd |= DMA_GCMD_TE;
1404         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1405
1406         /* Make sure hardware completes it */
1407         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1408                       readl, (sts & DMA_GSTS_TES), sts);
1409
1410         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1411 }
1412
1413 static void iommu_disable_translation(struct intel_iommu *iommu)
1414 {
1415         u32 sts;
1416         unsigned long flag;
1417
1418         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1419         iommu->gcmd &= ~DMA_GCMD_TE;
1420         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1421
1422         /* Make sure hardware completes it */
1423         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1424                       readl, (!(sts & DMA_GSTS_TES)), sts);
1425
1426         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1427 }
1428
1429
1430 static int iommu_init_domains(struct intel_iommu *iommu)
1431 {
1432         unsigned long ndomains;
1433         unsigned long nlongs;
1434
1435         ndomains = cap_ndoms(iommu->cap);
1436         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1437                  iommu->seq_id, ndomains);
1438         nlongs = BITS_TO_LONGS(ndomains);
1439
1440         spin_lock_init(&iommu->lock);
1441
1442         /* TBD: there might be 64K domains;
1443          * consider a different allocation scheme for future chips
1444          */
1445         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1446         if (!iommu->domain_ids) {
1447                 pr_err("IOMMU%d: allocating domain id array failed\n",
1448                        iommu->seq_id);
1449                 return -ENOMEM;
1450         }
1451         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1452                         GFP_KERNEL);
1453         if (!iommu->domains) {
1454                 pr_err("IOMMU%d: allocating domain array failed\n",
1455                        iommu->seq_id);
1456                 kfree(iommu->domain_ids);
1457                 iommu->domain_ids = NULL;
1458                 return -ENOMEM;
1459         }
1460
1461         /*
1462          * If Caching mode is set, then invalid translations are tagged
1463          * with domain id 0; hence we need to pre-allocate it.
1464          */
1465         if (cap_caching_mode(iommu->cap))
1466                 set_bit(0, iommu->domain_ids);
1467         return 0;
1468 }
1469
1470 static void free_dmar_iommu(struct intel_iommu *iommu)
1471 {
1472         struct dmar_domain *domain;
1473         int i;
1474
1475         if ((iommu->domains) && (iommu->domain_ids)) {
1476                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1477                         /*
1478                          * Domain id 0 is reserved for invalid translation
1479                          * if hardware supports caching mode.
1480                          */
1481                         if (cap_caching_mode(iommu->cap) && i == 0)
1482                                 continue;
1483
1484                         domain = iommu->domains[i];
1485                         clear_bit(i, iommu->domain_ids);
1486                         if (domain_detach_iommu(domain, iommu) == 0 &&
1487                             !domain_type_is_vm(domain))
1488                                 domain_exit(domain);
1489                 }
1490         }
1491
1492         if (iommu->gcmd & DMA_GCMD_TE)
1493                 iommu_disable_translation(iommu);
1494
1495         kfree(iommu->domains);
1496         kfree(iommu->domain_ids);
1497         iommu->domains = NULL;
1498         iommu->domain_ids = NULL;
1499
1500         g_iommus[iommu->seq_id] = NULL;
1501
1502         /* free context mapping */
1503         free_context_table(iommu);
1504 }
1505
1506 static struct dmar_domain *alloc_domain(int flags)
1507 {
1508         /* domain id for virtual machines; it won't be set in the context entry */
1509         static atomic_t vm_domid = ATOMIC_INIT(0);
1510         struct dmar_domain *domain;
1511
1512         domain = alloc_domain_mem();
1513         if (!domain)
1514                 return NULL;
1515
1516         memset(domain, 0, sizeof(*domain));
1517         domain->nid = -1;
1518         domain->flags = flags;
1519         spin_lock_init(&domain->iommu_lock);
1520         INIT_LIST_HEAD(&domain->devices);
1521         if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1522                 domain->id = atomic_inc_return(&vm_domid);
1523
1524         return domain;
1525 }
1526
1527 static int __iommu_attach_domain(struct dmar_domain *domain,
1528                                  struct intel_iommu *iommu)
1529 {
1530         int num;
1531         unsigned long ndomains;
1532
1533         ndomains = cap_ndoms(iommu->cap);
1534         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1535         if (num < ndomains) {
1536                 set_bit(num, iommu->domain_ids);
1537                 iommu->domains[num] = domain;
1538         } else {
1539                 num = -ENOSPC;
1540         }
1541
1542         return num;
1543 }
1544
1545 static int iommu_attach_domain(struct dmar_domain *domain,
1546                                struct intel_iommu *iommu)
1547 {
1548         int num;
1549         unsigned long flags;
1550
1551         spin_lock_irqsave(&iommu->lock, flags);
1552         num = __iommu_attach_domain(domain, iommu);
1553         spin_unlock_irqrestore(&iommu->lock, flags);
1554         if (num < 0)
1555                 pr_err("IOMMU: no free domain ids\n");
1556
1557         return num;
1558 }
1559
1560 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1561                                   struct intel_iommu *iommu)
1562 {
1563         int num;
1564         unsigned long ndomains;
1565
1566         ndomains = cap_ndoms(iommu->cap);
1567         for_each_set_bit(num, iommu->domain_ids, ndomains)
1568                 if (iommu->domains[num] == domain)
1569                         return num;
1570
1571         return __iommu_attach_domain(domain, iommu);
1572 }
1573
1574 static void iommu_detach_domain(struct dmar_domain *domain,
1575                                 struct intel_iommu *iommu)
1576 {
1577         unsigned long flags;
1578         int num, ndomains;
1579
1580         spin_lock_irqsave(&iommu->lock, flags);
1581         if (domain_type_is_vm_or_si(domain)) {
1582                 ndomains = cap_ndoms(iommu->cap);
1583                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1584                         if (iommu->domains[num] == domain) {
1585                                 clear_bit(num, iommu->domain_ids);
1586                                 iommu->domains[num] = NULL;
1587                                 break;
1588                         }
1589                 }
1590         } else {
1591                 clear_bit(domain->id, iommu->domain_ids);
1592                 iommu->domains[domain->id] = NULL;
1593         }
1594         spin_unlock_irqrestore(&iommu->lock, flags);
1595 }
1596
1597 static void domain_attach_iommu(struct dmar_domain *domain,
1598                                struct intel_iommu *iommu)
1599 {
1600         unsigned long flags;
1601
1602         spin_lock_irqsave(&domain->iommu_lock, flags);
1603         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1604                 domain->iommu_count++;
1605                 if (domain->iommu_count == 1)
1606                         domain->nid = iommu->node;
1607                 domain_update_iommu_cap(domain);
1608         }
1609         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1610 }
1611
1612 static int domain_detach_iommu(struct dmar_domain *domain,
1613                                struct intel_iommu *iommu)
1614 {
1615         unsigned long flags;
1616         int count = INT_MAX;
1617
1618         spin_lock_irqsave(&domain->iommu_lock, flags);
1619         if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1620                 count = --domain->iommu_count;
1621                 domain_update_iommu_cap(domain);
1622         }
1623         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1624
1625         return count;
1626 }
1627
1628 static struct iova_domain reserved_iova_list;
1629 static struct lock_class_key reserved_rbtree_key;
1630
1631 static int dmar_init_reserved_ranges(void)
1632 {
1633         struct pci_dev *pdev = NULL;
1634         struct iova *iova;
1635         int i;
1636
1637         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1638
1639         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1640                 &reserved_rbtree_key);
1641
1642         /* IOAPIC ranges shouldn't be accessed by DMA */
1643         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1644                 IOVA_PFN(IOAPIC_RANGE_END));
1645         if (!iova) {
1646                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1647                 return -ENODEV;
1648         }
1649
1650         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1651         for_each_pci_dev(pdev) {
1652                 struct resource *r;
1653
1654                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1655                         r = &pdev->resource[i];
1656                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1657                                 continue;
1658                         iova = reserve_iova(&reserved_iova_list,
1659                                             IOVA_PFN(r->start),
1660                                             IOVA_PFN(r->end));
1661                         if (!iova) {
1662                                 printk(KERN_ERR "Reserve iova failed\n");
1663                                 return -ENODEV;
1664                         }
1665                 }
1666         }
1667         return 0;
1668 }
1669
1670 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1671 {
1672         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1673 }
1674
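/*
 * Round a guest address width up to the nearest width the page-table
 * format can express: 12 bits of page offset plus a whole number of
 * 9-bit stride levels (21, 30, 39, 48 or 57 bits), capped at 64.
 * For example, gaw = 36 gives r = (36 - 12) % 9 = 6 and therefore
 * agaw = 36 + 9 - 6 = 39, while gaw = 48 is already aligned and is
 * returned unchanged.
 */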
1675 static inline int guestwidth_to_adjustwidth(int gaw)
1676 {
1677         int agaw;
1678         int r = (gaw - 12) % 9;
1679
1680         if (r == 0)
1681                 agaw = gaw;
1682         else
1683                 agaw = gaw + 9 - r;
1684         if (agaw > 64)
1685                 agaw = 64;
1686         return agaw;
1687 }
1688
1689 static int domain_init(struct dmar_domain *domain, int guest_width)
1690 {
1691         struct intel_iommu *iommu;
1692         int adjust_width, agaw;
1693         unsigned long sagaw;
1694
1695         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1696         domain_reserve_special_ranges(domain);
1697
1698         /* calculate AGAW */
1699         iommu = domain_get_iommu(domain);
1700         if (guest_width > cap_mgaw(iommu->cap))
1701                 guest_width = cap_mgaw(iommu->cap);
1702         domain->gaw = guest_width;
1703         adjust_width = guestwidth_to_adjustwidth(guest_width);
1704         agaw = width_to_agaw(adjust_width);
1705         sagaw = cap_sagaw(iommu->cap);
1706         if (!test_bit(agaw, &sagaw)) {
1707                 /* hardware doesn't support it, choose a bigger one */
1708                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1709                 agaw = find_next_bit(&sagaw, 5, agaw);
1710                 if (agaw >= 5)
1711                         return -ENODEV;
1712         }
1713         domain->agaw = agaw;
1714
1715         if (ecap_coherent(iommu->ecap))
1716                 domain->iommu_coherency = 1;
1717         else
1718                 domain->iommu_coherency = 0;
1719
1720         if (ecap_sc_support(iommu->ecap))
1721                 domain->iommu_snooping = 1;
1722         else
1723                 domain->iommu_snooping = 0;
1724
1725         if (intel_iommu_superpage)
1726                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1727         else
1728                 domain->iommu_superpage = 0;
1729
1730         domain->nid = iommu->node;
1731
1732         /* always allocate the top pgd */
1733         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1734         if (!domain->pgd)
1735                 return -ENOMEM;
1736         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1737         return 0;
1738 }
1739
1740 static void domain_exit(struct dmar_domain *domain)
1741 {
1742         struct dmar_drhd_unit *drhd;
1743         struct intel_iommu *iommu;
1744         struct page *freelist = NULL;
1745
1746         /* Domain 0 is reserved, so don't process it */
1747         if (!domain)
1748                 return;
1749
1750         /* Flush any lazy unmaps that may reference this domain */
1751         if (!intel_iommu_strict)
1752                 flush_unmaps_timeout(0);
1753
1754         /* remove associated devices */
1755         domain_remove_dev_info(domain);
1756
1757         /* destroy iovas */
1758         put_iova_domain(&domain->iovad);
1759
1760         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1761
1762         /* clear attached or cached domains */
1763         rcu_read_lock();
1764         for_each_active_iommu(iommu, drhd)
1765                 iommu_detach_domain(domain, iommu);
1766         rcu_read_unlock();
1767
1768         dma_free_pagelist(freelist);
1769
1770         free_domain_mem(domain);
1771 }
1772
1773 static int domain_context_mapping_one(struct dmar_domain *domain,
1774                                       struct intel_iommu *iommu,
1775                                       u8 bus, u8 devfn, int translation)
1776 {
1777         struct context_entry *context;
1778         unsigned long flags;
1779         struct dma_pte *pgd;
1780         int id;
1781         int agaw;
1782         struct device_domain_info *info = NULL;
1783
1784         pr_debug("Set context mapping for %02x:%02x.%d\n",
1785                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1786
1787         BUG_ON(!domain->pgd);
1788         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1789                translation != CONTEXT_TT_MULTI_LEVEL);
1790
1791         context = device_to_context_entry(iommu, bus, devfn);
1792         if (!context)
1793                 return -ENOMEM;
1794         spin_lock_irqsave(&iommu->lock, flags);
1795         if (context_present(context)) {
1796                 spin_unlock_irqrestore(&iommu->lock, flags);
1797                 return 0;
1798         }
1799
1800         id = domain->id;
1801         pgd = domain->pgd;
1802
1803         if (domain_type_is_vm_or_si(domain)) {
1804                 if (domain_type_is_vm(domain)) {
1805                         id = iommu_attach_vm_domain(domain, iommu);
1806                         if (id < 0) {
1807                                 spin_unlock_irqrestore(&iommu->lock, flags);
1808                                 pr_err("IOMMU: no free domain ids\n");
1809                                 return -EFAULT;
1810                         }
1811                 }
1812
1813                 /* Skip top levels of page tables for an
1814                  * iommu which has a smaller agaw than the domain's.
1815                  * Unnecessary for PT mode.
1816                  */
1817                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1818                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1819                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1820                                 if (!dma_pte_present(pgd)) {
1821                                         spin_unlock_irqrestore(&iommu->lock, flags);
1822                                         return -ENOMEM;
1823                                 }
1824                         }
1825                 }
1826         }
1827
1828         context_set_domain_id(context, id);
1829
1830         if (translation != CONTEXT_TT_PASS_THROUGH) {
1831                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1832                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1833                                      CONTEXT_TT_MULTI_LEVEL;
1834         }
1835         /*
1836          * In pass through mode, AW must be programmed to indicate the largest
1837          * AGAW value supported by hardware. And ASR is ignored by hardware.
1838          */
1839         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1840                 context_set_address_width(context, iommu->msagaw);
1841         else {
1842                 context_set_address_root(context, virt_to_phys(pgd));
1843                 context_set_address_width(context, iommu->agaw);
1844         }
1845
1846         context_set_translation_type(context, translation);
1847         context_set_fault_enable(context);
1848         context_set_present(context);
1849         domain_flush_cache(domain, context, sizeof(*context));
1850
1851         /*
1852          * It's a non-present to present mapping. If hardware doesn't cache
1853          * non-present entries we only need to flush the write-buffer. If it
1854          * _does_ cache non-present entries, then it does so in the special
1855          * domain #0, which we have to flush:
1856          */
1857         if (cap_caching_mode(iommu->cap)) {
1858                 iommu->flush.flush_context(iommu, 0,
1859                                            (((u16)bus) << 8) | devfn,
1860                                            DMA_CCMD_MASK_NOBIT,
1861                                            DMA_CCMD_DEVICE_INVL);
1862                 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1863         } else {
1864                 iommu_flush_write_buffer(iommu);
1865         }
1866         iommu_enable_dev_iotlb(info);
1867         spin_unlock_irqrestore(&iommu->lock, flags);
1868
1869         domain_attach_iommu(domain, iommu);
1870
1871         return 0;
1872 }
1873
1874 struct domain_context_mapping_data {
1875         struct dmar_domain *domain;
1876         struct intel_iommu *iommu;
1877         int translation;
1878 };
1879
1880 static int domain_context_mapping_cb(struct pci_dev *pdev,
1881                                      u16 alias, void *opaque)
1882 {
1883         struct domain_context_mapping_data *data = opaque;
1884
1885         return domain_context_mapping_one(data->domain, data->iommu,
1886                                           PCI_BUS_NUM(alias), alias & 0xff,
1887                                           data->translation);
1888 }
1889
1890 static int
1891 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1892                        int translation)
1893 {
1894         struct intel_iommu *iommu;
1895         u8 bus, devfn;
1896         struct domain_context_mapping_data data;
1897
1898         iommu = device_to_iommu(dev, &bus, &devfn);
1899         if (!iommu)
1900                 return -ENODEV;
1901
1902         if (!dev_is_pci(dev))
1903                 return domain_context_mapping_one(domain, iommu, bus, devfn,
1904                                                   translation);
1905
1906         data.domain = domain;
1907         data.iommu = iommu;
1908         data.translation = translation;
1909
1910         return pci_for_each_dma_alias(to_pci_dev(dev),
1911                                       &domain_context_mapping_cb, &data);
1912 }
1913
1914 static int domain_context_mapped_cb(struct pci_dev *pdev,
1915                                     u16 alias, void *opaque)
1916 {
1917         struct intel_iommu *iommu = opaque;
1918
1919         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1920 }
1921
1922 static int domain_context_mapped(struct device *dev)
1923 {
1924         struct intel_iommu *iommu;
1925         u8 bus, devfn;
1926
1927         iommu = device_to_iommu(dev, &bus, &devfn);
1928         if (!iommu)
1929                 return -ENODEV;
1930
1931         if (!dev_is_pci(dev))
1932                 return device_context_mapped(iommu, bus, devfn);
1933
1934         return !pci_for_each_dma_alias(to_pci_dev(dev),
1935                                        domain_context_mapped_cb, iommu);
1936 }
1937
1938 /* Returns a number of VTD pages, but aligned to MM page size */
1939 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1940                                             size_t size)
1941 {
1942         host_addr &= ~PAGE_MASK;
1943         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1944 }
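/*
 * Example (assuming 4KiB pages, PAGE_SIZE == VTD_PAGE_SIZE): an 8KiB
 * buffer starting at page offset 0x003 needs
 * PAGE_ALIGN(0x003 + 0x2000) >> VTD_PAGE_SHIFT = 3 VT-d pages, even
 * though the buffer itself is only two pages long.
 */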
1945
1946 /* Return largest possible superpage level for a given mapping */
1947 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1948                                           unsigned long iov_pfn,
1949                                           unsigned long phy_pfn,
1950                                           unsigned long pages)
1951 {
1952         int support, level = 1;
1953         unsigned long pfnmerge;
1954
1955         support = domain->iommu_superpage;
1956
1957         /* To use a large page, the virtual *and* physical addresses
1958            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1959            of them will mean we have to use smaller pages. So just
1960            merge them and check both at once. */
1961         pfnmerge = iov_pfn | phy_pfn;
1962
1963         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1964                 pages >>= VTD_STRIDE_SHIFT;
1965                 if (!pages)
1966                         break;
1967                 pfnmerge >>= VTD_STRIDE_SHIFT;
1968                 level++;
1969                 support--;
1970         }
1971         return level;
1972 }
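/*
 * Example: a request covering at least 512 pages where both iov_pfn
 * and phy_pfn have their low 9 bits clear (2MiB alignment with 4KiB
 * pages) returns level 2, so a 2MiB superpage can be used, provided
 * domain->iommu_superpage allows it; any stray low bit or a shorter
 * run keeps the mapping at level 1 (4KiB pages).
 */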
1973
1974 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1975                             struct scatterlist *sg, unsigned long phys_pfn,
1976                             unsigned long nr_pages, int prot)
1977 {
1978         struct dma_pte *first_pte = NULL, *pte = NULL;
1979         phys_addr_t uninitialized_var(pteval);
1980         unsigned long sg_res;
1981         unsigned int largepage_lvl = 0;
1982         unsigned long lvl_pages = 0;
1983
1984         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
1985
1986         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1987                 return -EINVAL;
1988
1989         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1990
1991         if (sg)
1992                 sg_res = 0;
1993         else {
1994                 sg_res = nr_pages + 1;
1995                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1996         }
1997
1998         while (nr_pages > 0) {
1999                 uint64_t tmp;
2000
2001                 if (!sg_res) {
2002                         sg_res = aligned_nrpages(sg->offset, sg->length);
2003                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2004                         sg->dma_length = sg->length;
2005                         pteval = page_to_phys(sg_page(sg)) | prot;
2006                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2007                 }
2008
2009                 if (!pte) {
2010                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2011
2012                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2013                         if (!pte)
2014                                 return -ENOMEM;
2015                         /* It is a large page */
2016                         if (largepage_lvl > 1) {
2017                                 pteval |= DMA_PTE_LARGE_PAGE;
2018                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2019                                 /*
2020                                  * Ensure that old small page tables are
2021                                  * removed to make room for superpage,
2022                                  * if they exist.
2023                                  */
2024                                 dma_pte_free_pagetable(domain, iov_pfn,
2025                                                        iov_pfn + lvl_pages - 1);
2026                         } else {
2027                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2028                         }
2029
2030                 }
2031                 /* We don't need a lock here; nobody else
2032                  * touches this iova range
2033                  */
2034                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2035                 if (tmp) {
2036                         static int dumps = 5;
2037                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2038                                iov_pfn, tmp, (unsigned long long)pteval);
2039                         if (dumps) {
2040                                 dumps--;
2041                                 debug_dma_dump_mappings(NULL);
2042                         }
2043                         WARN_ON(1);
2044                 }
2045
2046                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2047
2048                 BUG_ON(nr_pages < lvl_pages);
2049                 BUG_ON(sg_res < lvl_pages);
2050
2051                 nr_pages -= lvl_pages;
2052                 iov_pfn += lvl_pages;
2053                 phys_pfn += lvl_pages;
2054                 pteval += lvl_pages * VTD_PAGE_SIZE;
2055                 sg_res -= lvl_pages;
2056
2057                 /* If the next PTE would be the first in a new page, then we
2058                    need to flush the cache on the entries we've just written.
2059                    And then we'll need to recalculate 'pte', so clear it and
2060                    let it get set again in the if (!pte) block above.
2061
2062                    If we're done (!nr_pages) we need to flush the cache too.
2063
2064                    Also if we've been setting superpages, we may need to
2065                    recalculate 'pte' and switch back to smaller pages for the
2066                    end of the mapping, if the trailing size is not enough to
2067                    use another superpage (i.e. sg_res < lvl_pages). */
2068                 pte++;
2069                 if (!nr_pages || first_pte_in_page(pte) ||
2070                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2071                         domain_flush_cache(domain, first_pte,
2072                                            (void *)pte - (void *)first_pte);
2073                         pte = NULL;
2074                 }
2075
2076                 if (!sg_res && nr_pages)
2077                         sg = sg_next(sg);
2078         }
2079         return 0;
2080 }
2081
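/*
 * The two wrappers below select __domain_mapping()'s two modes:
 * domain_sg_mapping() walks a scatterlist (phys_pfn is ignored and
 * taken from each sg entry), while domain_pfn_mapping() maps a
 * physically contiguous pfn range (sg == NULL, so sg_res is primed
 * from nr_pages and never runs out).
 */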
2082 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2083                                     struct scatterlist *sg, unsigned long nr_pages,
2084                                     int prot)
2085 {
2086         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2087 }
2088
2089 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2090                                      unsigned long phys_pfn, unsigned long nr_pages,
2091                                      int prot)
2092 {
2093         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2094 }
2095
2096 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2097 {
2098         if (!iommu)
2099                 return;
2100
2101         clear_context_table(iommu, bus, devfn);
2102         iommu->flush.flush_context(iommu, 0, 0, 0,
2103                                            DMA_CCMD_GLOBAL_INVL);
2104         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2105 }
2106
2107 static inline void unlink_domain_info(struct device_domain_info *info)
2108 {
2109         assert_spin_locked(&device_domain_lock);
2110         list_del(&info->link);
2111         list_del(&info->global);
2112         if (info->dev)
2113                 info->dev->archdata.iommu = NULL;
2114 }
2115
2116 static void domain_remove_dev_info(struct dmar_domain *domain)
2117 {
2118         struct device_domain_info *info, *tmp;
2119         unsigned long flags;
2120
2121         spin_lock_irqsave(&device_domain_lock, flags);
2122         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2123                 unlink_domain_info(info);
2124                 spin_unlock_irqrestore(&device_domain_lock, flags);
2125
2126                 iommu_disable_dev_iotlb(info);
2127                 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2128
2129                 if (domain_type_is_vm(domain)) {
2130                         iommu_detach_dependent_devices(info->iommu, info->dev);
2131                         domain_detach_iommu(domain, info->iommu);
2132                 }
2133
2134                 free_devinfo_mem(info);
2135                 spin_lock_irqsave(&device_domain_lock, flags);
2136         }
2137         spin_unlock_irqrestore(&device_domain_lock, flags);
2138 }
2139
2140 /*
2141  * find_domain
2142  * Note: we use struct device->archdata.iommu to store the info
2143  */
2144 static struct dmar_domain *find_domain(struct device *dev)
2145 {
2146         struct device_domain_info *info;
2147
2148         /* No lock here, assumes no domain exit in normal case */
2149         info = dev->archdata.iommu;
2150         if (info)
2151                 return info->domain;
2152         return NULL;
2153 }
2154
2155 static inline struct device_domain_info *
2156 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2157 {
2158         struct device_domain_info *info;
2159
2160         list_for_each_entry(info, &device_domain_list, global)
2161                 if (info->iommu->segment == segment && info->bus == bus &&
2162                     info->devfn == devfn)
2163                         return info;
2164
2165         return NULL;
2166 }
2167
2168 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2169                                                 int bus, int devfn,
2170                                                 struct device *dev,
2171                                                 struct dmar_domain *domain)
2172 {
2173         struct dmar_domain *found = NULL;
2174         struct device_domain_info *info;
2175         unsigned long flags;
2176
2177         info = alloc_devinfo_mem();
2178         if (!info)
2179                 return NULL;
2180
2181         info->bus = bus;
2182         info->devfn = devfn;
2183         info->dev = dev;
2184         info->domain = domain;
2185         info->iommu = iommu;
2186
2187         spin_lock_irqsave(&device_domain_lock, flags);
2188         if (dev)
2189                 found = find_domain(dev);
2190         else {
2191                 struct device_domain_info *info2;
2192                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2193                 if (info2)
2194                         found = info2->domain;
2195         }
2196         if (found) {
2197                 spin_unlock_irqrestore(&device_domain_lock, flags);
2198                 free_devinfo_mem(info);
2199                 /* Caller must free the original domain */
2200                 return found;
2201         }
2202
2203         list_add(&info->link, &domain->devices);
2204         list_add(&info->global, &device_domain_list);
2205         if (dev)
2206                 dev->archdata.iommu = info;
2207         spin_unlock_irqrestore(&device_domain_lock, flags);
2208
2209         return domain;
2210 }
2211
2212 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2213 {
2214         *(u16 *)opaque = alias;
2215         return 0;
2216 }
2217
2218 /* domain is initialized */
2219 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2220 {
2221         struct dmar_domain *domain, *tmp;
2222         struct intel_iommu *iommu;
2223         struct device_domain_info *info;
2224         u16 dma_alias;
2225         unsigned long flags;
2226         u8 bus, devfn;
2227
2228         domain = find_domain(dev);
2229         if (domain)
2230                 return domain;
2231
2232         iommu = device_to_iommu(dev, &bus, &devfn);
2233         if (!iommu)
2234                 return NULL;
2235
2236         if (dev_is_pci(dev)) {
2237                 struct pci_dev *pdev = to_pci_dev(dev);
2238
2239                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2240
2241                 spin_lock_irqsave(&device_domain_lock, flags);
2242                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2243                                                       PCI_BUS_NUM(dma_alias),
2244                                                       dma_alias & 0xff);
2245                 if (info) {
2246                         iommu = info->iommu;
2247                         domain = info->domain;
2248                 }
2249                 spin_unlock_irqrestore(&device_domain_lock, flags);
2250
2251                 /* DMA alias already has a domain, use it */
2252                 if (info)
2253                         goto found_domain;
2254         }
2255
2256         /* Allocate and initialize new domain for the device */
2257         domain = alloc_domain(0);
2258         if (!domain)
2259                 return NULL;
2260         domain->id = iommu_attach_domain(domain, iommu);
2261         if (domain->id < 0) {
2262                 free_domain_mem(domain);
2263                 return NULL;
2264         }
2265         domain_attach_iommu(domain, iommu);
2266         if (domain_init(domain, gaw)) {
2267                 domain_exit(domain);
2268                 return NULL;
2269         }
2270
2271         /* register PCI DMA alias device */
2272         if (dev_is_pci(dev)) {
2273                 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2274                                            dma_alias & 0xff, NULL, domain);
2275
2276                 if (!tmp || tmp != domain) {
2277                         domain_exit(domain);
2278                         domain = tmp;
2279                 }
2280
2281                 if (!domain)
2282                         return NULL;
2283         }
2284
2285 found_domain:
2286         tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2287
2288         if (!tmp || tmp != domain) {
2289                 domain_exit(domain);
2290                 domain = tmp;
2291         }
2292
2293         return domain;
2294 }
2295
2296 static int iommu_identity_mapping;
2297 #define IDENTMAP_ALL            1
2298 #define IDENTMAP_GFX            2
2299 #define IDENTMAP_AZALIA         4
2300
2301 static int iommu_domain_identity_map(struct dmar_domain *domain,
2302                                      unsigned long long start,
2303                                      unsigned long long end)
2304 {
2305         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2306         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2307
2308         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2309                           dma_to_mm_pfn(last_vpfn))) {
2310                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2311                 return -ENOMEM;
2312         }
2313
2314         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2315                  start, end, domain->id);
2316         /*
2317          * RMRR range might have overlap with physical memory range,
2318          * clear it first
2319          */
2320         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2321
2322         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2323                                   last_vpfn - first_vpfn + 1,
2324                                   DMA_PTE_READ|DMA_PTE_WRITE);
2325 }
2326
2327 static int iommu_prepare_identity_map(struct device *dev,
2328                                       unsigned long long start,
2329                                       unsigned long long end)
2330 {
2331         struct dmar_domain *domain;
2332         int ret;
2333
2334         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2335         if (!domain)
2336                 return -ENOMEM;
2337
2338         /* For _hardware_ passthrough, don't bother. But for software
2339            passthrough, we do it anyway -- it may indicate a memory
2340            range which is reserved in E820 and so didn't get set
2341            up to start with in si_domain */
2342         if (domain == si_domain && hw_pass_through) {
2343                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2344                        dev_name(dev), start, end);
2345                 return 0;
2346         }
2347
2348         printk(KERN_INFO
2349                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2350                dev_name(dev), start, end);
2351
2352         if (end < start) {
2353                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2354                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2355                         dmi_get_system_info(DMI_BIOS_VENDOR),
2356                         dmi_get_system_info(DMI_BIOS_VERSION),
2357                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2358                 ret = -EIO;
2359                 goto error;
2360         }
2361
2362         if (end >> agaw_to_width(domain->agaw)) {
2363                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2364                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2365                      agaw_to_width(domain->agaw),
2366                      dmi_get_system_info(DMI_BIOS_VENDOR),
2367                      dmi_get_system_info(DMI_BIOS_VERSION),
2368                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2369                 ret = -EIO;
2370                 goto error;
2371         }
2372
2373         ret = iommu_domain_identity_map(domain, start, end);
2374         if (ret)
2375                 goto error;
2376
2377         /* context entry init */
2378         ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2379         if (ret)
2380                 goto error;
2381
2382         return 0;
2383
2384  error:
2385         domain_exit(domain);
2386         return ret;
2387 }
2388
2389 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2390                                          struct device *dev)
2391 {
2392         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2393                 return 0;
2394         return iommu_prepare_identity_map(dev, rmrr->base_address,
2395                                           rmrr->end_address);
2396 }
2397
2398 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2399 static inline void iommu_prepare_isa(void)
2400 {
2401         struct pci_dev *pdev;
2402         int ret;
2403
2404         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2405         if (!pdev)
2406                 return;
2407
2408         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2409         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2410
2411         if (ret)
2412                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2413                        "floppy might not work\n");
2414
2415         pci_dev_put(pdev);
2416 }
2417 #else
2418 static inline void iommu_prepare_isa(void)
2419 {
2420         return;
2421 }
2422 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2423
2424 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2425
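/*
 * The static identity (si) domain is a single domain that 1:1-maps all
 * usable physical memory, used for devices that get an identity
 * mapping instead of per-device remapping. With hardware pass-through
 * the page tables are never walked, so the memory ranges are only
 * mapped when hw == 0.
 */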
2426 static int __init si_domain_init(int hw)
2427 {
2428         struct dmar_drhd_unit *drhd;
2429         struct intel_iommu *iommu;
2430         int nid, ret = 0;
2431         bool first = true;
2432
2433         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2434         if (!si_domain)
2435                 return -EFAULT;
2436
2437         for_each_active_iommu(iommu, drhd) {
2438                 ret = iommu_attach_domain(si_domain, iommu);
2439                 if (ret < 0) {
2440                         domain_exit(si_domain);
2441                         return -EFAULT;
2442                 } else if (first) {
2443                         si_domain->id = ret;
2444                         first = false;
2445                 } else if (si_domain->id != ret) {
2446                         domain_exit(si_domain);
2447                         return -EFAULT;
2448                 }
2449                 domain_attach_iommu(si_domain, iommu);
2450         }
2451
2452         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2453                 domain_exit(si_domain);
2454                 return -EFAULT;
2455         }
2456
2457         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2458                  si_domain->id);
2459
2460         if (hw)
2461                 return 0;
2462
2463         for_each_online_node(nid) {
2464                 unsigned long start_pfn, end_pfn;
2465                 int i;
2466
2467                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2468                         ret = iommu_domain_identity_map(si_domain,
2469                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2470                         if (ret)
2471                                 return ret;
2472                 }
2473         }
2474
2475         return 0;
2476 }
2477
2478 static int identity_mapping(struct device *dev)
2479 {
2480         struct device_domain_info *info;
2481
2482         if (likely(!iommu_identity_mapping))
2483                 return 0;
2484
2485         info = dev->archdata.iommu;
2486         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2487                 return (info->domain == si_domain);
2488
2489         return 0;
2490 }
2491
2492 static int domain_add_dev_info(struct dmar_domain *domain,
2493                                struct device *dev, int translation)
2494 {
2495         struct dmar_domain *ndomain;
2496         struct intel_iommu *iommu;
2497         u8 bus, devfn;
2498         int ret;
2499
2500         iommu = device_to_iommu(dev, &bus, &devfn);
2501         if (!iommu)
2502                 return -ENODEV;
2503
2504         ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2505         if (ndomain != domain)
2506                 return -EBUSY;
2507
2508         ret = domain_context_mapping(domain, dev, translation);
2509         if (ret) {
2510                 domain_remove_one_dev_info(domain, dev);
2511                 return ret;
2512         }
2513
2514         return 0;
2515 }
2516
2517 static bool device_has_rmrr(struct device *dev)
2518 {
2519         struct dmar_rmrr_unit *rmrr;
2520         struct device *tmp;
2521         int i;
2522
2523         rcu_read_lock();
2524         for_each_rmrr_units(rmrr) {
2525                 /*
2526                  * Return TRUE if this RMRR contains the device that
2527                  * is passed in.
2528                  */
2529                 for_each_active_dev_scope(rmrr->devices,
2530                                           rmrr->devices_cnt, i, tmp)
2531                         if (tmp == dev) {
2532                                 rcu_read_unlock();
2533                                 return true;
2534                         }
2535         }
2536         rcu_read_unlock();
2537         return false;
2538 }
2539
2540 static int iommu_should_identity_map(struct device *dev, int startup)
2541 {
2542
2543         if (dev_is_pci(dev)) {
2544                 struct pci_dev *pdev = to_pci_dev(dev);
2545
2546                 /*
2547                  * We want to prevent any device associated with an RMRR from
2548                  * getting placed into the SI Domain. This is done because
2549                  * problems exist when devices are moved in and out of domains
2550                  * and their respective RMRR info is lost. We exempt USB devices
2551                  * from this process due to their usage of RMRRs that are known
2552                  * to not be needed after BIOS hand-off to OS.
2553                  */
2554                 if (device_has_rmrr(dev) &&
2555                     (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2556                         return 0;
2557
2558                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2559                         return 1;
2560
2561                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2562                         return 1;
2563
2564                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2565                         return 0;
2566
2567                 /*
2568                  * We want to start off with all devices in the 1:1 domain, and
2569                  * take them out later if we find they can't access all of memory.
2570                  *
2571                  * However, we can't do this for PCI devices behind bridges,
2572                  * because all PCI devices behind the same bridge will end up
2573                  * with the same source-id on their transactions.
2574                  *
2575                  * Practically speaking, we can't change things around for these
2576                  * devices at run-time, because we can't be sure there'll be no
2577                  * DMA transactions in flight for any of their siblings.
2578                  *
2579                  * So PCI devices (unless they're on the root bus) as well as
2580                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2581                  * the 1:1 domain, just in _case_ one of their siblings turns out
2582                  * not to be able to map all of memory.
2583                  */
2584                 if (!pci_is_pcie(pdev)) {
2585                         if (!pci_is_root_bus(pdev->bus))
2586                                 return 0;
2587                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2588                                 return 0;
2589                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2590                         return 0;
2591         } else {
2592                 if (device_has_rmrr(dev))
2593                         return 0;
2594         }
2595
2596         /*
2597          * At boot time, we don't yet know if devices will be 64-bit capable.
2598          * Assume that they will — if they turn out not to be, then we can
2599          * take them out of the 1:1 domain later.
2600          */
2601         if (!startup) {
2602                 /*
2603                  * If the device's dma_mask is less than the system's memory
2604                  * size then this is not a candidate for identity mapping.
2605                  */
2606                 u64 dma_mask = *dev->dma_mask;
2607
2608                 if (dev->coherent_dma_mask &&
2609                     dev->coherent_dma_mask < dma_mask)
2610                         dma_mask = dev->coherent_dma_mask;
2611
2612                 return dma_mask >= dma_get_required_mask(dev);
2613         }
2614
2615         return 1;
2616 }
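/*
 * Summary of the policy above: devices with required RMRRs (other than
 * USB controllers) never get the identity map; Azalia audio and
 * graphics devices can be forced into it with IDENTMAP_AZALIA /
 * IDENTMAP_GFX; everything else needs IDENTMAP_ALL, must not be a
 * conventional PCI device behind a bridge (or a PCI(e)-to-PCI bridge
 * itself), and at run time must have a DMA mask that covers
 * dma_get_required_mask().
 */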
2617
2618 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2619 {
2620         int ret;
2621
2622         if (!iommu_should_identity_map(dev, 1))
2623                 return 0;
2624
2625         ret = domain_add_dev_info(si_domain, dev,
2626                                   hw ? CONTEXT_TT_PASS_THROUGH :
2627                                        CONTEXT_TT_MULTI_LEVEL);
2628         if (!ret)
2629                 pr_info("IOMMU: %s identity mapping for device %s\n",
2630                         hw ? "hardware" : "software", dev_name(dev));
2631         else if (ret == -ENODEV)
2632                 /* device not associated with an iommu */
2633                 ret = 0;
2634
2635         return ret;
2636 }
2637
2638
2639 static int __init iommu_prepare_static_identity_mapping(int hw)
2640 {
2641         struct pci_dev *pdev = NULL;
2642         struct dmar_drhd_unit *drhd;
2643         struct intel_iommu *iommu;
2644         struct device *dev;
2645         int i;
2646         int ret = 0;
2647
2648         ret = si_domain_init(hw);
2649         if (ret)
2650                 return -EFAULT;
2651
2652         for_each_pci_dev(pdev) {
2653                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2654                 if (ret)
2655                         return ret;
2656         }
2657
2658         for_each_active_iommu(iommu, drhd)
2659                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2660                         struct acpi_device_physical_node *pn;
2661                         struct acpi_device *adev;
2662
2663                         if (dev->bus != &acpi_bus_type)
2664                                 continue;
2665
2666                         adev = to_acpi_device(dev);
2667                         mutex_lock(&adev->physical_node_lock);
2668                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2669                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2670                                 if (ret)
2671                                         break;
2672                         }
2673                         mutex_unlock(&adev->physical_node_lock);
2674                         if (ret)
2675                                 return ret;
2676                 }
2677
2678         return 0;
2679 }
2680
2681 static int __init init_dmars(void)
2682 {
2683         struct dmar_drhd_unit *drhd;
2684         struct dmar_rmrr_unit *rmrr;
2685         struct device *dev;
2686         struct intel_iommu *iommu;
2687         int i, ret;
2688
2689         /*
2690          * for each drhd
2691          *    allocate root
2692          *    initialize and program root entry to not present
2693          * endfor
2694          */
2695         for_each_drhd_unit(drhd) {
2696                 /*
2697                  * lock not needed as this is only incremented in the single-
2698                  * threaded kernel __init code path; all other accesses are
2699                  * read-only
2700                  */
2701                 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2702                         g_num_of_iommus++;
2703                         continue;
2704                 }
2705                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2706                           IOMMU_UNITS_SUPPORTED);
2707         }
2708
2709         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2710                         GFP_KERNEL);
2711         if (!g_iommus) {
2712                 printk(KERN_ERR "Allocating global iommu array failed\n");
2713                 ret = -ENOMEM;
2714                 goto error;
2715         }
2716
2717         deferred_flush = kzalloc(g_num_of_iommus *
2718                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2719         if (!deferred_flush) {
2720                 ret = -ENOMEM;
2721                 goto free_g_iommus;
2722         }
2723
2724         for_each_active_iommu(iommu, drhd) {
2725                 g_iommus[iommu->seq_id] = iommu;
2726
2727                 ret = iommu_init_domains(iommu);
2728                 if (ret)
2729                         goto free_iommu;
2730
2731                 /*
2732                  * TBD:
2733                  * we could share the same root & context tables
2734                  * among all IOMMUs. Need to split it later.
2735                  */
2736                 ret = iommu_alloc_root_entry(iommu);
2737                 if (ret) {
2738                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2739                         goto free_iommu;
2740                 }
2741                 if (!ecap_pass_through(iommu->ecap))
2742                         hw_pass_through = 0;
2743         }
2744
2745         /*
2746          * Start from a sane iommu hardware state.
2747          */
2748         for_each_active_iommu(iommu, drhd) {
2749                 /*
2750                  * If the queued invalidation is already initialized by us
2751                  * (for example, while enabling interrupt-remapping) then
2752                  * things are already rolling from a sane state.
2753                  */
2754                 if (iommu->qi)
2755                         continue;
2756
2757                 /*
2758                  * Clear any previous faults.
2759                  */
2760                 dmar_fault(-1, iommu);
2761                 /*
2762                  * Disable queued invalidation if supported and already enabled
2763                  * before OS handover.
2764                  */
2765                 dmar_disable_qi(iommu);
2766         }
2767
2768         for_each_active_iommu(iommu, drhd) {
2769                 if (dmar_enable_qi(iommu)) {
2770                         /*
2771                          * Queued Invalidate not enabled, use Register Based
2772                          * Invalidate
2773                          */
2774                         iommu->flush.flush_context = __iommu_flush_context;
2775                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2776                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2777                                "invalidation\n",
2778                                 iommu->seq_id,
2779                                (unsigned long long)drhd->reg_base_addr);
2780                 } else {
2781                         iommu->flush.flush_context = qi_flush_context;
2782                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2783                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2784                                "invalidation\n",
2785                                 iommu->seq_id,
2786                                (unsigned long long)drhd->reg_base_addr);
2787                 }
2788         }
2789
2790         if (iommu_pass_through)
2791                 iommu_identity_mapping |= IDENTMAP_ALL;
2792
2793 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2794         iommu_identity_mapping |= IDENTMAP_GFX;
2795 #endif
2796
2797         check_tylersburg_isoch();
2798
2799         /*
2800          * If pass through is not set or not enabled, set up context entries for
2801          * identity mappings for rmrr, gfx, and isa, and possibly fall back to
2802          * static identity mapping if iommu_identity_mapping is set.
2803          */
2804         if (iommu_identity_mapping) {
2805                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2806                 if (ret) {
2807                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2808                         goto free_iommu;
2809                 }
2810         }
2811         /*
2812          * For each rmrr
2813          *   for each dev attached to rmrr
2814          *   do
2815          *     locate drhd for dev, alloc domain for dev
2816          *     allocate free domain
2817          *     allocate page table entries for rmrr
2818          *     if context not allocated for bus
2819          *           allocate and init context
2820          *           set present in root table for this bus
2821          *     init context with domain, translation etc
2822          *    endfor
2823          * endfor
2824          */
2825         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2826         for_each_rmrr_units(rmrr) {
2827                 /* some BIOSes list non-existent devices in the DMAR table. */
2828                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2829                                           i, dev) {
2830                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
2831                         if (ret)
2832                                 printk(KERN_ERR
2833                                        "IOMMU: mapping reserved region failed\n");
2834                 }
2835         }
2836
2837         iommu_prepare_isa();
2838
2839         /*
2840          * for each drhd
2841          *   enable fault log
2842          *   global invalidate context cache
2843          *   global invalidate iotlb
2844          *   enable translation
2845          */
2846         for_each_iommu(iommu, drhd) {
2847                 if (drhd->ignored) {
2848                         /*
2849                          * we always have to disable PMRs or DMA may fail on
2850                          * this device
2851                          */
2852                         if (force_on)
2853                                 iommu_disable_protect_mem_regions(iommu);
2854                         continue;
2855                 }
2856
2857                 iommu_flush_write_buffer(iommu);
2858
2859                 ret = dmar_set_interrupt(iommu);
2860                 if (ret)
2861                         goto free_iommu;
2862
2863                 iommu_set_root_entry(iommu);
2864
2865                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2866                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2867                 iommu_enable_translation(iommu);
2868                 iommu_disable_protect_mem_regions(iommu);
2869         }
2870
2871         return 0;
2872
2873 free_iommu:
2874         for_each_active_iommu(iommu, drhd)
2875                 free_dmar_iommu(iommu);
2876         kfree(deferred_flush);
2877 free_g_iommus:
2878         kfree(g_iommus);
2879 error:
2880         return ret;
2881 }
2882
2883 /* This takes a number of _MM_ pages, not VTD pages */
2884 static struct iova *intel_alloc_iova(struct device *dev,
2885                                      struct dmar_domain *domain,
2886                                      unsigned long nrpages, uint64_t dma_mask)
2887 {
2888         struct iova *iova = NULL;
2889
2890         /* Restrict dma_mask to the width that the iommu can handle */
2891         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2892
2893         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2894                 /*
2895                  * First try to allocate an io virtual address in
2896                  * DMA_BIT_MASK(32) and if that fails then try allocating
2897                  * from the higher range
2898                  */
2899                 iova = alloc_iova(&domain->iovad, nrpages,
2900                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2901                 if (iova)
2902                         return iova;
2903         }
2904         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2905         if (unlikely(!iova)) {
2906                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2907                        nrpages, dev_name(dev));
2908                 return NULL;
2909         }
2910
2911         return iova;
2912 }
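/*
 * Unless dmar_forcedac is set, devices with a DMA mask wider than 32
 * bits are first given IOVAs below 4GiB so that the common case stays
 * within a 32-bit window; only when that range is exhausted does
 * allocation move above it.
 */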
2913
2914 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2915 {
2916         struct dmar_domain *domain;
2917         int ret;
2918
2919         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2920         if (!domain) {
2921                 printk(KERN_ERR "Allocating domain for %s failed\n",
2922                        dev_name(dev));
2923                 return NULL;
2924         }
2925
2926         /* make sure context mapping is ok */
2927         if (unlikely(!domain_context_mapped(dev))) {
2928                 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2929                 if (ret) {
2930                         printk(KERN_ERR "Domain context map for %s failed\n",
2931                                dev_name(dev));
2932                         return NULL;
2933                 }
2934         }
2935
2936         return domain;
2937 }
2938
2939 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2940 {
2941         struct device_domain_info *info;
2942
2943         /* No lock here, assumes no domain exit in normal case */
2944         info = dev->archdata.iommu;
2945         if (likely(info))
2946                 return info->domain;
2947
2948         return __get_valid_domain_for_dev(dev);
2949 }
2950
2951 static int iommu_dummy(struct device *dev)
2952 {
2953         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2954 }
2955
2956 /* Check if the dev needs to go through the non-identity map and unmap process. */
2957 static int iommu_no_mapping(struct device *dev)
2958 {
2959         int found;
2960
2961         if (iommu_dummy(dev))
2962                 return 1;
2963
2964         if (!iommu_identity_mapping)
2965                 return 0;
2966
2967         found = identity_mapping(dev);
2968         if (found) {
2969                 if (iommu_should_identity_map(dev, 0))
2970                         return 1;
2971                 else {
2972                         /*
2973                          * The 32 bit DMA device is removed from si_domain and
2974                          * falls back to non-identity mapping.
2975                          */
2976                         domain_remove_one_dev_info(si_domain, dev);
2977                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2978                                dev_name(dev));
2979                         return 0;
2980                 }
2981         } else {
2982                 /*
2983                  * If a 64 bit DMA device was detached from a VM, the device
2984                  * is put back into si_domain for identity mapping.
2985                  */
2986                 if (iommu_should_identity_map(dev, 0)) {
2987                         int ret;
2988                         ret = domain_add_dev_info(si_domain, dev,
2989                                                   hw_pass_through ?
2990                                                   CONTEXT_TT_PASS_THROUGH :
2991                                                   CONTEXT_TT_MULTI_LEVEL);
2992                         if (!ret) {
2993                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2994                                        dev_name(dev));
2995                                 return 1;
2996                         }
2997                 }
2998         }
2999
3000         return 0;
3001 }
3002
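/*
 * Rough summary of the decision made in iommu_no_mapping() above (derived
 * from the code; the dma-mask details live in iommu_should_identity_map()):
 *
 *      dummy device (quirked/ignored DMAR unit)  -> bypass translation
 *      identity mapping globally disabled        -> translate
 *      in si_domain, still eligible              -> keep identity mapping
 *      in si_domain, now 32-bit limited          -> drop to translation
 *      not in si_domain, 64-bit capable          -> add to si_domain, bypass
 */
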
3003 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3004                                      size_t size, int dir, u64 dma_mask)
3005 {
3006         struct dmar_domain *domain;
3007         phys_addr_t start_paddr;
3008         struct iova *iova;
3009         int prot = 0;
3010         int ret;
3011         struct intel_iommu *iommu;
3012         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3013
3014         BUG_ON(dir == DMA_NONE);
3015
3016         if (iommu_no_mapping(dev))
3017                 return paddr;
3018
3019         domain = get_valid_domain_for_dev(dev);
3020         if (!domain)
3021                 return 0;
3022
3023         iommu = domain_get_iommu(domain);
3024         size = aligned_nrpages(paddr, size);
3025
3026         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3027         if (!iova)
3028                 goto error;
3029
3030         /*
3031          * Check if DMAR supports zero-length reads on write-only
3032          * mappings.
3033          */
3034         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3035                         !cap_zlr(iommu->cap))
3036                 prot |= DMA_PTE_READ;
3037         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3038                 prot |= DMA_PTE_WRITE;
3039         /*
3040          * paddr to (paddr + size) might span only part of a page, so we should
3041          * map the whole page.  Note: if two parts of one page are mapped
3042          * separately, we might end up with two guest addresses mapping to the
3043          * same host paddr, but this is not a big problem.
3044          */
3045         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3046                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3047         if (ret)
3048                 goto error;
3049
3050         /* it's a non-present to present mapping. Only flush if caching mode */
3051         if (cap_caching_mode(iommu->cap))
3052                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3053         else
3054                 iommu_flush_write_buffer(iommu);
3055
3056         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3057         start_paddr += paddr & ~PAGE_MASK;
3058         return start_paddr;
3059
3060 error:
3061         if (iova)
3062                 __free_iova(&domain->iovad, iova);
3063         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
3064                 dev_name(dev), size, (unsigned long long)paddr, dir);
3065         return 0;
3066 }
3067
3068 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3069                                  unsigned long offset, size_t size,
3070                                  enum dma_data_direction dir,
3071                                  struct dma_attrs *attrs)
3072 {
3073         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3074                                   dir, *dev->dma_mask);
3075 }
3076
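/*
 * Illustrative usage sketch (assumed driver-side code, not part of this
 * file): intel_map_page() is reached through the generic DMA API.  A device
 * driver would typically do something like:
 *
 *      dma_addr_t handle;
 *
 *      handle = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
 *      if (dma_mapping_error(dev, handle))
 *              return -ENOMEM;
 *      ... hand 'handle' to the hardware, then later:
 *      dma_unmap_page(dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
 *
 * dma_mapping_error() lands in intel_mapping_error() further below, which
 * treats a zero dma_addr_t as failure.
 */
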
3077 static void flush_unmaps(void)
3078 {
3079         int i, j;
3080
3081         timer_on = 0;
3082
3083         /* just flush them all */
3084         for (i = 0; i < g_num_of_iommus; i++) {
3085                 struct intel_iommu *iommu = g_iommus[i];
3086                 if (!iommu)
3087                         continue;
3088
3089                 if (!deferred_flush[i].next)
3090                         continue;
3091
3092                 /* In caching mode, global flushes make emulation expensive */
3093                 if (!cap_caching_mode(iommu->cap))
3094                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3095                                          DMA_TLB_GLOBAL_FLUSH);
3096                 for (j = 0; j < deferred_flush[i].next; j++) {
3097                         unsigned long mask;
3098                         struct iova *iova = deferred_flush[i].iova[j];
3099                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3100
3101                         /* On real hardware multiple invalidations are expensive */
3102                         if (cap_caching_mode(iommu->cap))
3103                                 iommu_flush_iotlb_psi(iommu, domain->id,
3104                                         iova->pfn_lo, iova_size(iova),
3105                                         !deferred_flush[i].freelist[j], 0);
3106                         else {
3107                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3108                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3109                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3110                         }
3111                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3112                         if (deferred_flush[i].freelist[j])
3113                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3114                 }
3115                 deferred_flush[i].next = 0;
3116         }
3117
3118         list_size = 0;
3119 }
3120
3121 static void flush_unmaps_timeout(unsigned long data)
3122 {
3123         unsigned long flags;
3124
3125         spin_lock_irqsave(&async_umap_flush_lock, flags);
3126         flush_unmaps();
3127         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3128 }
3129
3130 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3131 {
3132         unsigned long flags;
3133         int next, iommu_id;
3134         struct intel_iommu *iommu;
3135
3136         spin_lock_irqsave(&async_umap_flush_lock, flags);
3137         if (list_size == HIGH_WATER_MARK)
3138                 flush_unmaps();
3139
3140         iommu = domain_get_iommu(dom);
3141         iommu_id = iommu->seq_id;
3142
3143         next = deferred_flush[iommu_id].next;
3144         deferred_flush[iommu_id].domain[next] = dom;
3145         deferred_flush[iommu_id].iova[next] = iova;
3146         deferred_flush[iommu_id].freelist[next] = freelist;
3147         deferred_flush[iommu_id].next++;
3148
3149         if (!timer_on) {
3150                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3151                 timer_on = 1;
3152         }
3153         list_size++;
3154         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3155 }
3156
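/*
 * Note on the batching above: in non-strict mode unmaps are queued per IOMMU
 * in deferred_flush[] and only released when either HIGH_WATER_MARK entries
 * have accumulated (flush_unmaps() is then called synchronously) or the 10ms
 * unmap_timer fires, trading a short window of stale IOTLB entries for far
 * fewer invalidation commands.
 */
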
3157 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3158 {
3159         struct dmar_domain *domain;
3160         unsigned long start_pfn, last_pfn;
3161         struct iova *iova;
3162         struct intel_iommu *iommu;
3163         struct page *freelist;
3164
3165         if (iommu_no_mapping(dev))
3166                 return;
3167
3168         domain = find_domain(dev);
3169         BUG_ON(!domain);
3170
3171         iommu = domain_get_iommu(domain);
3172
3173         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3174         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3175                       (unsigned long long)dev_addr))
3176                 return;
3177
3178         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3179         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3180
3181         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3182                  dev_name(dev), start_pfn, last_pfn);
3183
3184         freelist = domain_unmap(domain, start_pfn, last_pfn);
3185
3186         if (intel_iommu_strict) {
3187                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3188                                       last_pfn - start_pfn + 1, !freelist, 0);
3189                 /* free iova */
3190                 __free_iova(&domain->iovad, iova);
3191                 dma_free_pagelist(freelist);
3192         } else {
3193                 add_unmap(domain, iova, freelist);
3194                 /*
3195                  * Queue up the release of the unmap to save the roughly 1/6 of
3196                  * the CPU time otherwise spent on a synchronous iotlb flush...
3197                  */
3198         }
3199 }
3200
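/*
 * Note: intel_iommu_strict selects between the two branches above.  It
 * defaults to 0 (lazy, batched flushing via add_unmap()) and can be enabled
 * with the "intel_iommu=strict" boot parameter or by the Ironlake graphics
 * quirk near the end of this file, making every unmap flush the IOTLB before
 * the IOVA can be reused.
 */
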
3201 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3202                              size_t size, enum dma_data_direction dir,
3203                              struct dma_attrs *attrs)
3204 {
3205         intel_unmap(dev, dev_addr);
3206 }
3207
3208 static void *intel_alloc_coherent(struct device *dev, size_t size,
3209                                   dma_addr_t *dma_handle, gfp_t flags,
3210                                   struct dma_attrs *attrs)
3211 {
3212         struct page *page = NULL;
3213         int order;
3214
3215         size = PAGE_ALIGN(size);
3216         order = get_order(size);
3217
3218         if (!iommu_no_mapping(dev))
3219                 flags &= ~(GFP_DMA | GFP_DMA32);
3220         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3221                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3222                         flags |= GFP_DMA;
3223                 else
3224                         flags |= GFP_DMA32;
3225         }
3226
3227         if (flags & __GFP_WAIT) {
3228                 unsigned int count = size >> PAGE_SHIFT;
3229
3230                 page = dma_alloc_from_contiguous(dev, count, order);
3231                 if (page && iommu_no_mapping(dev) &&
3232                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3233                         dma_release_from_contiguous(dev, page, count);
3234                         page = NULL;
3235                 }
3236         }
3237
3238         if (!page)
3239                 page = alloc_pages(flags, order);
3240         if (!page)
3241                 return NULL;
3242         memset(page_address(page), 0, size);
3243
3244         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3245                                          DMA_BIDIRECTIONAL,
3246                                          dev->coherent_dma_mask);
3247         if (*dma_handle)
3248                 return page_address(page);
3249         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3250                 __free_pages(page, order);
3251
3252         return NULL;
3253 }
3254
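/*
 * Illustrative usage sketch (assumed driver-side code, not part of this
 * file): a driver reaches intel_alloc_coherent()/intel_free_coherent()
 * through the generic API, e.g. for a descriptor ring:
 *
 *      dma_addr_t dma;
 *      void *ring = dma_alloc_coherent(dev, SZ_4K, &dma, GFP_KERNEL);
 *
 *      if (!ring)
 *              return -ENOMEM;
 *      ... CPU writes descriptors at 'ring', the device reads them at 'dma' ...
 *      dma_free_coherent(dev, SZ_4K, ring, dma);
 */
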
3255 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3256                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3257 {
3258         int order;
3259         struct page *page = virt_to_page(vaddr);
3260
3261         size = PAGE_ALIGN(size);
3262         order = get_order(size);
3263
3264         intel_unmap(dev, dma_handle);
3265         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3266                 __free_pages(page, order);
3267 }
3268
3269 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3270                            int nelems, enum dma_data_direction dir,
3271                            struct dma_attrs *attrs)
3272 {
3273         intel_unmap(dev, sglist[0].dma_address);
3274 }
3275
3276 static int intel_nontranslate_map_sg(struct device *hddev,
3277         struct scatterlist *sglist, int nelems, int dir)
3278 {
3279         int i;
3280         struct scatterlist *sg;
3281
3282         for_each_sg(sglist, sg, nelems, i) {
3283                 BUG_ON(!sg_page(sg));
3284                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3285                 sg->dma_length = sg->length;
3286         }
3287         return nelems;
3288 }
3289
3290 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3291                         enum dma_data_direction dir, struct dma_attrs *attrs)
3292 {
3293         int i;
3294         struct dmar_domain *domain;
3295         size_t size = 0;
3296         int prot = 0;
3297         struct iova *iova = NULL;
3298         int ret;
3299         struct scatterlist *sg;
3300         unsigned long start_vpfn;
3301         struct intel_iommu *iommu;
3302
3303         BUG_ON(dir == DMA_NONE);
3304         if (iommu_no_mapping(dev))
3305                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3306
3307         domain = get_valid_domain_for_dev(dev);
3308         if (!domain)
3309                 return 0;
3310
3311         iommu = domain_get_iommu(domain);
3312
3313         for_each_sg(sglist, sg, nelems, i)
3314                 size += aligned_nrpages(sg->offset, sg->length);
3315
3316         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3317                                 *dev->dma_mask);
3318         if (!iova) {
3319                 sglist->dma_length = 0;
3320                 return 0;
3321         }
3322
3323         /*
3324          * Check if DMAR supports zero-length reads on write-only
3325          * mappings.
3326          */
3327         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3328                         !cap_zlr(iommu->cap))
3329                 prot |= DMA_PTE_READ;
3330         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3331                 prot |= DMA_PTE_WRITE;
3332
3333         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3334
3335         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3336         if (unlikely(ret)) {
3337                 dma_pte_free_pagetable(domain, start_vpfn,
3338                                        start_vpfn + size - 1);
3339                 __free_iova(&domain->iovad, iova);
3340                 return 0;
3341         }
3342
3343         /* it's a non-present to present mapping. Only flush if caching mode */
3344         if (cap_caching_mode(iommu->cap))
3345                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3346         else
3347                 iommu_flush_write_buffer(iommu);
3348
3349         return nelems;
3350 }
3351
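/*
 * Illustrative usage sketch (assumed driver-side code; program_hw_entry() is
 * a placeholder for whatever the device needs): intel_map_sg() is the
 * backend of dma_map_sg().  With an already initialised scatterlist a driver
 * would typically do:
 *
 *      struct scatterlist *sg;
 *      int i, mapped;
 *
 *      mapped = dma_map_sg(dev, sglist, nents, DMA_FROM_DEVICE);
 *      if (!mapped)
 *              return -ENOMEM;
 *      for_each_sg(sglist, sg, mapped, i)
 *              program_hw_entry(sg_dma_address(sg), sg_dma_len(sg));
 *      ...
 *      dma_unmap_sg(dev, sglist, nents, DMA_FROM_DEVICE);
 */
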
3352 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3353 {
3354         return !dma_addr;
3355 }
3356
3357 struct dma_map_ops intel_dma_ops = {
3358         .alloc = intel_alloc_coherent,
3359         .free = intel_free_coherent,
3360         .map_sg = intel_map_sg,
3361         .unmap_sg = intel_unmap_sg,
3362         .map_page = intel_map_page,
3363         .unmap_page = intel_unmap_page,
3364         .mapping_error = intel_mapping_error,
3365 };
3366
3367 static inline int iommu_domain_cache_init(void)
3368 {
3369         int ret = 0;
3370
3371         iommu_domain_cache = kmem_cache_create("iommu_domain",
3372                                          sizeof(struct dmar_domain),
3373                                          0,
3374                                          SLAB_HWCACHE_ALIGN,
3376                                          NULL);
3377         if (!iommu_domain_cache) {
3378                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3379                 ret = -ENOMEM;
3380         }
3381
3382         return ret;
3383 }
3384
3385 static inline int iommu_devinfo_cache_init(void)
3386 {
3387         int ret = 0;
3388
3389         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3390                                          sizeof(struct device_domain_info),
3391                                          0,
3392                                          SLAB_HWCACHE_ALIGN,
3393                                          NULL);
3394         if (!iommu_devinfo_cache) {
3395                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3396                 ret = -ENOMEM;
3397         }
3398
3399         return ret;
3400 }
3401
3402 static inline int iommu_iova_cache_init(void)
3403 {
3404         int ret = 0;
3405
3406         iommu_iova_cache = kmem_cache_create("iommu_iova",
3407                                          sizeof(struct iova),
3408                                          0,
3409                                          SLAB_HWCACHE_ALIGN,
3410                                          NULL);
3411         if (!iommu_iova_cache) {
3412                 printk(KERN_ERR "Couldn't create iova cache\n");
3413                 ret = -ENOMEM;
3414         }
3415
3416         return ret;
3417 }
3418
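/*
 * Note: iommu_iova_cache backs the struct iova allocations made on every DMA
 * map/unmap; the allocation helpers earlier in this file are essentially
 * kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC) with a matching
 * kmem_cache_free() on release, so this cache sits on the DMA hot path.
 */
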
3419 static int __init iommu_init_mempool(void)
3420 {
3421         int ret;
3422         ret = iommu_iova_cache_init();
3423         if (ret)
3424                 return ret;
3425
3426         ret = iommu_domain_cache_init();
3427         if (ret)
3428                 goto domain_error;
3429
3430         ret = iommu_devinfo_cache_init();
3431         if (!ret)
3432                 return ret;
3433
3434         kmem_cache_destroy(iommu_domain_cache);
3435 domain_error:
3436         kmem_cache_destroy(iommu_iova_cache);
3437
3438         return -ENOMEM;
3439 }
3440
3441 static void __init iommu_exit_mempool(void)
3442 {
3443         kmem_cache_destroy(iommu_devinfo_cache);
3444         kmem_cache_destroy(iommu_domain_cache);
3445         kmem_cache_destroy(iommu_iova_cache);
3446
3447 }
3448
3449 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3450 {
3451         struct dmar_drhd_unit *drhd;
3452         u32 vtbar;
3453         int rc;
3454
3455         /* We know that this device on this chipset has its own IOMMU.
3456          * If we find it under a different IOMMU, then the BIOS is lying
3457          * to us. Hope that the IOMMU for this device is actually
3458          * disabled, and it needs no translation...
3459          */
3460         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3461         if (rc) {
3462                 /* "can't" happen */
3463                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3464                 return;
3465         }
3466         vtbar &= 0xffff0000;
3467
3468         /* we know that this iommu should be at offset 0xa000 from vtbar */
3469         drhd = dmar_find_matched_drhd_unit(pdev);
3470         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3471                             TAINT_FIRMWARE_WORKAROUND,
3472                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3473                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3474 }
3475 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3476
3477 static void __init init_no_remapping_devices(void)
3478 {
3479         struct dmar_drhd_unit *drhd;
3480         struct device *dev;
3481         int i;
3482
3483         for_each_drhd_unit(drhd) {
3484                 if (!drhd->include_all) {
3485                         for_each_active_dev_scope(drhd->devices,
3486                                                   drhd->devices_cnt, i, dev)
3487                                 break;
3488                         /* ignore DMAR unit if no devices exist */
3489                         if (i == drhd->devices_cnt)
3490                                 drhd->ignored = 1;
3491                 }
3492         }
3493
3494         for_each_active_drhd_unit(drhd) {
3495                 if (drhd->include_all)
3496                         continue;
3497
3498                 for_each_active_dev_scope(drhd->devices,
3499                                           drhd->devices_cnt, i, dev)
3500                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3501                                 break;
3502                 if (i < drhd->devices_cnt)
3503                         continue;
3504
3505                 /* This IOMMU has *only* gfx devices. Either bypass it or
3506                    set the gfx_mapped flag, as appropriate */
3507                 if (dmar_map_gfx) {
3508                         intel_iommu_gfx_mapped = 1;
3509                 } else {
3510                         drhd->ignored = 1;
3511                         for_each_active_dev_scope(drhd->devices,
3512                                                   drhd->devices_cnt, i, dev)
3513                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3514                 }
3515         }
3516 }
3517
3518 #ifdef CONFIG_SUSPEND
3519 static int init_iommu_hw(void)
3520 {
3521         struct dmar_drhd_unit *drhd;
3522         struct intel_iommu *iommu = NULL;
3523
3524         for_each_active_iommu(iommu, drhd)
3525                 if (iommu->qi)
3526                         dmar_reenable_qi(iommu);
3527
3528         for_each_iommu(iommu, drhd) {
3529                 if (drhd->ignored) {
3530                         /*
3531                          * we always have to disable PMRs or DMA may fail on
3532                          * this device
3533                          */
3534                         if (force_on)
3535                                 iommu_disable_protect_mem_regions(iommu);
3536                         continue;
3537                 }
3538
3539                 iommu_flush_write_buffer(iommu);
3540
3541                 iommu_set_root_entry(iommu);
3542
3543                 iommu->flush.flush_context(iommu, 0, 0, 0,
3544                                            DMA_CCMD_GLOBAL_INVL);
3545                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3546                 iommu_enable_translation(iommu);
3547                 iommu_disable_protect_mem_regions(iommu);
3548         }
3549
3550         return 0;
3551 }
3552
3553 static void iommu_flush_all(void)
3554 {
3555         struct dmar_drhd_unit *drhd;
3556         struct intel_iommu *iommu;
3557
3558         for_each_active_iommu(iommu, drhd) {
3559                 iommu->flush.flush_context(iommu, 0, 0, 0,
3560                                            DMA_CCMD_GLOBAL_INVL);
3561                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3562                                          DMA_TLB_GLOBAL_FLUSH);
3563         }
3564 }
3565
3566 static int iommu_suspend(void)
3567 {
3568         struct dmar_drhd_unit *drhd;
3569         struct intel_iommu *iommu = NULL;
3570         unsigned long flag;
3571
3572         for_each_active_iommu(iommu, drhd) {
3573                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3574                                                  GFP_ATOMIC);
3575                 if (!iommu->iommu_state)
3576                         goto nomem;
3577         }
3578
3579         iommu_flush_all();
3580
3581         for_each_active_iommu(iommu, drhd) {
3582                 iommu_disable_translation(iommu);
3583
3584                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3585
3586                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3587                         readl(iommu->reg + DMAR_FECTL_REG);
3588                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3589                         readl(iommu->reg + DMAR_FEDATA_REG);
3590                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3591                         readl(iommu->reg + DMAR_FEADDR_REG);
3592                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3593                         readl(iommu->reg + DMAR_FEUADDR_REG);
3594
3595                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3596         }
3597         return 0;
3598
3599 nomem:
3600         for_each_active_iommu(iommu, drhd)
3601                 kfree(iommu->iommu_state);
3602
3603         return -ENOMEM;
3604 }
3605
3606 static void iommu_resume(void)
3607 {
3608         struct dmar_drhd_unit *drhd;
3609         struct intel_iommu *iommu = NULL;
3610         unsigned long flag;
3611
3612         if (init_iommu_hw()) {
3613                 if (force_on)
3614                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3615                 else
3616                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3617                 return;
3618         }
3619
3620         for_each_active_iommu(iommu, drhd) {
3621
3622                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3623
3624                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3625                         iommu->reg + DMAR_FECTL_REG);
3626                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3627                         iommu->reg + DMAR_FEDATA_REG);
3628                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3629                         iommu->reg + DMAR_FEADDR_REG);
3630                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3631                         iommu->reg + DMAR_FEUADDR_REG);
3632
3633                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3634         }
3635
3636         for_each_active_iommu(iommu, drhd)
3637                 kfree(iommu->iommu_state);
3638 }
3639
3640 static struct syscore_ops iommu_syscore_ops = {
3641         .resume         = iommu_resume,
3642         .suspend        = iommu_suspend,
3643 };
3644
3645 static void __init init_iommu_pm_ops(void)
3646 {
3647         register_syscore_ops(&iommu_syscore_ops);
3648 }
3649
3650 #else
3651 static inline void init_iommu_pm_ops(void) {}
3652 #endif  /* CONFIG_SUSPEND */
3653
3655 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3656 {
3657         struct acpi_dmar_reserved_memory *rmrr;
3658         struct dmar_rmrr_unit *rmrru;
3659
3660         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3661         if (!rmrru)
3662                 return -ENOMEM;
3663
3664         rmrru->hdr = header;
3665         rmrr = (struct acpi_dmar_reserved_memory *)header;
3666         rmrru->base_address = rmrr->base_address;
3667         rmrru->end_address = rmrr->end_address;
3668         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3669                                 ((void *)rmrr) + rmrr->header.length,
3670                                 &rmrru->devices_cnt);
3671         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3672                 kfree(rmrru);
3673                 return -ENOMEM;
3674         }
3675
3676         list_add(&rmrru->list, &dmar_rmrr_units);
3677
3678         return 0;
3679 }
3680
3681 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3682 {
3683         struct acpi_dmar_atsr *atsr;
3684         struct dmar_atsr_unit *atsru;
3685
3686         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3687         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3688         if (!atsru)
3689                 return -ENOMEM;
3690
3691         atsru->hdr = hdr;
3692         atsru->include_all = atsr->flags & 0x1;
3693         if (!atsru->include_all) {
3694                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3695                                 (void *)atsr + atsr->header.length,
3696                                 &atsru->devices_cnt);
3697                 if (atsru->devices_cnt && atsru->devices == NULL) {
3698                         kfree(atsru);
3699                         return -ENOMEM;
3700                 }
3701         }
3702
3703         list_add_rcu(&atsru->list, &dmar_atsr_units);
3704
3705         return 0;
3706 }
3707
3708 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3709 {
3710         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3711         kfree(atsru);
3712 }
3713
3714 static void intel_iommu_free_dmars(void)
3715 {
3716         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3717         struct dmar_atsr_unit *atsru, *atsr_n;
3718
3719         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3720                 list_del(&rmrru->list);
3721                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3722                 kfree(rmrru);
3723         }
3724
3725         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3726                 list_del(&atsru->list);
3727                 intel_iommu_free_atsr(atsru);
3728         }
3729 }
3730
3731 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3732 {
3733         int i, ret = 1;
3734         struct pci_bus *bus;
3735         struct pci_dev *bridge = NULL;
3736         struct device *tmp;
3737         struct acpi_dmar_atsr *atsr;
3738         struct dmar_atsr_unit *atsru;
3739
3740         dev = pci_physfn(dev);
3741         for (bus = dev->bus; bus; bus = bus->parent) {
3742                 bridge = bus->self;
3743                 if (!bridge || !pci_is_pcie(bridge) ||
3744                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3745                         return 0;
3746                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3747                         break;
3748         }
3749         if (!bridge)
3750                 return 0;
3751
3752         rcu_read_lock();
3753         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3754                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3755                 if (atsr->segment != pci_domain_nr(dev->bus))
3756                         continue;
3757
3758                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3759                         if (tmp == &bridge->dev)
3760                                 goto out;
3761
3762                 if (atsru->include_all)
3763                         goto out;
3764         }
3765         ret = 0;
3766 out:
3767         rcu_read_unlock();
3768
3769         return ret;
3770 }
3771
3772 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3773 {
3774         int ret = 0;
3775         struct dmar_rmrr_unit *rmrru;
3776         struct dmar_atsr_unit *atsru;
3777         struct acpi_dmar_atsr *atsr;
3778         struct acpi_dmar_reserved_memory *rmrr;
3779
3780         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3781                 return 0;
3782
3783         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3784                 rmrr = container_of(rmrru->hdr,
3785                                     struct acpi_dmar_reserved_memory, header);
3786                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3787                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3788                                 ((void *)rmrr) + rmrr->header.length,
3789                                 rmrr->segment, rmrru->devices,
3790                                 rmrru->devices_cnt);
3791                         if (ret < 0)
3792                                 return ret;
3793                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3794                         dmar_remove_dev_scope(info, rmrr->segment,
3795                                 rmrru->devices, rmrru->devices_cnt);
3796                 }
3797         }
3798
3799         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3800                 if (atsru->include_all)
3801                         continue;
3802
3803                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3804                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3805                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3806                                         (void *)atsr + atsr->header.length,
3807                                         atsr->segment, atsru->devices,
3808                                         atsru->devices_cnt);
3809                         if (ret > 0)
3810                                 break;
3811                         else if (ret < 0)
3812                                 return ret;
3813                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3814                         if (dmar_remove_dev_scope(info, atsr->segment,
3815                                         atsru->devices, atsru->devices_cnt))
3816                                 break;
3817                 }
3818         }
3819
3820         return 0;
3821 }
3822
3823 /*
3824  * Here we only respond to the action of a device being unbound from its driver.
3825  *
3826  * A newly added device is not attached to its DMAR domain here yet; that will
3827  * happen when the device is first mapped to an iova.
3828  */
3829 static int device_notifier(struct notifier_block *nb,
3830                                   unsigned long action, void *data)
3831 {
3832         struct device *dev = data;
3833         struct dmar_domain *domain;
3834
3835         if (iommu_dummy(dev))
3836                 return 0;
3837
3838         if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3839             action != BUS_NOTIFY_DEL_DEVICE)
3840                 return 0;
3841
3842         domain = find_domain(dev);
3843         if (!domain)
3844                 return 0;
3845
3846         down_read(&dmar_global_lock);
3847         domain_remove_one_dev_info(domain, dev);
3848         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
3849                 domain_exit(domain);
3850         up_read(&dmar_global_lock);
3851
3852         return 0;
3853 }
3854
3855 static struct notifier_block device_nb = {
3856         .notifier_call = device_notifier,
3857 };
3858
3859 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3860                                        unsigned long val, void *v)
3861 {
3862         struct memory_notify *mhp = v;
3863         unsigned long long start, end;
3864         unsigned long start_vpfn, last_vpfn;
3865
3866         switch (val) {
3867         case MEM_GOING_ONLINE:
3868                 start = mhp->start_pfn << PAGE_SHIFT;
3869                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3870                 if (iommu_domain_identity_map(si_domain, start, end)) {
3871                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3872                                 start, end);
3873                         return NOTIFY_BAD;
3874                 }
3875                 break;
3876
3877         case MEM_OFFLINE:
3878         case MEM_CANCEL_ONLINE:
3879                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3880                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3881                 while (start_vpfn <= last_vpfn) {
3882                         struct iova *iova;
3883                         struct dmar_drhd_unit *drhd;
3884                         struct intel_iommu *iommu;
3885                         struct page *freelist;
3886
3887                         iova = find_iova(&si_domain->iovad, start_vpfn);
3888                         if (iova == NULL) {
3889                                 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3890                                          start_vpfn);
3891                                 break;
3892                         }
3893
3894                         iova = split_and_remove_iova(&si_domain->iovad, iova,
3895                                                      start_vpfn, last_vpfn);
3896                         if (iova == NULL) {
3897                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3898                                         start_vpfn, last_vpfn);
3899                                 return NOTIFY_BAD;
3900                         }
3901
3902                         freelist = domain_unmap(si_domain, iova->pfn_lo,
3903                                                iova->pfn_hi);
3904
3905                         rcu_read_lock();
3906                         for_each_active_iommu(iommu, drhd)
3907                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
3908                                         iova->pfn_lo, iova_size(iova),
3909                                         !freelist, 0);
3910                         rcu_read_unlock();
3911                         dma_free_pagelist(freelist);
3912
3913                         start_vpfn = iova->pfn_hi + 1;
3914                         free_iova_mem(iova);
3915                 }
3916                 break;
3917         }
3918
3919         return NOTIFY_OK;
3920 }
3921
3922 static struct notifier_block intel_iommu_memory_nb = {
3923         .notifier_call = intel_iommu_memory_notifier,
3924         .priority = 0
3925 };
3926
3928 static ssize_t intel_iommu_show_version(struct device *dev,
3929                                         struct device_attribute *attr,
3930                                         char *buf)
3931 {
3932         struct intel_iommu *iommu = dev_get_drvdata(dev);
3933         u32 ver = readl(iommu->reg + DMAR_VER_REG);
3934         return sprintf(buf, "%d:%d\n",
3935                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3936 }
3937 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
3938
3939 static ssize_t intel_iommu_show_address(struct device *dev,
3940                                         struct device_attribute *attr,
3941                                         char *buf)
3942 {
3943         struct intel_iommu *iommu = dev_get_drvdata(dev);
3944         return sprintf(buf, "%llx\n", iommu->reg_phys);
3945 }
3946 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
3947
3948 static ssize_t intel_iommu_show_cap(struct device *dev,
3949                                     struct device_attribute *attr,
3950                                     char *buf)
3951 {
3952         struct intel_iommu *iommu = dev_get_drvdata(dev);
3953         return sprintf(buf, "%llx\n", iommu->cap);
3954 }
3955 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
3956
3957 static ssize_t intel_iommu_show_ecap(struct device *dev,
3958                                     struct device_attribute *attr,
3959                                     char *buf)
3960 {
3961         struct intel_iommu *iommu = dev_get_drvdata(dev);
3962         return sprintf(buf, "%llx\n", iommu->ecap);
3963 }
3964 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
3965
3966 static struct attribute *intel_iommu_attrs[] = {
3967         &dev_attr_version.attr,
3968         &dev_attr_address.attr,
3969         &dev_attr_cap.attr,
3970         &dev_attr_ecap.attr,
3971         NULL,
3972 };
3973
3974 static struct attribute_group intel_iommu_group = {
3975         .name = "intel-iommu",
3976         .attrs = intel_iommu_attrs,
3977 };
3978
3979 const struct attribute_group *intel_iommu_groups[] = {
3980         &intel_iommu_group,
3981         NULL,
3982 };
3983
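/*
 * Note: combined with iommu_device_create() in intel_iommu_init() below,
 * this named attribute group exposes the read-only version, address, cap and
 * ecap files under an "intel-iommu" directory in each IOMMU's entry in the
 * iommu sysfs class.
 */
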
3984 int __init intel_iommu_init(void)
3985 {
3986         int ret = -ENODEV;
3987         struct dmar_drhd_unit *drhd;
3988         struct intel_iommu *iommu;
3989
3990         /* VT-d is required for a TXT/tboot launch, so enforce that */
3991         force_on = tboot_force_iommu();
3992
3993         if (iommu_init_mempool()) {
3994                 if (force_on)
3995                         panic("tboot: Failed to initialize iommu memory\n");
3996                 return -ENOMEM;
3997         }
3998
3999         down_write(&dmar_global_lock);
4000         if (dmar_table_init()) {
4001                 if (force_on)
4002                         panic("tboot: Failed to initialize DMAR table\n");
4003                 goto out_free_dmar;
4004         }
4005
4006         /*
4007          * Disable translation if already enabled prior to OS handover.
4008          */
4009         for_each_active_iommu(iommu, drhd)
4010                 if (iommu->gcmd & DMA_GCMD_TE)
4011                         iommu_disable_translation(iommu);
4012
4013         if (dmar_dev_scope_init() < 0) {
4014                 if (force_on)
4015                         panic("tboot: Failed to initialize DMAR device scope\n");
4016                 goto out_free_dmar;
4017         }
4018
4019         if (no_iommu || dmar_disabled)
4020                 goto out_free_dmar;
4021
4022         if (list_empty(&dmar_rmrr_units))
4023                 printk(KERN_INFO "DMAR: No RMRR found\n");
4024
4025         if (list_empty(&dmar_atsr_units))
4026                 printk(KERN_INFO "DMAR: No ATSR found\n");
4027
4028         if (dmar_init_reserved_ranges()) {
4029                 if (force_on)
4030                         panic("tboot: Failed to reserve iommu ranges\n");
4031                 goto out_free_reserved_range;
4032         }
4033
4034         init_no_remapping_devices();
4035
4036         ret = init_dmars();
4037         if (ret) {
4038                 if (force_on)
4039                         panic("tboot: Failed to initialize DMARs\n");
4040                 printk(KERN_ERR "IOMMU: dmar init failed\n");
4041                 goto out_free_reserved_range;
4042         }
4043         up_write(&dmar_global_lock);
4044         printk(KERN_INFO
4045         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4046
4047         init_timer(&unmap_timer);
4048 #ifdef CONFIG_SWIOTLB
4049         swiotlb = 0;
4050 #endif
4051         dma_ops = &intel_dma_ops;
4052
4053         init_iommu_pm_ops();
4054
4055         for_each_active_iommu(iommu, drhd)
4056                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4057                                                        intel_iommu_groups,
4058                                                        iommu->name);
4059
4060         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4061         bus_register_notifier(&pci_bus_type, &device_nb);
4062         if (si_domain && !hw_pass_through)
4063                 register_memory_notifier(&intel_iommu_memory_nb);
4064
4065         intel_iommu_enabled = 1;
4066
4067         return 0;
4068
4069 out_free_reserved_range:
4070         put_iova_domain(&reserved_iova_list);
4071 out_free_dmar:
4072         intel_iommu_free_dmars();
4073         up_write(&dmar_global_lock);
4074         iommu_exit_mempool();
4075         return ret;
4076 }
4077
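/*
 * Note: the no_iommu, dmar_disabled and dmar_map_gfx flags consulted above
 * are set during kernel command-line parsing; broadly, "intel_iommu=off"
 * disables the driver, "intel_iommu=igfx_off" leaves graphics devices
 * untranslated, and "intel_iommu=strict" was described at intel_unmap()
 * above.
 */
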
4078 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4079 {
4080         struct intel_iommu *iommu = opaque;
4081
4082         iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4083         return 0;
4084 }
4085
4086 /*
4087  * NB - intel-iommu lacks any sort of reference counting for the users of
4088  * dependent devices.  If multiple endpoints have intersecting dependent
4089  * devices, unbinding the driver from any one of them will possibly leave
4090  * the others unable to operate.
4091  */
4092 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4093                                            struct device *dev)
4094 {
4095         if (!iommu || !dev || !dev_is_pci(dev))
4096                 return;
4097
4098         pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4099 }
4100
4101 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4102                                        struct device *dev)
4103 {
4104         struct device_domain_info *info, *tmp;
4105         struct intel_iommu *iommu;
4106         unsigned long flags;
4107         int found = 0;
4108         u8 bus, devfn;
4109
4110         iommu = device_to_iommu(dev, &bus, &devfn);
4111         if (!iommu)
4112                 return;
4113
4114         spin_lock_irqsave(&device_domain_lock, flags);
4115         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4116                 if (info->iommu == iommu && info->bus == bus &&
4117                     info->devfn == devfn) {
4118                         unlink_domain_info(info);
4119                         spin_unlock_irqrestore(&device_domain_lock, flags);
4120
4121                         iommu_disable_dev_iotlb(info);
4122                         iommu_detach_dev(iommu, info->bus, info->devfn);
4123                         iommu_detach_dependent_devices(iommu, dev);
4124                         free_devinfo_mem(info);
4125
4126                         spin_lock_irqsave(&device_domain_lock, flags);
4127
4128                         if (found)
4129                                 break;
4130                         else
4131                                 continue;
4132                 }
4133
4134                 /* if there are no other devices under the same iommu
4135                  * owned by this domain, clear this iommu in iommu_bmp and
4136                  * update the iommu count and coherency
4137                  */
4138                 if (info->iommu == iommu)
4139                         found = 1;
4140         }
4141
4142         spin_unlock_irqrestore(&device_domain_lock, flags);
4143
4144         if (found == 0) {
4145                 domain_detach_iommu(domain, iommu);
4146                 if (!domain_type_is_vm_or_si(domain))
4147                         iommu_detach_domain(domain, iommu);
4148         }
4149 }
4150
4151 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4152 {
4153         int adjust_width;
4154
4155         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
4156         domain_reserve_special_ranges(domain);
4157
4158         /* calculate AGAW */
4159         domain->gaw = guest_width;
4160         adjust_width = guestwidth_to_adjustwidth(guest_width);
4161         domain->agaw = width_to_agaw(adjust_width);
4162
4163         domain->iommu_coherency = 0;
4164         domain->iommu_snooping = 0;
4165         domain->iommu_superpage = 0;
4166         domain->max_addr = 0;
4167
4168         /* always allocate the top pgd */
4169         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4170         if (!domain->pgd)
4171                 return -ENOMEM;
4172         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4173         return 0;
4174 }
4175
4176 static int intel_iommu_domain_init(struct iommu_domain *domain)
4177 {
4178         struct dmar_domain *dmar_domain;
4179
4180         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4181         if (!dmar_domain) {
4182                 printk(KERN_ERR
4183                         "intel_iommu_domain_init: dmar_domain == NULL\n");
4184                 return -ENOMEM;
4185         }
4186         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4187                 printk(KERN_ERR
4188                         "intel_iommu_domain_init() failed\n");
4189                 domain_exit(dmar_domain);
4190                 return -ENOMEM;
4191         }
4192         domain_update_iommu_cap(dmar_domain);
4193         domain->priv = dmar_domain;
4194
4195         domain->geometry.aperture_start = 0;
4196         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4197         domain->geometry.force_aperture = true;
4198
4199         return 0;
4200 }
4201
4202 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4203 {
4204         struct dmar_domain *dmar_domain = domain->priv;
4205
4206         domain->priv = NULL;
4207         domain_exit(dmar_domain);
4208 }
4209
4210 static int intel_iommu_attach_device(struct iommu_domain *domain,
4211                                      struct device *dev)
4212 {
4213         struct dmar_domain *dmar_domain = domain->priv;
4214         struct intel_iommu *iommu;
4215         int addr_width;
4216         u8 bus, devfn;
4217
4218         /* normally dev is not mapped */
4219         if (unlikely(domain_context_mapped(dev))) {
4220                 struct dmar_domain *old_domain;
4221
4222                 old_domain = find_domain(dev);
4223                 if (old_domain) {
4224                         if (domain_type_is_vm_or_si(dmar_domain))
4225                                 domain_remove_one_dev_info(old_domain, dev);
4226                         else
4227                                 domain_remove_dev_info(old_domain);
4228                 }
4229         }
4230
4231         iommu = device_to_iommu(dev, &bus, &devfn);
4232         if (!iommu)
4233                 return -ENODEV;
4234
4235         /* check if this iommu agaw is sufficient for max mapped address */
4236         addr_width = agaw_to_width(iommu->agaw);
4237         if (addr_width > cap_mgaw(iommu->cap))
4238                 addr_width = cap_mgaw(iommu->cap);
4239
4240         if (dmar_domain->max_addr > (1LL << addr_width)) {
4241                 printk(KERN_ERR "%s: iommu width (%d) is not "
4242                        "sufficient for the mapped address (%llx)\n",
4243                        __func__, addr_width, dmar_domain->max_addr);
4244                 return -EFAULT;
4245         }
4246         dmar_domain->gaw = addr_width;
4247
4248         /*
4249          * Knock out extra levels of page tables if necessary
4250          */
4251         while (iommu->agaw < dmar_domain->agaw) {
4252                 struct dma_pte *pte;
4253
4254                 pte = dmar_domain->pgd;
4255                 if (dma_pte_present(pte)) {
4256                         dmar_domain->pgd = (struct dma_pte *)
4257                                 phys_to_virt(dma_pte_addr(pte));
4258                         free_pgtable_page(pte);
4259                 }
4260                 dmar_domain->agaw--;
4261         }
4262
4263         return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4264 }
4265
4266 static void intel_iommu_detach_device(struct iommu_domain *domain,
4267                                       struct device *dev)
4268 {
4269         struct dmar_domain *dmar_domain = domain->priv;
4270
4271         domain_remove_one_dev_info(dmar_domain, dev);
4272 }
4273
4274 static int intel_iommu_map(struct iommu_domain *domain,
4275                            unsigned long iova, phys_addr_t hpa,
4276                            size_t size, int iommu_prot)
4277 {
4278         struct dmar_domain *dmar_domain = domain->priv;
4279         u64 max_addr;
4280         int prot = 0;
4281         int ret;
4282
4283         if (iommu_prot & IOMMU_READ)
4284                 prot |= DMA_PTE_READ;
4285         if (iommu_prot & IOMMU_WRITE)
4286                 prot |= DMA_PTE_WRITE;
4287         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4288                 prot |= DMA_PTE_SNP;
4289
4290         max_addr = iova + size;
4291         if (dmar_domain->max_addr < max_addr) {
4292                 u64 end;
4293
4294                 /* check if minimum agaw is sufficient for mapped address */
4295                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4296                 if (end < max_addr) {
4297                         printk(KERN_ERR "%s: iommu width (%d) is not "
4298                                "sufficient for the mapped address (%llx)\n",
4299                                __func__, dmar_domain->gaw, max_addr);
4300                         return -EFAULT;
4301                 }
4302                 dmar_domain->max_addr = max_addr;
4303         }
4304         /* Round up size to next multiple of PAGE_SIZE, if it and
4305            the low bits of hpa would take us onto the next page */
4306         size = aligned_nrpages(hpa, size);
4307         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4308                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4309         return ret;
4310 }
4311
4312 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4313                                 unsigned long iova, size_t size)
4314 {
4315         struct dmar_domain *dmar_domain = domain->priv;
4316         struct page *freelist = NULL;
4317         struct intel_iommu *iommu;
4318         unsigned long start_pfn, last_pfn;
4319         unsigned int npages;
4320         int iommu_id, num, ndomains, level = 0;
4321
4322         /* Cope with horrid API which requires us to unmap more than the
4323            size argument if it happens to be a large-page mapping. */
4324         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4325                 BUG();
4326
4327         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4328                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4329
4330         start_pfn = iova >> VTD_PAGE_SHIFT;
4331         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4332
4333         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4334
4335         npages = last_pfn - start_pfn + 1;
4336
4337         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4338                 iommu = g_iommus[iommu_id];
4339
4340                 /*
4341                  * find bit position of dmar_domain
4342                  */
4343                 ndomains = cap_ndoms(iommu->cap);
4344                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4345                         if (iommu->domains[num] == dmar_domain)
4346                                 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4347                                                       npages, !freelist, 0);
4348                 }
4350         }
4351
4352         dma_free_pagelist(freelist);
4353
4354         if (dmar_domain->max_addr == iova + size)
4355                 dmar_domain->max_addr = iova;
4356
4357         return size;
4358 }
4359
4360 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4361                                             dma_addr_t iova)
4362 {
4363         struct dmar_domain *dmar_domain = domain->priv;
4364         struct dma_pte *pte;
4365         int level = 0;
4366         u64 phys = 0;
4367
4368         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4369         if (pte)
4370                 phys = dma_pte_addr(pte);
4371
4372         return phys;
4373 }
4374
4375 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4376                                       unsigned long cap)
4377 {
4378         struct dmar_domain *dmar_domain = domain->priv;
4379
4380         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4381                 return dmar_domain->iommu_snooping;
4382         if (cap == IOMMU_CAP_INTR_REMAP)
4383                 return irq_remapping_enabled;
4384
4385         return 0;
4386 }
4387
4388 static int intel_iommu_add_device(struct device *dev)
4389 {
4390         struct intel_iommu *iommu;
4391         struct iommu_group *group;
4392         u8 bus, devfn;
4393
4394         iommu = device_to_iommu(dev, &bus, &devfn);
4395         if (!iommu)
4396                 return -ENODEV;
4397
4398         iommu_device_link(iommu->iommu_dev, dev);
4399
4400         group = iommu_group_get_for_dev(dev);
4401
4402         if (IS_ERR(group))
4403                 return PTR_ERR(group);
4404
4405         iommu_group_put(group);
4406         return 0;
4407 }
4408
4409 static void intel_iommu_remove_device(struct device *dev)
4410 {
4411         struct intel_iommu *iommu;
4412         u8 bus, devfn;
4413
4414         iommu = device_to_iommu(dev, &bus, &devfn);
4415         if (!iommu)
4416                 return;
4417
4418         iommu_group_remove_device(dev);
4419
4420         iommu_device_unlink(iommu->iommu_dev, dev);
4421 }
4422
4423 static const struct iommu_ops intel_iommu_ops = {
4424         .domain_init    = intel_iommu_domain_init,
4425         .domain_destroy = intel_iommu_domain_destroy,
4426         .attach_dev     = intel_iommu_attach_device,
4427         .detach_dev     = intel_iommu_detach_device,
4428         .map            = intel_iommu_map,
4429         .unmap          = intel_iommu_unmap,
4430         .iova_to_phys   = intel_iommu_iova_to_phys,
4431         .domain_has_cap = intel_iommu_domain_has_cap,
4432         .add_device     = intel_iommu_add_device,
4433         .remove_device  = intel_iommu_remove_device,
4434         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4435 };
4436
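/*
 * Illustrative usage sketch (assumed caller-side code; 'iova' and 'phys' are
 * caller-chosen, suitably aligned addresses): these callbacks are driven via
 * the generic IOMMU API, e.g. by VFIO or KVM device assignment:
 *
 *      struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *      if (!dom)
 *              return -ENOMEM;
 *      if (iommu_attach_device(dom, dev))
 *              goto out_free;
 *      if (iommu_map(dom, iova, phys, SZ_4K, IOMMU_READ | IOMMU_WRITE))
 *              goto out_detach;
 *      ...
 *      iommu_unmap(dom, iova, SZ_4K);
 * out_detach:
 *      iommu_detach_device(dom, dev);
 * out_free:
 *      iommu_domain_free(dom);
 */
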
4437 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4438 {
4439         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4440         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4441         dmar_map_gfx = 0;
4442 }
4443
4444 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4445 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4446 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4447 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4448 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4449 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4450 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4451
4452 static void quirk_iommu_rwbf(struct pci_dev *dev)
4453 {
4454         /*
4455          * Mobile 4 Series Chipset neglects to set RWBF capability,
4456          * but needs it. Same seems to hold for the desktop versions.
4457          */
4458         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4459         rwbf_quirk = 1;
4460 }
4461
4462 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4463 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4464 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4465 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4466 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4467 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4468 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4469
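/*
 * GGC is the graphics control register in the integrated graphics
 * device's PCI config space (offset 0x52).  Bits 8-11 encode how much
 * GTT stolen memory the BIOS reserved and whether a separate GTT was
 * set aside for VT-d use.
 */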
4470 #define GGC 0x52
4471 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4472 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4473 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4474 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4475 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4476 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4477 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4478 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4479
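/*
 * Calpella/Ironlake integrated graphics: if the BIOS did not reserve a
 * shadow GTT for VT-d, graphics translation cannot work, so disable the
 * IOMMU for the IGD; otherwise force strict (unbatched) IOTLB flushing,
 * as explained in the inline comment below.
 */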
4480 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4481 {
4482         unsigned short ggc;
4483
4484         if (pci_read_config_word(dev, GGC, &ggc))
4485                 return;
4486
4487         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4488                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4489                 dmar_map_gfx = 0;
4490         } else if (dmar_map_gfx) {
4491                 /* we have to ensure the gfx device is idle before we flush */
4492                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4493                 intel_iommu_strict = 1;
4494         }
4495 }
4496 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4497 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4498 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4499 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4500
4501 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4502    ISOCH DMAR unit for the Azalia sound device, but not give it any
4503    TLB entries, which causes it to deadlock. Check for that.  We do
4504    this in a function called from init_dmars(), instead of in a PCI
4505    quirk, because we don't want to print the obnoxious "BIOS broken"
4506    message if VT-d is actually disabled.
4507 */
4508 static void __init check_tylersburg_isoch(void)
4509 {
4510         struct pci_dev *pdev;
4511         uint32_t vtisochctrl;
4512
4513         /* If there's no Azalia in the system anyway, forget it. */
4514         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4515         if (!pdev)
4516                 return;
4517         pci_dev_put(pdev);
4518
4519         /* System Management Registers. Might be hidden, in which case
4520            we can't do the sanity check. But that's OK, because the
4521            known-broken BIOSes _don't_ actually hide it, so far. */
4522         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4523         if (!pdev)
4524                 return;
4525
4526         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4527                 pci_dev_put(pdev);
4528                 return;
4529         }
4530
4531         pci_dev_put(pdev);
4532
4533         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4534         if (vtisochctrl & 1)
4535                 return;
4536
4537         /* Drop all bits other than the number of TLB entries */
4538         vtisochctrl &= 0x1c;
4539
4540         /* If we have the recommended number of TLB entries (16), fine. */
4541         if (vtisochctrl == 0x10)
4542                 return;
4543
4544         /* Zero TLB entries? The BIOS is badly broken; warn and identity-map Azalia. */
4545         if (!vtisochctrl) {
4546                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4547                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4548                      dmi_get_system_info(DMI_BIOS_VENDOR),
4549                      dmi_get_system_info(DMI_BIOS_VERSION),
4550                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4551                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4552                 return;
4553         }
4554
4555         printk(KERN_WARNING "DMAR: Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
4556                vtisochctrl);
4557 }