[firefly-linux-kernel-4.4.55.git] drivers/iommu/intel-iommu.c
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  *          Joerg Roedel <jroedel@suse.de>
19  */
20
21 #define pr_fmt(fmt)     "DMAR: " fmt
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/memory.h>
36 #include <linux/timer.h>
37 #include <linux/io.h>
38 #include <linux/iova.h>
39 #include <linux/iommu.h>
40 #include <linux/intel-iommu.h>
41 #include <linux/syscore_ops.h>
42 #include <linux/tboot.h>
43 #include <linux/dmi.h>
44 #include <linux/pci-ats.h>
45 #include <linux/memblock.h>
46 #include <linux/dma-contiguous.h>
47 #include <linux/crash_dump.h>
48 #include <asm/irq_remapping.h>
49 #include <asm/cacheflush.h>
50 #include <asm/iommu.h>
51
52 #include "irq_remapping.h"
53
54 #define ROOT_SIZE               VTD_PAGE_SIZE
55 #define CONTEXT_SIZE            VTD_PAGE_SIZE
56
57 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
58 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
59 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
60 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
61
62 #define IOAPIC_RANGE_START      (0xfee00000)
63 #define IOAPIC_RANGE_END        (0xfeefffff)
64 #define IOVA_START_ADDR         (0x1000)
65
66 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
67
68 #define MAX_AGAW_WIDTH 64
69 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
70
71 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
72 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
73
74 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
75    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
76 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
77                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
78 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
79
80 /* IO virtual address start page frame number */
81 #define IOVA_START_PFN          (1)
82
83 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
84 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
85 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
86
87 /* page table handling */
88 #define LEVEL_STRIDE            (9)
89 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
90
91 /*
92  * This bitmap is used to advertise the page sizes our hardware supports
93  * to the IOMMU core, which will then use this information to split
94  * physically contiguous memory regions it is mapping into page sizes
95  * that we support.
96  *
97  * Traditionally the IOMMU core just handed us the mappings directly,
98  * after making sure the size is an order of a 4KiB page and that the
99  * mapping has natural alignment.
100  *
101  * To retain this behavior, we currently advertise that we support
102  * all page sizes that are an order of 4KiB.
103  *
104  * If at some point we'd like to utilize the IOMMU core's new behavior,
105  * we could change this to advertise the real page sizes we support.
106  */
107 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
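/*
 * For reference: ~0xFFFUL has every bit from bit 12 upwards set, so this
 * advertises 4KiB, 8KiB, 16KiB, ... i.e. every power-of-two size that is a
 * multiple of the 4KiB base page, matching the behaviour described above.
 */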
108
109 static inline int agaw_to_level(int agaw)
110 {
111         return agaw + 2;
112 }
113
114 static inline int agaw_to_width(int agaw)
115 {
116         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
117 }
118
119 static inline int width_to_agaw(int width)
120 {
121         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
122 }
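/*
 * Worked example of the agaw/width helpers above: a 48-bit address width
 * gives width_to_agaw(48) == 2, agaw_to_level(2) == 4 page-table levels,
 * and agaw_to_width(2) == 48 again. agaw 1 corresponds to 39 bits
 * (3 levels), agaw 3 to 57 bits (5 levels).
 */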
123
124 static inline unsigned int level_to_offset_bits(int level)
125 {
126         return (level - 1) * LEVEL_STRIDE;
127 }
128
129 static inline int pfn_level_offset(unsigned long pfn, int level)
130 {
131         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
132 }
133
134 static inline unsigned long level_mask(int level)
135 {
136         return -1UL << level_to_offset_bits(level);
137 }
138
139 static inline unsigned long level_size(int level)
140 {
141         return 1UL << level_to_offset_bits(level);
142 }
143
144 static inline unsigned long align_to_level(unsigned long pfn, int level)
145 {
146         return (pfn + level_size(level) - 1) & level_mask(level);
147 }
148
149 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
150 {
151         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
152 }
153
154 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
155    are never going to work. */
156 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
157 {
158         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
159 }
160
161 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
162 {
163         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
164 }
165 static inline unsigned long page_to_dma_pfn(struct page *pg)
166 {
167         return mm_to_dma_pfn(page_to_pfn(pg));
168 }
169 static inline unsigned long virt_to_dma_pfn(void *p)
170 {
171         return page_to_dma_pfn(virt_to_page(p));
172 }
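/*
 * Note: VTD_PAGE_SHIFT is 12, so on configurations where the MM page size
 * is also 4KiB (e.g. x86 with PAGE_SHIFT == 12) the conversions above are
 * identity operations; they only shift when MM pages are larger than VT-d
 * pages.
 */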
173
174 /* global iommu list, set NULL for ignored DMAR units */
175 static struct intel_iommu **g_iommus;
176
177 static void __init check_tylersburg_isoch(void);
178 static int rwbf_quirk;
179
180 /*
 181  * Set to 1 to panic the kernel if VT-d can't be successfully enabled
 182  * (used when the kernel is launched with TXT).
183  */
184 static int force_on = 0;
185
186 /*
187  * 0: Present
188  * 1-11: Reserved
189  * 12-63: Context Ptr (12 - (haw-1))
190  * 64-127: Reserved
191  */
192 struct root_entry {
193         u64     lo;
194         u64     hi;
195 };
196 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
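/*
 * With a 4KiB root table and 16-byte root entries this works out to 256
 * root entries, i.e. one per possible PCI bus number.
 */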
197
198 /*
199  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
200  * if marked present.
201  */
202 static phys_addr_t root_entry_lctp(struct root_entry *re)
203 {
204         if (!(re->lo & 1))
205                 return 0;
206
207         return re->lo & VTD_PAGE_MASK;
208 }
209
210 /*
211  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
212  * if marked present.
213  */
214 static phys_addr_t root_entry_uctp(struct root_entry *re)
215 {
216         if (!(re->hi & 1))
217                 return 0;
218
219         return re->hi & VTD_PAGE_MASK;
220 }
221 /*
222  * low 64 bits:
223  * 0: present
224  * 1: fault processing disable
225  * 2-3: translation type
226  * 12-63: address space root
227  * high 64 bits:
228  * 0-2: address width
 229  * 3-6: available to software (AVAIL)
230  * 8-23: domain id
231  */
232 struct context_entry {
233         u64 lo;
234         u64 hi;
235 };
236
237 static inline void context_clear_pasid_enable(struct context_entry *context)
238 {
239         context->lo &= ~(1ULL << 11);
240 }
241
242 static inline bool context_pasid_enabled(struct context_entry *context)
243 {
244         return !!(context->lo & (1ULL << 11));
245 }
246
247 static inline void context_set_copied(struct context_entry *context)
248 {
249         context->hi |= (1ull << 3);
250 }
251
252 static inline bool context_copied(struct context_entry *context)
253 {
254         return !!(context->hi & (1ULL << 3));
255 }
256
257 static inline bool __context_present(struct context_entry *context)
258 {
259         return (context->lo & 1);
260 }
261
262 static inline bool context_present(struct context_entry *context)
263 {
264         return context_pasid_enabled(context) ?
265              __context_present(context) :
266              __context_present(context) && !context_copied(context);
267 }
268
269 static inline void context_set_present(struct context_entry *context)
270 {
271         context->lo |= 1;
272 }
273
274 static inline void context_set_fault_enable(struct context_entry *context)
275 {
276         context->lo &= (((u64)-1) << 2) | 1;
277 }
278
279 static inline void context_set_translation_type(struct context_entry *context,
280                                                 unsigned long value)
281 {
282         context->lo &= (((u64)-1) << 4) | 3;
283         context->lo |= (value & 3) << 2;
284 }
285
286 static inline void context_set_address_root(struct context_entry *context,
287                                             unsigned long value)
288 {
289         context->lo &= ~VTD_PAGE_MASK;
290         context->lo |= value & VTD_PAGE_MASK;
291 }
292
293 static inline void context_set_address_width(struct context_entry *context,
294                                              unsigned long value)
295 {
296         context->hi |= value & 7;
297 }
298
299 static inline void context_set_domain_id(struct context_entry *context,
300                                          unsigned long value)
301 {
302         context->hi |= (value & ((1 << 16) - 1)) << 8;
303 }
304
305 static inline int context_domain_id(struct context_entry *c)
306 {
307         return((c->hi >> 8) & 0xffff);
308 }
309
310 static inline void context_clear_entry(struct context_entry *context)
311 {
312         context->lo = 0;
313         context->hi = 0;
314 }
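/*
 * Rough sketch of how the accessors above are combined when a context
 * entry is programmed by the context-mapping code later in this file
 * (the order and the translation type vary with the configuration):
 *
 *      context_set_domain_id(context, did);
 *      context_set_address_width(context, agaw);
 *      context_set_address_root(context, virt_to_phys(pgd));
 *      context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *      context_set_fault_enable(context);
 *      context_set_present(context);
 */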
315
316 /*
317  * 0: readable
318  * 1: writable
319  * 2-6: reserved
320  * 7: super page
321  * 8-10: available
322  * 11: snoop behavior
 323  * 12-63: Host physical address
324  */
325 struct dma_pte {
326         u64 val;
327 };
328
329 static inline void dma_clear_pte(struct dma_pte *pte)
330 {
331         pte->val = 0;
332 }
333
334 static inline u64 dma_pte_addr(struct dma_pte *pte)
335 {
336 #ifdef CONFIG_64BIT
337         return pte->val & VTD_PAGE_MASK;
338 #else
339         /* Must have a full atomic 64-bit read */
340         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
341 #endif
342 }
343
344 static inline bool dma_pte_present(struct dma_pte *pte)
345 {
346         return (pte->val & 3) != 0;
347 }
348
349 static inline bool dma_pte_superpage(struct dma_pte *pte)
350 {
351         return (pte->val & DMA_PTE_LARGE_PAGE);
352 }
353
354 static inline int first_pte_in_page(struct dma_pte *pte)
355 {
356         return !((unsigned long)pte & ~VTD_PAGE_MASK);
357 }
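/*
 * first_pte_in_page() is true when @pte sits at the start of a page-table
 * page; the walk loops below use it to notice when incrementing a pte
 * pointer has run off the end of the current 512-entry table.
 */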
358
359 /*
 360  * This domain is a static identity-mapping domain.
 361  *      1. This domain creates a static 1:1 mapping to all usable memory.
 362  *      2. It maps to each iommu if successful.
 363  *      3. Each iommu maps to this domain if successful.
364  */
365 static struct dmar_domain *si_domain;
366 static int hw_pass_through = 1;
367
368 /*
 369  * Domain represents a virtual machine; more than one device across
 370  * iommus may be owned by one domain, e.g. a kvm guest.
371  */
372 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
373
 374 /* si_domain contains multiple devices */
375 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
376
377 #define for_each_domain_iommu(idx, domain)                      \
378         for (idx = 0; idx < g_num_of_iommus; idx++)             \
379                 if (domain->iommu_refcnt[idx])
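/*
 * Iterates @idx over every possible iommu index and runs the body only for
 * iommus that hold a reference on @domain. Like other for/if helper macros
 * it must not be followed by an unbraced else.
 */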
380
381 struct dmar_domain {
382         int     nid;                    /* node id */
383
384         unsigned        iommu_refcnt[DMAR_UNITS_SUPPORTED];
385                                         /* Refcount of devices per iommu */
386
387
388         u16             iommu_did[DMAR_UNITS_SUPPORTED];
389                                         /* Domain ids per IOMMU. Use u16 since
390                                          * domain ids are 16 bit wide according
391                                          * to VT-d spec, section 9.3 */
392
393         struct list_head devices;       /* all devices' list */
394         struct iova_domain iovad;       /* iova's that belong to this domain */
395
396         struct dma_pte  *pgd;           /* virtual address */
397         int             gaw;            /* max guest address width */
398
399         /* adjusted guest address width, 0 is level 2 30-bit */
400         int             agaw;
401
402         int             flags;          /* flags to find out type of domain */
403
404         int             iommu_coherency;/* indicate coherency of iommu access */
405         int             iommu_snooping; /* indicate snooping control feature*/
406         int             iommu_count;    /* reference count of iommu */
407         int             iommu_superpage;/* Level of superpages supported:
408                                            0 == 4KiB (no superpages), 1 == 2MiB,
409                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
410         u64             max_addr;       /* maximum mapped address */
411
412         struct iommu_domain domain;     /* generic domain data structure for
413                                            iommu core */
414 };
415
416 /* PCI domain-device relationship */
417 struct device_domain_info {
418         struct list_head link;  /* link to domain siblings */
419         struct list_head global; /* link to global list */
420         u8 bus;                 /* PCI bus number */
421         u8 devfn;               /* PCI devfn number */
422         u8 pasid_supported:3;
423         u8 pasid_enabled:1;
424         u8 pri_supported:1;
425         u8 pri_enabled:1;
426         u8 ats_supported:1;
427         u8 ats_enabled:1;
428         u8 ats_qdep;
429         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
430         struct intel_iommu *iommu; /* IOMMU used by this device */
431         struct dmar_domain *domain; /* pointer to domain */
432 };
433
434 struct dmar_rmrr_unit {
435         struct list_head list;          /* list of rmrr units   */
436         struct acpi_dmar_header *hdr;   /* ACPI header          */
437         u64     base_address;           /* reserved base address*/
438         u64     end_address;            /* reserved end address */
439         struct dmar_dev_scope *devices; /* target devices */
440         int     devices_cnt;            /* target device count */
441 };
442
443 struct dmar_atsr_unit {
444         struct list_head list;          /* list of ATSR units */
445         struct acpi_dmar_header *hdr;   /* ACPI header */
446         struct dmar_dev_scope *devices; /* target devices */
447         int devices_cnt;                /* target device count */
448         u8 include_all:1;               /* include all ports */
449 };
450
451 static LIST_HEAD(dmar_atsr_units);
452 static LIST_HEAD(dmar_rmrr_units);
453
454 #define for_each_rmrr_units(rmrr) \
455         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
456
457 static void flush_unmaps_timeout(unsigned long data);
458
459 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
460
461 #define HIGH_WATER_MARK 250
462 struct deferred_flush_tables {
463         int next;
464         struct iova *iova[HIGH_WATER_MARK];
465         struct dmar_domain *domain[HIGH_WATER_MARK];
466         struct page *freelist[HIGH_WATER_MARK];
467 };
468
469 static struct deferred_flush_tables *deferred_flush;
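/*
 * Deferred-unmap batching, as used by the unmap path later in this file:
 * freed IOVAs and their page-table freelists are queued in these tables,
 * one per IOMMU, and released together once HIGH_WATER_MARK entries pile
 * up or unmap_timer fires. Booting with intel_iommu=strict bypasses the
 * batching and flushes the IOTLB on every unmap.
 */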
470
 471 /* number of registered intel_iommus (also the size of g_iommus[]) */
472 static int g_num_of_iommus;
473
474 static DEFINE_SPINLOCK(async_umap_flush_lock);
475 static LIST_HEAD(unmaps_to_do);
476
477 static int timer_on;
478 static long list_size;
479
480 static void domain_exit(struct dmar_domain *domain);
481 static void domain_remove_dev_info(struct dmar_domain *domain);
482 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
483                                      struct device *dev);
484 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
485 static void domain_context_clear(struct intel_iommu *iommu,
486                                  struct device *dev);
487 static int domain_detach_iommu(struct dmar_domain *domain,
488                                struct intel_iommu *iommu);
489
490 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
491 int dmar_disabled = 0;
492 #else
493 int dmar_disabled = 1;
494 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
495
496 int intel_iommu_enabled = 0;
497 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
498
499 static int dmar_map_gfx = 1;
500 static int dmar_forcedac;
501 static int intel_iommu_strict;
502 static int intel_iommu_superpage = 1;
503 static int intel_iommu_ecs = 1;
504 static int intel_iommu_pasid28;
505 static int iommu_identity_mapping;
506
507 #define IDENTMAP_ALL            1
508 #define IDENTMAP_GFX            2
509 #define IDENTMAP_AZALIA         4
510
511 /* Broadwell and Skylake have broken ECS support — normal so-called "second
512  * level" translation of DMA requests-without-PASID doesn't actually happen
513  * unless you also set the NESTE bit in an extended context-entry. Which of
514  * course means that SVM doesn't work because it's trying to do nested
515  * translation of the physical addresses it finds in the process page tables,
516  * through the IOVA->phys mapping found in the "second level" page tables.
517  *
 518  * The VT-d specification was retroactively amended to redefine the capability
 519  * bits and pretend that Broadwell/Skylake never happened...
520  * but unfortunately the wrong bit was changed. It's ECS which is broken, but
521  * for some reason it was the PASID capability bit which was redefined (from
522  * bit 28 on BDW/SKL to bit 40 in future).
523  *
524  * So our test for ECS needs to eschew those implementations which set the old
 525  * PASID capability bit 28, since those are the ones on which ECS is broken.
526  * Unless we are working around the 'pasid28' limitations, that is, by putting
527  * the device into passthrough mode for normal DMA and thus masking the bug.
528  */
529 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
530                             (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
531 /* PASID support is thus enabled if ECS is enabled and *either* of the old
532  * or new capability bits are set. */
533 #define pasid_enabled(iommu) (ecs_enabled(iommu) &&                     \
534                               (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
535
536 int intel_iommu_gfx_mapped;
537 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
538
539 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
540 static DEFINE_SPINLOCK(device_domain_lock);
541 static LIST_HEAD(device_domain_list);
542
543 static const struct iommu_ops intel_iommu_ops;
544
545 static bool translation_pre_enabled(struct intel_iommu *iommu)
546 {
547         return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
548 }
549
550 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
551 {
552         iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
553 }
554
555 static void init_translation_status(struct intel_iommu *iommu)
556 {
557         u32 gsts;
558
559         gsts = readl(iommu->reg + DMAR_GSTS_REG);
560         if (gsts & DMA_GSTS_TES)
561                 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
562 }
563
 564 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
565 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
566 {
567         return container_of(dom, struct dmar_domain, domain);
568 }
569
570 static int __init intel_iommu_setup(char *str)
571 {
572         if (!str)
573                 return -EINVAL;
574         while (*str) {
575                 if (!strncmp(str, "on", 2)) {
576                         dmar_disabled = 0;
577                         pr_info("IOMMU enabled\n");
578                 } else if (!strncmp(str, "off", 3)) {
579                         dmar_disabled = 1;
580                         pr_info("IOMMU disabled\n");
581                 } else if (!strncmp(str, "igfx_off", 8)) {
582                         dmar_map_gfx = 0;
583                         pr_info("Disable GFX device mapping\n");
584                 } else if (!strncmp(str, "forcedac", 8)) {
585                         pr_info("Forcing DAC for PCI devices\n");
586                         dmar_forcedac = 1;
587                 } else if (!strncmp(str, "strict", 6)) {
588                         pr_info("Disable batched IOTLB flush\n");
589                         intel_iommu_strict = 1;
590                 } else if (!strncmp(str, "sp_off", 6)) {
591                         pr_info("Disable supported super page\n");
592                         intel_iommu_superpage = 0;
593                 } else if (!strncmp(str, "ecs_off", 7)) {
594                         printk(KERN_INFO
595                                 "Intel-IOMMU: disable extended context table support\n");
596                         intel_iommu_ecs = 0;
597                 } else if (!strncmp(str, "pasid28", 7)) {
598                         printk(KERN_INFO
599                                 "Intel-IOMMU: enable pre-production PASID support\n");
600                         intel_iommu_pasid28 = 1;
601                         iommu_identity_mapping |= IDENTMAP_GFX;
602                 }
603
604                 str += strcspn(str, ",");
605                 while (*str == ',')
606                         str++;
607         }
608         return 0;
609 }
610 __setup("intel_iommu=", intel_iommu_setup);
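/*
 * Example: booting with "intel_iommu=on,strict,igfx_off" enables the
 * IOMMU, disables batched IOTLB flushing and disables IOMMU mapping for
 * the integrated graphics device, per the option parsing above.
 */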
611
612 static struct kmem_cache *iommu_domain_cache;
613 static struct kmem_cache *iommu_devinfo_cache;
614
615 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
616 {
617         struct dmar_domain **domains;
618         int idx = did >> 8;
619
620         domains = iommu->domains[idx];
621         if (!domains)
622                 return NULL;
623
624         return domains[did & 0xff];
625 }
626
627 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
628                              struct dmar_domain *domain)
629 {
630         struct dmar_domain **domains;
631         int idx = did >> 8;
632
633         if (!iommu->domains[idx]) {
634                 size_t size = 256 * sizeof(struct dmar_domain *);
635                 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
636         }
637
638         domains = iommu->domains[idx];
639         if (WARN_ON(!domains))
640                 return;
641         else
642                 domains[did & 0xff] = domain;
643 }
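/*
 * Domain IDs are dispatched through a two-level table: the high byte of
 * the DID indexes iommu->domains[], whose 256-entry second-level pages are
 * allocated lazily above, and the low byte indexes within that page. This
 * keeps the footprint proportional to the domain IDs actually in use.
 */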
644
645 static inline void *alloc_pgtable_page(int node)
646 {
647         struct page *page;
648         void *vaddr = NULL;
649
650         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
651         if (page)
652                 vaddr = page_address(page);
653         return vaddr;
654 }
655
656 static inline void free_pgtable_page(void *vaddr)
657 {
658         free_page((unsigned long)vaddr);
659 }
660
661 static inline void *alloc_domain_mem(void)
662 {
663         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
664 }
665
666 static void free_domain_mem(void *vaddr)
667 {
668         kmem_cache_free(iommu_domain_cache, vaddr);
669 }
670
671 static inline void * alloc_devinfo_mem(void)
672 {
673         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
674 }
675
676 static inline void free_devinfo_mem(void *vaddr)
677 {
678         kmem_cache_free(iommu_devinfo_cache, vaddr);
679 }
680
681 static inline int domain_type_is_vm(struct dmar_domain *domain)
682 {
683         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
684 }
685
686 static inline int domain_type_is_si(struct dmar_domain *domain)
687 {
688         return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
689 }
690
691 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
692 {
693         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
694                                 DOMAIN_FLAG_STATIC_IDENTITY);
695 }
696
697 static inline int domain_pfn_supported(struct dmar_domain *domain,
698                                        unsigned long pfn)
699 {
700         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
701
702         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
703 }
704
705 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
706 {
707         unsigned long sagaw;
708         int agaw = -1;
709
710         sagaw = cap_sagaw(iommu->cap);
711         for (agaw = width_to_agaw(max_gaw);
712              agaw >= 0; agaw--) {
713                 if (test_bit(agaw, &sagaw))
714                         break;
715         }
716
717         return agaw;
718 }
719
720 /*
721  * Calculate max SAGAW for each iommu.
722  */
723 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
724 {
725         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
726 }
727
728 /*
 729  * Calculate agaw for each iommu.
 730  * "SAGAW" may be different across iommus; use a default agaw, and fall
 731  * back to a smaller supported agaw for iommus that don't support it.
732  */
733 int iommu_calculate_agaw(struct intel_iommu *iommu)
734 {
735         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
736 }
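/*
 * Example: cap_sagaw() is a bitmap of supported adjusted guest address
 * widths. With DEFAULT_DOMAIN_ADDRESS_WIDTH of 48, width_to_agaw(48) == 2,
 * so __iommu_calculate_agaw() starts at agaw 2 (4-level tables) and walks
 * downwards until it finds a width the hardware advertises.
 */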
737
 738 /* This function only returns a single iommu for a domain */
739 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
740 {
741         int iommu_id;
742
743         /* si_domain and vm domain should not get here. */
744         BUG_ON(domain_type_is_vm_or_si(domain));
745         for_each_domain_iommu(iommu_id, domain)
746                 break;
747
748         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
749                 return NULL;
750
751         return g_iommus[iommu_id];
752 }
753
754 static void domain_update_iommu_coherency(struct dmar_domain *domain)
755 {
756         struct dmar_drhd_unit *drhd;
757         struct intel_iommu *iommu;
758         bool found = false;
759         int i;
760
761         domain->iommu_coherency = 1;
762
763         for_each_domain_iommu(i, domain) {
764                 found = true;
765                 if (!ecap_coherent(g_iommus[i]->ecap)) {
766                         domain->iommu_coherency = 0;
767                         break;
768                 }
769         }
770         if (found)
771                 return;
772
773         /* No hardware attached; use lowest common denominator */
774         rcu_read_lock();
775         for_each_active_iommu(iommu, drhd) {
776                 if (!ecap_coherent(iommu->ecap)) {
777                         domain->iommu_coherency = 0;
778                         break;
779                 }
780         }
781         rcu_read_unlock();
782 }
783
784 static int domain_update_iommu_snooping(struct intel_iommu *skip)
785 {
786         struct dmar_drhd_unit *drhd;
787         struct intel_iommu *iommu;
788         int ret = 1;
789
790         rcu_read_lock();
791         for_each_active_iommu(iommu, drhd) {
792                 if (iommu != skip) {
793                         if (!ecap_sc_support(iommu->ecap)) {
794                                 ret = 0;
795                                 break;
796                         }
797                 }
798         }
799         rcu_read_unlock();
800
801         return ret;
802 }
803
804 static int domain_update_iommu_superpage(struct intel_iommu *skip)
805 {
806         struct dmar_drhd_unit *drhd;
807         struct intel_iommu *iommu;
808         int mask = 0xf;
809
810         if (!intel_iommu_superpage) {
811                 return 0;
812         }
813
814         /* set iommu_superpage to the smallest common denominator */
815         rcu_read_lock();
816         for_each_active_iommu(iommu, drhd) {
817                 if (iommu != skip) {
818                         mask &= cap_super_page_val(iommu->cap);
819                         if (!mask)
820                                 break;
821                 }
822         }
823         rcu_read_unlock();
824
825         return fls(mask);
826 }
827
828 /* Some capabilities may be different across iommus */
829 static void domain_update_iommu_cap(struct dmar_domain *domain)
830 {
831         domain_update_iommu_coherency(domain);
832         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
833         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
834 }
835
836 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
837                                                        u8 bus, u8 devfn, int alloc)
838 {
839         struct root_entry *root = &iommu->root_entry[bus];
840         struct context_entry *context;
841         u64 *entry;
842
843         entry = &root->lo;
844         if (ecs_enabled(iommu)) {
845                 if (devfn >= 0x80) {
846                         devfn -= 0x80;
847                         entry = &root->hi;
848                 }
849                 devfn *= 2;
850         }
851         if (*entry & 1)
852                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
853         else {
854                 unsigned long phy_addr;
855                 if (!alloc)
856                         return NULL;
857
858                 context = alloc_pgtable_page(iommu->node);
859                 if (!context)
860                         return NULL;
861
862                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
863                 phy_addr = virt_to_phys((void *)context);
864                 *entry = phy_addr | 1;
865                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
866         }
867         return &context[devfn];
868 }
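/*
 * In extended (ECS) mode each root entry provides two context-table
 * pointers: the lower one covers devfn 0-127 and the upper one 128-255.
 * Extended context entries are also twice the size of struct context_entry,
 * which is why devfn is doubled above so that &context[devfn] lands on a
 * 256-bit entry boundary.
 */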
869
870 static int iommu_dummy(struct device *dev)
871 {
872         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
873 }
874
875 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
876 {
877         struct dmar_drhd_unit *drhd = NULL;
878         struct intel_iommu *iommu;
879         struct device *tmp;
880         struct pci_dev *ptmp, *pdev = NULL;
881         u16 segment = 0;
882         int i;
883
884         if (iommu_dummy(dev))
885                 return NULL;
886
887         if (dev_is_pci(dev)) {
888                 pdev = to_pci_dev(dev);
889                 segment = pci_domain_nr(pdev->bus);
890         } else if (has_acpi_companion(dev))
891                 dev = &ACPI_COMPANION(dev)->dev;
892
893         rcu_read_lock();
894         for_each_active_iommu(iommu, drhd) {
895                 if (pdev && segment != drhd->segment)
896                         continue;
897
898                 for_each_active_dev_scope(drhd->devices,
899                                           drhd->devices_cnt, i, tmp) {
900                         if (tmp == dev) {
901                                 *bus = drhd->devices[i].bus;
902                                 *devfn = drhd->devices[i].devfn;
903                                 goto out;
904                         }
905
906                         if (!pdev || !dev_is_pci(tmp))
907                                 continue;
908
909                         ptmp = to_pci_dev(tmp);
910                         if (ptmp->subordinate &&
911                             ptmp->subordinate->number <= pdev->bus->number &&
912                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
913                                 goto got_pdev;
914                 }
915
916                 if (pdev && drhd->include_all) {
917                 got_pdev:
918                         *bus = pdev->bus->number;
919                         *devfn = pdev->devfn;
920                         goto out;
921                 }
922         }
923         iommu = NULL;
924  out:
925         rcu_read_unlock();
926
927         return iommu;
928 }
929
930 static void domain_flush_cache(struct dmar_domain *domain,
931                                void *addr, int size)
932 {
933         if (!domain->iommu_coherency)
934                 clflush_cache_range(addr, size);
935 }
936
937 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
938 {
939         struct context_entry *context;
940         int ret = 0;
941         unsigned long flags;
942
943         spin_lock_irqsave(&iommu->lock, flags);
944         context = iommu_context_addr(iommu, bus, devfn, 0);
945         if (context)
946                 ret = context_present(context);
947         spin_unlock_irqrestore(&iommu->lock, flags);
948         return ret;
949 }
950
951 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
952 {
953         struct context_entry *context;
954         unsigned long flags;
955
956         spin_lock_irqsave(&iommu->lock, flags);
957         context = iommu_context_addr(iommu, bus, devfn, 0);
958         if (context) {
959                 context_clear_entry(context);
960                 __iommu_flush_cache(iommu, context, sizeof(*context));
961         }
962         spin_unlock_irqrestore(&iommu->lock, flags);
963 }
964
965 static void free_context_table(struct intel_iommu *iommu)
966 {
967         int i;
968         unsigned long flags;
969         struct context_entry *context;
970
971         spin_lock_irqsave(&iommu->lock, flags);
972         if (!iommu->root_entry) {
973                 goto out;
974         }
975         for (i = 0; i < ROOT_ENTRY_NR; i++) {
976                 context = iommu_context_addr(iommu, i, 0, 0);
977                 if (context)
978                         free_pgtable_page(context);
979
980                 if (!ecs_enabled(iommu))
981                         continue;
982
983                 context = iommu_context_addr(iommu, i, 0x80, 0);
984                 if (context)
985                         free_pgtable_page(context);
986
987         }
988         free_pgtable_page(iommu->root_entry);
989         iommu->root_entry = NULL;
990 out:
991         spin_unlock_irqrestore(&iommu->lock, flags);
992 }
993
994 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
995                                       unsigned long pfn, int *target_level)
996 {
997         struct dma_pte *parent, *pte = NULL;
998         int level = agaw_to_level(domain->agaw);
999         int offset;
1000
1001         BUG_ON(!domain->pgd);
1002
1003         if (!domain_pfn_supported(domain, pfn))
1004                 /* Address beyond IOMMU's addressing capabilities. */
1005                 return NULL;
1006
1007         parent = domain->pgd;
1008
1009         while (1) {
1010                 void *tmp_page;
1011
1012                 offset = pfn_level_offset(pfn, level);
1013                 pte = &parent[offset];
1014                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1015                         break;
1016                 if (level == *target_level)
1017                         break;
1018
1019                 if (!dma_pte_present(pte)) {
1020                         uint64_t pteval;
1021
1022                         tmp_page = alloc_pgtable_page(domain->nid);
1023
1024                         if (!tmp_page)
1025                                 return NULL;
1026
1027                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1028                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1029                         if (cmpxchg64(&pte->val, 0ULL, pteval))
1030                                 /* Someone else set it while we were thinking; use theirs. */
1031                                 free_pgtable_page(tmp_page);
1032                         else
1033                                 domain_flush_cache(domain, pte, sizeof(*pte));
1034                 }
1035                 if (level == 1)
1036                         break;
1037
1038                 parent = phys_to_virt(dma_pte_addr(pte));
1039                 level--;
1040         }
1041
1042         if (!*target_level)
1043                 *target_level = level;
1044
1045         return pte;
1046 }
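/*
 * pfn_to_dma_pte() walks the page table for @pfn. A *target_level of 0
 * means "look up only": the walk stops at the first superpage or
 * non-present entry. A non-zero *target_level makes the walk descend to
 * that level, allocating any missing intermediate tables on the way;
 * cmpxchg64() lets concurrent mappers race on the same slot without a
 * lock, the loser simply freeing its freshly allocated page.
 */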
1047
1048
1049 /* return address's pte at specific level */
1050 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1051                                          unsigned long pfn,
1052                                          int level, int *large_page)
1053 {
1054         struct dma_pte *parent, *pte = NULL;
1055         int total = agaw_to_level(domain->agaw);
1056         int offset;
1057
1058         parent = domain->pgd;
1059         while (level <= total) {
1060                 offset = pfn_level_offset(pfn, total);
1061                 pte = &parent[offset];
1062                 if (level == total)
1063                         return pte;
1064
1065                 if (!dma_pte_present(pte)) {
1066                         *large_page = total;
1067                         break;
1068                 }
1069
1070                 if (dma_pte_superpage(pte)) {
1071                         *large_page = total;
1072                         return pte;
1073                 }
1074
1075                 parent = phys_to_virt(dma_pte_addr(pte));
1076                 total--;
1077         }
1078         return NULL;
1079 }
1080
1081 /* Clear last-level (leaf) ptes; a TLB flush should follow. */
1082 static void dma_pte_clear_range(struct dmar_domain *domain,
1083                                 unsigned long start_pfn,
1084                                 unsigned long last_pfn)
1085 {
1086         unsigned int large_page = 1;
1087         struct dma_pte *first_pte, *pte;
1088
1089         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1090         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1091         BUG_ON(start_pfn > last_pfn);
1092
1093         /* we don't need a lock here; nobody else touches the iova range */
1094         do {
1095                 large_page = 1;
1096                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1097                 if (!pte) {
1098                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1099                         continue;
1100                 }
1101                 do {
1102                         dma_clear_pte(pte);
1103                         start_pfn += lvl_to_nr_pages(large_page);
1104                         pte++;
1105                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1106
1107                 domain_flush_cache(domain, first_pte,
1108                                    (void *)pte - (void *)first_pte);
1109
1110         } while (start_pfn && start_pfn <= last_pfn);
1111 }
1112
1113 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1114                                struct dma_pte *pte, unsigned long pfn,
1115                                unsigned long start_pfn, unsigned long last_pfn)
1116 {
1117         pfn = max(start_pfn, pfn);
1118         pte = &pte[pfn_level_offset(pfn, level)];
1119
1120         do {
1121                 unsigned long level_pfn;
1122                 struct dma_pte *level_pte;
1123
1124                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1125                         goto next;
1126
1127                 level_pfn = pfn & level_mask(level - 1);
1128                 level_pte = phys_to_virt(dma_pte_addr(pte));
1129
1130                 if (level > 2)
1131                         dma_pte_free_level(domain, level - 1, level_pte,
1132                                            level_pfn, start_pfn, last_pfn);
1133
1134                 /* If range covers entire pagetable, free it */
1135                 if (!(start_pfn > level_pfn ||
1136                       last_pfn < level_pfn + level_size(level) - 1)) {
1137                         dma_clear_pte(pte);
1138                         domain_flush_cache(domain, pte, sizeof(*pte));
1139                         free_pgtable_page(level_pte);
1140                 }
1141 next:
1142                 pfn += level_size(level);
1143         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1144 }
1145
1146 /* free page table pages. last level pte should already be cleared */
1147 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1148                                    unsigned long start_pfn,
1149                                    unsigned long last_pfn)
1150 {
1151         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1152         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1153         BUG_ON(start_pfn > last_pfn);
1154
1155         dma_pte_clear_range(domain, start_pfn, last_pfn);
1156
1157         /* We don't need a lock here; nobody else touches the iova range */
1158         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1159                            domain->pgd, 0, start_pfn, last_pfn);
1160
1161         /* free pgd */
1162         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1163                 free_pgtable_page(domain->pgd);
1164                 domain->pgd = NULL;
1165         }
1166 }
1167
1168 /* When a page at a given level is being unlinked from its parent, we don't
1169    need to *modify* it at all. All we need to do is make a list of all the
1170    pages which can be freed just as soon as we've flushed the IOTLB and we
1171    know the hardware page-walk will no longer touch them.
1172    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1173    be freed. */
1174 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1175                                             int level, struct dma_pte *pte,
1176                                             struct page *freelist)
1177 {
1178         struct page *pg;
1179
1180         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1181         pg->freelist = freelist;
1182         freelist = pg;
1183
1184         if (level == 1)
1185                 return freelist;
1186
1187         pte = page_address(pg);
1188         do {
1189                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1190                         freelist = dma_pte_list_pagetables(domain, level - 1,
1191                                                            pte, freelist);
1192                 pte++;
1193         } while (!first_pte_in_page(pte));
1194
1195         return freelist;
1196 }
1197
1198 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1199                                         struct dma_pte *pte, unsigned long pfn,
1200                                         unsigned long start_pfn,
1201                                         unsigned long last_pfn,
1202                                         struct page *freelist)
1203 {
1204         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1205
1206         pfn = max(start_pfn, pfn);
1207         pte = &pte[pfn_level_offset(pfn, level)];
1208
1209         do {
1210                 unsigned long level_pfn;
1211
1212                 if (!dma_pte_present(pte))
1213                         goto next;
1214
1215                 level_pfn = pfn & level_mask(level);
1216
1217                 /* If range covers entire pagetable, free it */
1218                 if (start_pfn <= level_pfn &&
1219                     last_pfn >= level_pfn + level_size(level) - 1) {
1220                         /* These subordinate page tables are going away entirely. Don't
1221                            bother to clear them; we're just going to *free* them. */
1222                         if (level > 1 && !dma_pte_superpage(pte))
1223                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1224
1225                         dma_clear_pte(pte);
1226                         if (!first_pte)
1227                                 first_pte = pte;
1228                         last_pte = pte;
1229                 } else if (level > 1) {
1230                         /* Recurse down into a level that isn't *entirely* obsolete */
1231                         freelist = dma_pte_clear_level(domain, level - 1,
1232                                                        phys_to_virt(dma_pte_addr(pte)),
1233                                                        level_pfn, start_pfn, last_pfn,
1234                                                        freelist);
1235                 }
1236 next:
1237                 pfn += level_size(level);
1238         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1239
1240         if (first_pte)
1241                 domain_flush_cache(domain, first_pte,
1242                                    (void *)++last_pte - (void *)first_pte);
1243
1244         return freelist;
1245 }
1246
1247 /* We can't just free the pages because the IOMMU may still be walking
1248    the page tables, and may have cached the intermediate levels. The
1249    pages can only be freed after the IOTLB flush has been done. */
1250 static struct page *domain_unmap(struct dmar_domain *domain,
1251                                  unsigned long start_pfn,
1252                                  unsigned long last_pfn)
1253 {
1254         struct page *freelist = NULL;
1255
1256         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1257         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1258         BUG_ON(start_pfn > last_pfn);
1259
1260         /* we don't need a lock here; nobody else touches the iova range */
1261         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1262                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1263
1264         /* free pgd */
1265         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1266                 struct page *pgd_page = virt_to_page(domain->pgd);
1267                 pgd_page->freelist = freelist;
1268                 freelist = pgd_page;
1269
1270                 domain->pgd = NULL;
1271         }
1272
1273         return freelist;
1274 }
1275
1276 static void dma_free_pagelist(struct page *freelist)
1277 {
1278         struct page *pg;
1279
1280         while ((pg = freelist)) {
1281                 freelist = pg->freelist;
1282                 free_pgtable_page(page_address(pg));
1283         }
1284 }
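/*
 * Sketch of the intended use of domain_unmap()/dma_free_pagelist() by the
 * unmap paths later in this file: collect the freelist first, flush the
 * IOTLB for the range, and only then free the pages, so the hardware can
 * never walk a page table that has already been reused:
 *
 *      freelist = domain_unmap(domain, start_pfn, last_pfn);
 *      iommu_flush_iotlb_psi(iommu, domain, start_pfn, npages, 0, 0);
 *      dma_free_pagelist(freelist);
 */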
1285
1286 /* iommu handling */
1287 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1288 {
1289         struct root_entry *root;
1290         unsigned long flags;
1291
1292         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1293         if (!root) {
1294                 pr_err("Allocating root entry for %s failed\n",
1295                         iommu->name);
1296                 return -ENOMEM;
1297         }
1298
1299         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1300
1301         spin_lock_irqsave(&iommu->lock, flags);
1302         iommu->root_entry = root;
1303         spin_unlock_irqrestore(&iommu->lock, flags);
1304
1305         return 0;
1306 }
1307
1308 static void iommu_set_root_entry(struct intel_iommu *iommu)
1309 {
1310         u64 addr;
1311         u32 sts;
1312         unsigned long flag;
1313
1314         addr = virt_to_phys(iommu->root_entry);
1315         if (ecs_enabled(iommu))
1316                 addr |= DMA_RTADDR_RTT;
1317
1318         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1319         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1320
1321         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1322
1323         /* Make sure hardware completes it */
1324         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1325                       readl, (sts & DMA_GSTS_RTPS), sts);
1326
1327         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1328 }
1329
1330 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1331 {
1332         u32 val;
1333         unsigned long flag;
1334
1335         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1336                 return;
1337
1338         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1339         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1340
1341         /* Make sure hardware completes it */
1342         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1343                       readl, (!(val & DMA_GSTS_WBFS)), val);
1344
1345         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1346 }
1347
1348 /* return value determines if we need a write buffer flush */
1349 static void __iommu_flush_context(struct intel_iommu *iommu,
1350                                   u16 did, u16 source_id, u8 function_mask,
1351                                   u64 type)
1352 {
1353         u64 val = 0;
1354         unsigned long flag;
1355
1356         switch (type) {
1357         case DMA_CCMD_GLOBAL_INVL:
1358                 val = DMA_CCMD_GLOBAL_INVL;
1359                 break;
1360         case DMA_CCMD_DOMAIN_INVL:
1361                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1362                 break;
1363         case DMA_CCMD_DEVICE_INVL:
1364                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1365                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1366                 break;
1367         default:
1368                 BUG();
1369         }
1370         val |= DMA_CCMD_ICC;
1371
1372         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1373         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1374
1375         /* Make sure hardware completes it */
1376         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1377                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1378
1379         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1380 }
1381
1382 /* return value determines if we need a write buffer flush */
1383 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1384                                 u64 addr, unsigned int size_order, u64 type)
1385 {
1386         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1387         u64 val = 0, val_iva = 0;
1388         unsigned long flag;
1389
1390         switch (type) {
1391         case DMA_TLB_GLOBAL_FLUSH:
1392                 /* global flush doesn't need set IVA_REG */
1393                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1394                 break;
1395         case DMA_TLB_DSI_FLUSH:
1396                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1397                 break;
1398         case DMA_TLB_PSI_FLUSH:
1399                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1400                 /* IH bit is passed in as part of address */
1401                 val_iva = size_order | addr;
1402                 break;
1403         default:
1404                 BUG();
1405         }
1406         /* Note: set drain read/write */
1407 #if 0
1408         /*
1409          * This is probably meant to be extra safe. It looks like we can
1410          * ignore it without any impact.
1411          */
1412         if (cap_read_drain(iommu->cap))
1413                 val |= DMA_TLB_READ_DRAIN;
1414 #endif
1415         if (cap_write_drain(iommu->cap))
1416                 val |= DMA_TLB_WRITE_DRAIN;
1417
1418         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1419         /* Note: Only uses first TLB reg currently */
1420         if (val_iva)
1421                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1422         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1423
1424         /* Make sure hardware completes it */
1425         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1426                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1427
1428         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1429
1430         /* check IOTLB invalidation granularity */
1431         if (DMA_TLB_IAIG(val) == 0)
1432                 pr_err("Flush IOTLB failed\n");
1433         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1434                 pr_debug("TLB flush request %Lx, actual %Lx\n",
1435                         (unsigned long long)DMA_TLB_IIRG(type),
1436                         (unsigned long long)DMA_TLB_IAIG(val));
1437 }
1438
1439 static struct device_domain_info *
1440 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1441                          u8 bus, u8 devfn)
1442 {
1443         struct device_domain_info *info;
1444
1445         assert_spin_locked(&device_domain_lock);
1446
1447         if (!iommu->qi)
1448                 return NULL;
1449
1450         list_for_each_entry(info, &domain->devices, link)
1451                 if (info->iommu == iommu && info->bus == bus &&
1452                     info->devfn == devfn) {
1453                         if (info->ats_supported && info->dev)
1454                                 return info;
1455                         break;
1456                 }
1457
1458         return NULL;
1459 }
1460
1461 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1462 {
1463         struct pci_dev *pdev;
1464
1465         if (!info || !dev_is_pci(info->dev))
1466                 return;
1467
1468         pdev = to_pci_dev(info->dev);
1469
1470 #ifdef CONFIG_INTEL_IOMMU_SVM
1471         /* The PCIe spec, in its wisdom, declares that the behaviour of
1472            the device if you enable PASID support after ATS support is
1473            undefined. So always enable PASID support on devices which
1474            have it, even if we can't yet know if we're ever going to
1475            use it. */
1476         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1477                 info->pasid_enabled = 1;
1478
1479         if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1480                 info->pri_enabled = 1;
1481 #endif
1482         if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1483                 info->ats_enabled = 1;
1484                 info->ats_qdep = pci_ats_queue_depth(pdev);
1485         }
1486 }
1487
1488 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1489 {
1490         struct pci_dev *pdev;
1491
1492         if (!dev_is_pci(info->dev))
1493                 return;
1494
1495         pdev = to_pci_dev(info->dev);
1496
1497         if (info->ats_enabled) {
1498                 pci_disable_ats(pdev);
1499                 info->ats_enabled = 0;
1500         }
1501 #ifdef CONFIG_INTEL_IOMMU_SVM
1502         if (info->pri_enabled) {
1503                 pci_disable_pri(pdev);
1504                 info->pri_enabled = 0;
1505         }
1506         if (info->pasid_enabled) {
1507                 pci_disable_pasid(pdev);
1508                 info->pasid_enabled = 0;
1509         }
1510 #endif
1511 }
1512
1513 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1514                                   u64 addr, unsigned mask)
1515 {
1516         u16 sid, qdep;
1517         unsigned long flags;
1518         struct device_domain_info *info;
1519
1520         spin_lock_irqsave(&device_domain_lock, flags);
1521         list_for_each_entry(info, &domain->devices, link) {
1522                 if (!info->ats_enabled)
1523                         continue;
1524
1525                 sid = info->bus << 8 | info->devfn;
1526                 qdep = info->ats_qdep;
1527                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1528         }
1529         spin_unlock_irqrestore(&device_domain_lock, flags);
1530 }
1531
1532 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1533                                   struct dmar_domain *domain,
1534                                   unsigned long pfn, unsigned int pages,
1535                                   int ih, int map)
1536 {
1537         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1538         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1539         u16 did = domain->iommu_did[iommu->seq_id];
1540
1541         BUG_ON(pages == 0);
1542
1543         if (ih)
1544                 ih = 1 << 6;
1545         /*
1546          * Fall back to domain-selective flush if there is no PSI support or the
1547          * size is too big.
1548          * PSI requires page size to be 2 ^ x, and the base address is naturally
1549          * aligned to the size
1550          */
1551         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1552                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1553                                                 DMA_TLB_DSI_FLUSH);
1554         else
1555                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1556                                                 DMA_TLB_PSI_FLUSH);
1557
1558         /*
1559          * In caching mode, changes of pages from non-present to present require
1560          * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1561          */
1562         if (!cap_caching_mode(iommu->cap) || !map)
1563                 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1564                                       addr, mask);
1565 }
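
/*
 * Worked example (illustrative, not from the original source): flushing
 * pages = 9 VT-d pages rounds up to 16, so mask = ilog2(16) = 4 and the
 * page-selective invalidation covers 2^4 = 16 pages.  If mask exceeded
 * cap_max_amask_val(), the code above would fall back to a domain-selective
 * flush instead.
 */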
1566
1567 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1568 {
1569         u32 pmen;
1570         unsigned long flags;
1571
1572         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1573         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1574         pmen &= ~DMA_PMEN_EPM;
1575         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1576
1577         /* wait for the protected region status bit to clear */
1578         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1579                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1580
1581         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1582 }
1583
1584 static void iommu_enable_translation(struct intel_iommu *iommu)
1585 {
1586         u32 sts;
1587         unsigned long flags;
1588
1589         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1590         iommu->gcmd |= DMA_GCMD_TE;
1591         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1592
1593         /* Make sure hardware completes it */
1594         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1595                       readl, (sts & DMA_GSTS_TES), sts);
1596
1597         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1598 }
1599
1600 static void iommu_disable_translation(struct intel_iommu *iommu)
1601 {
1602         u32 sts;
1603         unsigned long flag;
1604
1605         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1606         iommu->gcmd &= ~DMA_GCMD_TE;
1607         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1608
1609         /* Make sure hardware completes it */
1610         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1611                       readl, (!(sts & DMA_GSTS_TES)), sts);
1612
1613         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1614 }
1615
1616
1617 static int iommu_init_domains(struct intel_iommu *iommu)
1618 {
1619         u32 ndomains, nlongs;
1620         size_t size;
1621
1622         ndomains = cap_ndoms(iommu->cap);
1623         pr_debug("%s: Number of Domains supported <%d>\n",
1624                  iommu->name, ndomains);
1625         nlongs = BITS_TO_LONGS(ndomains);
1626
1627         spin_lock_init(&iommu->lock);
1628
1629         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1630         if (!iommu->domain_ids) {
1631                 pr_err("%s: Allocating domain id array failed\n",
1632                        iommu->name);
1633                 return -ENOMEM;
1634         }
1635
1636         size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1637         iommu->domains = kzalloc(size, GFP_KERNEL);
1638
1639         if (iommu->domains) {
1640                 size = 256 * sizeof(struct dmar_domain *);
1641                 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1642         }
1643
1644         if (!iommu->domains || !iommu->domains[0]) {
1645                 pr_err("%s: Allocating domain array failed\n",
1646                        iommu->name);
1647                 kfree(iommu->domain_ids);
1648                 kfree(iommu->domains);
1649                 iommu->domain_ids = NULL;
1650                 iommu->domains    = NULL;
1651                 return -ENOMEM;
1652         }
1653
1654
1655
1656         /*
1657          * If Caching mode is set, then invalid translations are tagged
1658          * with domain-id 0, hence we need to pre-allocate it. We also
1659          * use domain-id 0 as a marker for non-allocated domain-id, so
1660          * make sure it is not used for a real domain.
1661          */
1662         set_bit(0, iommu->domain_ids);
1663
1664         return 0;
1665 }
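
/*
 * Layout sketch (illustrative): iommu->domains is a two-level table indexed
 * by domain-id, roughly
 *
 *	domain = iommu->domains[did >> 8][did & 0xff];
 *
 * Only chunk 0 (ids 0-255) is allocated up front here; set_iommu_domain()
 * is expected to allocate further 256-entry chunks on demand.
 */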
1666
1667 static void disable_dmar_iommu(struct intel_iommu *iommu)
1668 {
1669         struct device_domain_info *info, *tmp;
1670         unsigned long flags;
1671
1672         if (!iommu->domains || !iommu->domain_ids)
1673                 return;
1674
1675         spin_lock_irqsave(&device_domain_lock, flags);
1676         list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1677                 struct dmar_domain *domain;
1678
1679                 if (info->iommu != iommu)
1680                         continue;
1681
1682                 if (!info->dev || !info->domain)
1683                         continue;
1684
1685                 domain = info->domain;
1686
1687                 dmar_remove_one_dev_info(domain, info->dev);
1688
1689                 if (!domain_type_is_vm_or_si(domain))
1690                         domain_exit(domain);
1691         }
1692         spin_unlock_irqrestore(&device_domain_lock, flags);
1693
1694         if (iommu->gcmd & DMA_GCMD_TE)
1695                 iommu_disable_translation(iommu);
1696 }
1697
1698 static void free_dmar_iommu(struct intel_iommu *iommu)
1699 {
1700         if ((iommu->domains) && (iommu->domain_ids)) {
1701                 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1702                 int i;
1703
1704                 for (i = 0; i < elems; i++)
1705                         kfree(iommu->domains[i]);
1706                 kfree(iommu->domains);
1707                 kfree(iommu->domain_ids);
1708                 iommu->domains = NULL;
1709                 iommu->domain_ids = NULL;
1710         }
1711
1712         g_iommus[iommu->seq_id] = NULL;
1713
1714         /* free context mapping */
1715         free_context_table(iommu);
1716
1717 #ifdef CONFIG_INTEL_IOMMU_SVM
1718         if (pasid_enabled(iommu)) {
1719                 if (ecap_prs(iommu->ecap))
1720                         intel_svm_finish_prq(iommu);
1721                 intel_svm_free_pasid_tables(iommu);
1722         }
1723 #endif
1724 }
1725
1726 static struct dmar_domain *alloc_domain(int flags)
1727 {
1728         struct dmar_domain *domain;
1729
1730         domain = alloc_domain_mem();
1731         if (!domain)
1732                 return NULL;
1733
1734         memset(domain, 0, sizeof(*domain));
1735         domain->nid = -1;
1736         domain->flags = flags;
1737         INIT_LIST_HEAD(&domain->devices);
1738
1739         return domain;
1740 }
1741
1742 /* Must be called with device_domain_lock and iommu->lock held */
1743 static int domain_attach_iommu(struct dmar_domain *domain,
1744                                struct intel_iommu *iommu)
1745 {
1746         unsigned long ndomains;
1747         int num;
1748
1749         assert_spin_locked(&device_domain_lock);
1750         assert_spin_locked(&iommu->lock);
1751
1752         domain->iommu_refcnt[iommu->seq_id] += 1;
1753         domain->iommu_count += 1;
1754         if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1755                 ndomains = cap_ndoms(iommu->cap);
1756                 num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1757
1758                 if (num >= ndomains) {
1759                         pr_err("%s: No free domain ids\n", iommu->name);
1760                         domain->iommu_refcnt[iommu->seq_id] -= 1;
1761                         domain->iommu_count -= 1;
1762                         return -ENOSPC;
1763                 }
1764
1765                 set_bit(num, iommu->domain_ids);
1766                 set_iommu_domain(iommu, num, domain);
1767
1768                 domain->iommu_did[iommu->seq_id] = num;
1769                 domain->nid                      = iommu->node;
1770
1771                 domain_update_iommu_cap(domain);
1772         }
1773
1774         return 0;
1775 }
1776
1777 static int domain_detach_iommu(struct dmar_domain *domain,
1778                                struct intel_iommu *iommu)
1779 {
1780         int num, count = INT_MAX;
1781
1782         assert_spin_locked(&device_domain_lock);
1783         assert_spin_locked(&iommu->lock);
1784
1785         domain->iommu_refcnt[iommu->seq_id] -= 1;
1786         count = --domain->iommu_count;
1787         if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1788                 num = domain->iommu_did[iommu->seq_id];
1789                 clear_bit(num, iommu->domain_ids);
1790                 set_iommu_domain(iommu, num, NULL);
1791
1792                 domain_update_iommu_cap(domain);
1793                 domain->iommu_did[iommu->seq_id] = 0;
1794         }
1795
1796         return count;
1797 }
1798
1799 static struct iova_domain reserved_iova_list;
1800 static struct lock_class_key reserved_rbtree_key;
1801
1802 static int dmar_init_reserved_ranges(void)
1803 {
1804         struct pci_dev *pdev = NULL;
1805         struct iova *iova;
1806         int i;
1807
1808         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1809                         DMA_32BIT_PFN);
1810
1811         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1812                 &reserved_rbtree_key);
1813
1814         /* IOAPIC ranges shouldn't be accessed by DMA */
1815         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1816                 IOVA_PFN(IOAPIC_RANGE_END));
1817         if (!iova) {
1818                 pr_err("Reserve IOAPIC range failed\n");
1819                 return -ENODEV;
1820         }
1821
1822         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1823         for_each_pci_dev(pdev) {
1824                 struct resource *r;
1825
1826                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1827                         r = &pdev->resource[i];
1828                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1829                                 continue;
1830                         iova = reserve_iova(&reserved_iova_list,
1831                                             IOVA_PFN(r->start),
1832                                             IOVA_PFN(r->end));
1833                         if (!iova) {
1834                                 pr_err("Reserve iova failed\n");
1835                                 return -ENODEV;
1836                         }
1837                 }
1838         }
1839         return 0;
1840 }
1841
1842 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1843 {
1844         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1845 }
1846
1847 static inline int guestwidth_to_adjustwidth(int gaw)
1848 {
1849         int agaw;
1850         int r = (gaw - 12) % 9;
1851
1852         if (r == 0)
1853                 agaw = gaw;
1854         else
1855                 agaw = gaw + 9 - r;
1856         if (agaw > 64)
1857                 agaw = 64;
1858         return agaw;
1859 }
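
/*
 * Worked examples (illustrative): the adjusted width is the guest width
 * rounded up to 12 + 9*n, i.e. a whole number of 9-bit page-table levels
 * above the 12-bit page offset.  gaw = 48 gives r = (48 - 12) % 9 = 0, so
 * agaw = 48; gaw = 35 gives r = 5, so agaw = 35 + 9 - 5 = 39.
 */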
1860
1861 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1862                        int guest_width)
1863 {
1864         int adjust_width, agaw;
1865         unsigned long sagaw;
1866
1867         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1868                         DMA_32BIT_PFN);
1869         domain_reserve_special_ranges(domain);
1870
1871         /* calculate AGAW */
1872         if (guest_width > cap_mgaw(iommu->cap))
1873                 guest_width = cap_mgaw(iommu->cap);
1874         domain->gaw = guest_width;
1875         adjust_width = guestwidth_to_adjustwidth(guest_width);
1876         agaw = width_to_agaw(adjust_width);
1877         sagaw = cap_sagaw(iommu->cap);
1878         if (!test_bit(agaw, &sagaw)) {
1879                 /* hardware doesn't support it, choose a bigger one */
1880                 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1881                 agaw = find_next_bit(&sagaw, 5, agaw);
1882                 if (agaw >= 5)
1883                         return -ENODEV;
1884         }
1885         domain->agaw = agaw;
1886
1887         if (ecap_coherent(iommu->ecap))
1888                 domain->iommu_coherency = 1;
1889         else
1890                 domain->iommu_coherency = 0;
1891
1892         if (ecap_sc_support(iommu->ecap))
1893                 domain->iommu_snooping = 1;
1894         else
1895                 domain->iommu_snooping = 0;
1896
1897         if (intel_iommu_superpage)
1898                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1899         else
1900                 domain->iommu_superpage = 0;
1901
1902         domain->nid = iommu->node;
1903
1904         /* always allocate the top pgd */
1905         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1906         if (!domain->pgd)
1907                 return -ENOMEM;
1908         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1909         return 0;
1910 }
1911
1912 static void domain_exit(struct dmar_domain *domain)
1913 {
1914         struct page *freelist = NULL;
1915
1916         /* Domain 0 is reserved, so don't process it */
1917         if (!domain)
1918                 return;
1919
1920         /* Flush any lazy unmaps that may reference this domain */
1921         if (!intel_iommu_strict)
1922                 flush_unmaps_timeout(0);
1923
1924         /* Remove associated devices and clear attached or cached domains */
1925         rcu_read_lock();
1926         domain_remove_dev_info(domain);
1927         rcu_read_unlock();
1928
1929         /* destroy iovas */
1930         put_iova_domain(&domain->iovad);
1931
1932         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1933
1934         dma_free_pagelist(freelist);
1935
1936         free_domain_mem(domain);
1937 }
1938
1939 static int domain_context_mapping_one(struct dmar_domain *domain,
1940                                       struct intel_iommu *iommu,
1941                                       u8 bus, u8 devfn)
1942 {
1943         u16 did = domain->iommu_did[iommu->seq_id];
1944         int translation = CONTEXT_TT_MULTI_LEVEL;
1945         struct device_domain_info *info = NULL;
1946         struct context_entry *context;
1947         unsigned long flags;
1948         struct dma_pte *pgd;
1949         int ret, agaw;
1950
1951         WARN_ON(did == 0);
1952
1953         if (hw_pass_through && domain_type_is_si(domain))
1954                 translation = CONTEXT_TT_PASS_THROUGH;
1955
1956         pr_debug("Set context mapping for %02x:%02x.%d\n",
1957                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1958
1959         BUG_ON(!domain->pgd);
1960
1961         spin_lock_irqsave(&device_domain_lock, flags);
1962         spin_lock(&iommu->lock);
1963
1964         ret = -ENOMEM;
1965         context = iommu_context_addr(iommu, bus, devfn, 1);
1966         if (!context)
1967                 goto out_unlock;
1968
1969         ret = 0;
1970         if (context_present(context))
1971                 goto out_unlock;
1972
1973         pgd = domain->pgd;
1974
1975         context_clear_entry(context);
1976         context_set_domain_id(context, did);
1977
1978         /*
1979          * Skip top levels of page tables for an iommu which has a smaller agaw
1980          * than the default.  Unnecessary for PT mode.
1981          */
1982         if (translation != CONTEXT_TT_PASS_THROUGH) {
1983                 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1984                         ret = -ENOMEM;
1985                         pgd = phys_to_virt(dma_pte_addr(pgd));
1986                         if (!dma_pte_present(pgd))
1987                                 goto out_unlock;
1988                 }
1989
1990                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1991                 if (info && info->ats_supported)
1992                         translation = CONTEXT_TT_DEV_IOTLB;
1993                 else
1994                         translation = CONTEXT_TT_MULTI_LEVEL;
1995
1996                 context_set_address_root(context, virt_to_phys(pgd));
1997                 context_set_address_width(context, iommu->agaw);
1998         } else {
1999                 /*
2000                  * In pass through mode, AW must be programmed to
2001                  * indicate the largest AGAW value supported by
2002                  * hardware. And ASR is ignored by hardware.
2003                  */
2004                 context_set_address_width(context, iommu->msagaw);
2005         }
2006
2007         context_set_translation_type(context, translation);
2008         context_set_fault_enable(context);
2009         context_set_present(context);
2010         domain_flush_cache(domain, context, sizeof(*context));
2011
2012         /*
2013          * It's a non-present to present mapping. If hardware doesn't cache
2014          * non-present entries we only need to flush the write-buffer. If it
2015          * _does_ cache non-present entries, then it does so in the special
2016          * domain #0, which we have to flush:
2017          */
2018         if (cap_caching_mode(iommu->cap)) {
2019                 iommu->flush.flush_context(iommu, 0,
2020                                            (((u16)bus) << 8) | devfn,
2021                                            DMA_CCMD_MASK_NOBIT,
2022                                            DMA_CCMD_DEVICE_INVL);
2023                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2024         } else {
2025                 iommu_flush_write_buffer(iommu);
2026         }
2027         iommu_enable_dev_iotlb(info);
2028
2029         ret = 0;
2030
2031 out_unlock:
2032         spin_unlock(&iommu->lock);
2033         spin_unlock_irqrestore(&device_domain_lock, flags);
2034
2035         return ret;
2036 }
2037
2038 struct domain_context_mapping_data {
2039         struct dmar_domain *domain;
2040         struct intel_iommu *iommu;
2041 };
2042
2043 static int domain_context_mapping_cb(struct pci_dev *pdev,
2044                                      u16 alias, void *opaque)
2045 {
2046         struct domain_context_mapping_data *data = opaque;
2047
2048         return domain_context_mapping_one(data->domain, data->iommu,
2049                                           PCI_BUS_NUM(alias), alias & 0xff);
2050 }
2051
2052 static int
2053 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2054 {
2055         struct intel_iommu *iommu;
2056         u8 bus, devfn;
2057         struct domain_context_mapping_data data;
2058
2059         iommu = device_to_iommu(dev, &bus, &devfn);
2060         if (!iommu)
2061                 return -ENODEV;
2062
2063         if (!dev_is_pci(dev))
2064                 return domain_context_mapping_one(domain, iommu, bus, devfn);
2065
2066         data.domain = domain;
2067         data.iommu = iommu;
2068
2069         return pci_for_each_dma_alias(to_pci_dev(dev),
2070                                       &domain_context_mapping_cb, &data);
2071 }
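
/*
 * Illustrative note: for a device with DMA aliases (for example a
 * conventional-PCI device behind a PCIe-to-PCI bridge, whose requests may
 * carry the bridge's requester ID), pci_for_each_dma_alias() invokes the
 * callback once per alias, so a context entry is programmed for every
 * bus/devfn that can appear as the source-id of the device's DMA.
 */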
2072
2073 static int domain_context_mapped_cb(struct pci_dev *pdev,
2074                                     u16 alias, void *opaque)
2075 {
2076         struct intel_iommu *iommu = opaque;
2077
2078         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2079 }
2080
2081 static int domain_context_mapped(struct device *dev)
2082 {
2083         struct intel_iommu *iommu;
2084         u8 bus, devfn;
2085
2086         iommu = device_to_iommu(dev, &bus, &devfn);
2087         if (!iommu)
2088                 return -ENODEV;
2089
2090         if (!dev_is_pci(dev))
2091                 return device_context_mapped(iommu, bus, devfn);
2092
2093         return !pci_for_each_dma_alias(to_pci_dev(dev),
2094                                        domain_context_mapped_cb, iommu);
2095 }
2096
2097 /* Returns the number of VTD pages, but aligned to MM page size */
2098 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2099                                             size_t size)
2100 {
2101         host_addr &= ~PAGE_MASK;
2102         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2103 }
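
/*
 * Worked example (illustrative, assuming 4KiB pages for both the MM and
 * VT-d sides): host_addr = 0x1234 and size = 0x2000 leave an in-page offset
 * of 0x234; PAGE_ALIGN(0x234 + 0x2000) = 0x3000, i.e. 3 VT-d pages.  With a
 * larger MM page size the result is a multiple of PAGE_SIZE / VTD_PAGE_SIZE,
 * which is what "aligned to MM page size" means above.
 */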
2104
2105 /* Return largest possible superpage level for a given mapping */
2106 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2107                                           unsigned long iov_pfn,
2108                                           unsigned long phy_pfn,
2109                                           unsigned long pages)
2110 {
2111         int support, level = 1;
2112         unsigned long pfnmerge;
2113
2114         support = domain->iommu_superpage;
2115
2116         /* To use a large page, the virtual *and* physical addresses
2117            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2118            of them will mean we have to use smaller pages. So just
2119            merge them and check both at once. */
2120         pfnmerge = iov_pfn | phy_pfn;
2121
2122         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2123                 pages >>= VTD_STRIDE_SHIFT;
2124                 if (!pages)
2125                         break;
2126                 pfnmerge >>= VTD_STRIDE_SHIFT;
2127                 level++;
2128                 support--;
2129         }
2130         return level;
2131 }
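
/*
 * Worked example (illustrative, 4KiB VT-d pages): if both iov_pfn and
 * phy_pfn have their low 9 bits clear (2MiB-aligned addresses) and
 * pages >= 512, the loop above returns level 2, so the mapping code can use
 * 2MiB superpages; with 1GiB alignment, pages >= 262144 and
 * domain->iommu_superpage >= 2 it returns level 3.
 */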
2132
2133 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2134                             struct scatterlist *sg, unsigned long phys_pfn,
2135                             unsigned long nr_pages, int prot)
2136 {
2137         struct dma_pte *first_pte = NULL, *pte = NULL;
2138         phys_addr_t uninitialized_var(pteval);
2139         unsigned long sg_res = 0;
2140         unsigned int largepage_lvl = 0;
2141         unsigned long lvl_pages = 0;
2142
2143         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2144
2145         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2146                 return -EINVAL;
2147
2148         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2149
2150         if (!sg) {
2151                 sg_res = nr_pages;
2152                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2153         }
2154
2155         while (nr_pages > 0) {
2156                 uint64_t tmp;
2157
2158                 if (!sg_res) {
2159                         sg_res = aligned_nrpages(sg->offset, sg->length);
2160                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2161                         sg->dma_length = sg->length;
2162                         pteval = page_to_phys(sg_page(sg)) | prot;
2163                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2164                 }
2165
2166                 if (!pte) {
2167                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2168
2169                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2170                         if (!pte)
2171                                 return -ENOMEM;
2172                         /* It is a large page */
2173                         if (largepage_lvl > 1) {
2174                                 unsigned long nr_superpages, end_pfn;
2175
2176                                 pteval |= DMA_PTE_LARGE_PAGE;
2177                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2178
2179                                 nr_superpages = sg_res / lvl_pages;
2180                                 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2181
2182                                 /*
2183                                  * Ensure that old small page tables are
2184                                  * removed to make room for superpage(s).
2185                                  */
2186                                 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
2187                         } else {
2188                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2189                         }
2190
2191                 }
2192                 /* We don't need a lock here; nobody else
2193                  * touches the iova range.
2194                  */
2195                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2196                 if (tmp) {
2197                         static int dumps = 5;
2198                         pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2199                                 iov_pfn, tmp, (unsigned long long)pteval);
2200                         if (dumps) {
2201                                 dumps--;
2202                                 debug_dma_dump_mappings(NULL);
2203                         }
2204                         WARN_ON(1);
2205                 }
2206
2207                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2208
2209                 BUG_ON(nr_pages < lvl_pages);
2210                 BUG_ON(sg_res < lvl_pages);
2211
2212                 nr_pages -= lvl_pages;
2213                 iov_pfn += lvl_pages;
2214                 phys_pfn += lvl_pages;
2215                 pteval += lvl_pages * VTD_PAGE_SIZE;
2216                 sg_res -= lvl_pages;
2217
2218                 /* If the next PTE would be the first in a new page, then we
2219                    need to flush the cache on the entries we've just written.
2220                    And then we'll need to recalculate 'pte', so clear it and
2221                    let it get set again in the if (!pte) block above.
2222
2223                    If we're done (!nr_pages) we need to flush the cache too.
2224
2225                    Also if we've been setting superpages, we may need to
2226                    recalculate 'pte' and switch back to smaller pages for the
2227                    end of the mapping, if the trailing size is not enough to
2228                    use another superpage (i.e. sg_res < lvl_pages). */
2229                 pte++;
2230                 if (!nr_pages || first_pte_in_page(pte) ||
2231                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2232                         domain_flush_cache(domain, first_pte,
2233                                            (void *)pte - (void *)first_pte);
2234                         pte = NULL;
2235                 }
2236
2237                 if (!sg_res && nr_pages)
2238                         sg = sg_next(sg);
2239         }
2240         return 0;
2241 }
2242
2243 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2244                                     struct scatterlist *sg, unsigned long nr_pages,
2245                                     int prot)
2246 {
2247         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2248 }
2249
2250 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2251                                      unsigned long phys_pfn, unsigned long nr_pages,
2252                                      int prot)
2253 {
2254         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2255 }
2256
2257 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2258 {
2259         if (!iommu)
2260                 return;
2261
2262         clear_context_table(iommu, bus, devfn);
2263         iommu->flush.flush_context(iommu, 0, 0, 0,
2264                                            DMA_CCMD_GLOBAL_INVL);
2265         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2266 }
2267
2268 static inline void unlink_domain_info(struct device_domain_info *info)
2269 {
2270         assert_spin_locked(&device_domain_lock);
2271         list_del(&info->link);
2272         list_del(&info->global);
2273         if (info->dev)
2274                 info->dev->archdata.iommu = NULL;
2275 }
2276
2277 static void domain_remove_dev_info(struct dmar_domain *domain)
2278 {
2279         struct device_domain_info *info, *tmp;
2280         unsigned long flags;
2281
2282         spin_lock_irqsave(&device_domain_lock, flags);
2283         list_for_each_entry_safe(info, tmp, &domain->devices, link)
2284                 __dmar_remove_one_dev_info(info);
2285         spin_unlock_irqrestore(&device_domain_lock, flags);
2286 }
2287
2288 /*
2289  * find_domain
2290  * Note: we use struct device->archdata.iommu to store the info
2291  */
2292 static struct dmar_domain *find_domain(struct device *dev)
2293 {
2294         struct device_domain_info *info;
2295
2296         /* No lock here, assumes no domain exit in normal case */
2297         info = dev->archdata.iommu;
2298         if (info)
2299                 return info->domain;
2300         return NULL;
2301 }
2302
2303 static inline struct device_domain_info *
2304 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2305 {
2306         struct device_domain_info *info;
2307
2308         list_for_each_entry(info, &device_domain_list, global)
2309                 if (info->iommu->segment == segment && info->bus == bus &&
2310                     info->devfn == devfn)
2311                         return info;
2312
2313         return NULL;
2314 }
2315
2316 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2317                                                     int bus, int devfn,
2318                                                     struct device *dev,
2319                                                     struct dmar_domain *domain)
2320 {
2321         struct dmar_domain *found = NULL;
2322         struct device_domain_info *info;
2323         unsigned long flags;
2324         int ret;
2325
2326         info = alloc_devinfo_mem();
2327         if (!info)
2328                 return NULL;
2329
2330         info->bus = bus;
2331         info->devfn = devfn;
2332         info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2333         info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2334         info->ats_qdep = 0;
2335         info->dev = dev;
2336         info->domain = domain;
2337         info->iommu = iommu;
2338
2339         if (dev && dev_is_pci(dev)) {
2340                 struct pci_dev *pdev = to_pci_dev(info->dev);
2341
2342                 if (ecap_dev_iotlb_support(iommu->ecap) &&
2343                     pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2344                     dmar_find_matched_atsr_unit(pdev))
2345                         info->ats_supported = 1;
2346
2347                 if (ecs_enabled(iommu)) {
2348                         if (pasid_enabled(iommu)) {
2349                                 int features = pci_pasid_features(pdev);
2350                                 if (features >= 0)
2351                                         info->pasid_supported = features | 1;
2352                         }
2353
2354                         if (info->ats_supported && ecap_prs(iommu->ecap) &&
2355                             pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2356                                 info->pri_supported = 1;
2357                 }
2358         }
2359
2360         spin_lock_irqsave(&device_domain_lock, flags);
2361         if (dev)
2362                 found = find_domain(dev);
2363
2364         if (!found) {
2365                 struct device_domain_info *info2;
2366                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2367                 if (info2) {
2368                         found      = info2->domain;
2369                         info2->dev = dev;
2370                 }
2371         }
2372
2373         if (found) {
2374                 spin_unlock_irqrestore(&device_domain_lock, flags);
2375                 free_devinfo_mem(info);
2376                 /* Caller must free the original domain */
2377                 return found;
2378         }
2379
2380         spin_lock(&iommu->lock);
2381         ret = domain_attach_iommu(domain, iommu);
2382         spin_unlock(&iommu->lock);
2383
2384         if (ret) {
2385                 spin_unlock_irqrestore(&device_domain_lock, flags);
2386                 free_devinfo_mem(info);
2387                 return NULL;
2388         }
2389
2390         list_add(&info->link, &domain->devices);
2391         list_add(&info->global, &device_domain_list);
2392         if (dev)
2393                 dev->archdata.iommu = info;
2394         spin_unlock_irqrestore(&device_domain_lock, flags);
2395
2396         if (dev && domain_context_mapping(domain, dev)) {
2397                 pr_err("Domain context map for %s failed\n", dev_name(dev));
2398                 dmar_remove_one_dev_info(domain, dev);
2399                 return NULL;
2400         }
2401
2402         return domain;
2403 }
2404
2405 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2406 {
2407         *(u16 *)opaque = alias;
2408         return 0;
2409 }
2410
2411 /* domain is initialized */
2412 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2413 {
2414         struct device_domain_info *info = NULL;
2415         struct dmar_domain *domain, *tmp;
2416         struct intel_iommu *iommu;
2417         u16 req_id, dma_alias;
2418         unsigned long flags;
2419         u8 bus, devfn;
2420
2421         domain = find_domain(dev);
2422         if (domain)
2423                 return domain;
2424
2425         iommu = device_to_iommu(dev, &bus, &devfn);
2426         if (!iommu)
2427                 return NULL;
2428
2429         req_id = ((u16)bus << 8) | devfn;
2430
2431         if (dev_is_pci(dev)) {
2432                 struct pci_dev *pdev = to_pci_dev(dev);
2433
2434                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2435
2436                 spin_lock_irqsave(&device_domain_lock, flags);
2437                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2438                                                       PCI_BUS_NUM(dma_alias),
2439                                                       dma_alias & 0xff);
2440                 if (info) {
2441                         iommu = info->iommu;
2442                         domain = info->domain;
2443                 }
2444                 spin_unlock_irqrestore(&device_domain_lock, flags);
2445
2446                 /* DMA alias already has a domain, use it */
2447                 if (info)
2448                         goto found_domain;
2449         }
2450
2451         /* Allocate and initialize new domain for the device */
2452         domain = alloc_domain(0);
2453         if (!domain)
2454                 return NULL;
2455         if (domain_init(domain, iommu, gaw)) {
2456                 domain_exit(domain);
2457                 return NULL;
2458         }
2459
2460         /* register PCI DMA alias device */
2461         if (req_id != dma_alias && dev_is_pci(dev)) {
2462                 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2463                                                dma_alias & 0xff, NULL, domain);
2464
2465                 if (!tmp || tmp != domain) {
2466                         domain_exit(domain);
2467                         domain = tmp;
2468                 }
2469
2470                 if (!domain)
2471                         return NULL;
2472         }
2473
2474 found_domain:
2475         tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2476
2477         if (!tmp || tmp != domain) {
2478                 domain_exit(domain);
2479                 domain = tmp;
2480         }
2481
2482         return domain;
2483 }
2484
2485 static int iommu_domain_identity_map(struct dmar_domain *domain,
2486                                      unsigned long long start,
2487                                      unsigned long long end)
2488 {
2489         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2490         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2491
2492         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2493                           dma_to_mm_pfn(last_vpfn))) {
2494                 pr_err("Reserving iova failed\n");
2495                 return -ENOMEM;
2496         }
2497
2498         pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2499         /*
2500          * The RMRR range might overlap with the physical memory range,
2501          * so clear it first.
2502          */
2503         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2504
2505         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2506                                   last_vpfn - first_vpfn + 1,
2507                                   DMA_PTE_READ|DMA_PTE_WRITE);
2508 }
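
/*
 * Worked example (illustrative, assuming 4KiB pages): the ISA floppy
 * workaround further below ends up here with start = 0 and end = 16MiB - 1,
 * so first_vpfn = 0, last_vpfn = 0xfff, and 4096 page frames are reserved in
 * the iova tree and mapped 1:1 with read/write permission.
 */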
2509
2510 static int domain_prepare_identity_map(struct device *dev,
2511                                        struct dmar_domain *domain,
2512                                        unsigned long long start,
2513                                        unsigned long long end)
2514 {
2515         /* For _hardware_ passthrough, don't bother. But for software
2516            passthrough, we do it anyway -- it may indicate a memory
2517            range which is reserved in E820 and so didn't get set
2518            up to start with in si_domain */
2519         if (domain == si_domain && hw_pass_through) {
2520                 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2521                         dev_name(dev), start, end);
2522                 return 0;
2523         }
2524
2525         pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2526                 dev_name(dev), start, end);
2527
2528         if (end < start) {
2529                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2530                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2531                         dmi_get_system_info(DMI_BIOS_VENDOR),
2532                         dmi_get_system_info(DMI_BIOS_VERSION),
2533                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2534                 return -EIO;
2535         }
2536
2537         if (end >> agaw_to_width(domain->agaw)) {
2538                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2539                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2540                      agaw_to_width(domain->agaw),
2541                      dmi_get_system_info(DMI_BIOS_VENDOR),
2542                      dmi_get_system_info(DMI_BIOS_VERSION),
2543                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2544                 return -EIO;
2545         }
2546
2547         return iommu_domain_identity_map(domain, start, end);
2548 }
2549
2550 static int iommu_prepare_identity_map(struct device *dev,
2551                                       unsigned long long start,
2552                                       unsigned long long end)
2553 {
2554         struct dmar_domain *domain;
2555         int ret;
2556
2557         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2558         if (!domain)
2559                 return -ENOMEM;
2560
2561         ret = domain_prepare_identity_map(dev, domain, start, end);
2562         if (ret)
2563                 domain_exit(domain);
2564
2565         return ret;
2566 }
2567
2568 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2569                                          struct device *dev)
2570 {
2571         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2572                 return 0;
2573         return iommu_prepare_identity_map(dev, rmrr->base_address,
2574                                           rmrr->end_address);
2575 }
2576
2577 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2578 static inline void iommu_prepare_isa(void)
2579 {
2580         struct pci_dev *pdev;
2581         int ret;
2582
2583         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2584         if (!pdev)
2585                 return;
2586
2587         pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2588         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2589
2590         if (ret)
2591                 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2592
2593         pci_dev_put(pdev);
2594 }
2595 #else
2596 static inline void iommu_prepare_isa(void)
2597 {
2598         return;
2599 }
2600 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2601
2602 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2603
2604 static int __init si_domain_init(int hw)
2605 {
2606         int nid, ret = 0;
2607
2608         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2609         if (!si_domain)
2610                 return -EFAULT;
2611
2612         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2613                 domain_exit(si_domain);
2614                 return -EFAULT;
2615         }
2616
2617         pr_debug("Identity mapping domain allocated\n");
2618
2619         if (hw)
2620                 return 0;
2621
2622         for_each_online_node(nid) {
2623                 unsigned long start_pfn, end_pfn;
2624                 int i;
2625
2626                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2627                         ret = iommu_domain_identity_map(si_domain,
2628                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2629                         if (ret)
2630                                 return ret;
2631                 }
2632         }
2633
2634         return 0;
2635 }
2636
2637 static int identity_mapping(struct device *dev)
2638 {
2639         struct device_domain_info *info;
2640
2641         if (likely(!iommu_identity_mapping))
2642                 return 0;
2643
2644         info = dev->archdata.iommu;
2645         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2646                 return (info->domain == si_domain);
2647
2648         return 0;
2649 }
2650
2651 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2652 {
2653         struct dmar_domain *ndomain;
2654         struct intel_iommu *iommu;
2655         u8 bus, devfn;
2656
2657         iommu = device_to_iommu(dev, &bus, &devfn);
2658         if (!iommu)
2659                 return -ENODEV;
2660
2661         ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2662         if (ndomain != domain)
2663                 return -EBUSY;
2664
2665         return 0;
2666 }
2667
2668 static bool device_has_rmrr(struct device *dev)
2669 {
2670         struct dmar_rmrr_unit *rmrr;
2671         struct device *tmp;
2672         int i;
2673
2674         rcu_read_lock();
2675         for_each_rmrr_units(rmrr) {
2676                 /*
2677                  * Return TRUE if this RMRR contains the device that
2678                  * is passed in.
2679                  */
2680                 for_each_active_dev_scope(rmrr->devices,
2681                                           rmrr->devices_cnt, i, tmp)
2682                         if (tmp == dev) {
2683                                 rcu_read_unlock();
2684                                 return true;
2685                         }
2686         }
2687         rcu_read_unlock();
2688         return false;
2689 }
2690
2691 /*
2692  * There are a couple cases where we need to restrict the functionality of
2693  * devices associated with RMRRs.  The first is when evaluating a device for
2694  * identity mapping because problems exist when devices are moved in and out
2695  * of domains and their respective RMRR information is lost.  This means that
2696  * a device with associated RMRRs will never be in a "passthrough" domain.
2697  * The second is use of the device through the IOMMU API.  This interface
2698  * expects to have full control of the IOVA space for the device.  We cannot
2699  * satisfy both the requirement that RMRR access is maintained and have an
2700  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2701  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2702  * We therefore prevent devices associated with an RMRR from participating in
2703  * the IOMMU API, which eliminates them from device assignment.
2704  *
2705  * In both cases we assume that PCI USB devices with RMRRs have them largely
2706  * for historical reasons and that the RMRR space is not actively used post
2707  * boot.  This exclusion may change if vendors begin to abuse it.
2708  *
2709  * The same exception is made for graphics devices, with the requirement that
2710  * any use of the RMRR regions will be torn down before assigning the device
2711  * to a guest.
2712  */
2713 static bool device_is_rmrr_locked(struct device *dev)
2714 {
2715         if (!device_has_rmrr(dev))
2716                 return false;
2717
2718         if (dev_is_pci(dev)) {
2719                 struct pci_dev *pdev = to_pci_dev(dev);
2720
2721                 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2722                         return false;
2723         }
2724
2725         return true;
2726 }
2727
2728 static int iommu_should_identity_map(struct device *dev, int startup)
2729 {
2730
2731         if (dev_is_pci(dev)) {
2732                 struct pci_dev *pdev = to_pci_dev(dev);
2733
2734                 if (device_is_rmrr_locked(dev))
2735                         return 0;
2736
2737                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2738                         return 1;
2739
2740                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2741                         return 1;
2742
2743                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2744                         return 0;
2745
2746                 /*
2747                  * We want to start off with all devices in the 1:1 domain, and
2748                  * take them out later if we find they can't access all of memory.
2749                  *
2750                  * However, we can't do this for PCI devices behind bridges,
2751                  * because all PCI devices behind the same bridge will end up
2752                  * with the same source-id on their transactions.
2753                  *
2754                  * Practically speaking, we can't change things around for these
2755                  * devices at run-time, because we can't be sure there'll be no
2756                  * DMA transactions in flight for any of their siblings.
2757                  *
2758                  * So PCI devices (unless they're on the root bus) as well as
2759                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2760                  * the 1:1 domain, just in _case_ one of their siblings turns out
2761                  * not to be able to map all of memory.
2762                  */
2763                 if (!pci_is_pcie(pdev)) {
2764                         if (!pci_is_root_bus(pdev->bus))
2765                                 return 0;
2766                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2767                                 return 0;
2768                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2769                         return 0;
2770         } else {
2771                 if (device_has_rmrr(dev))
2772                         return 0;
2773         }
2774
2775         /*
2776          * At boot time, we don't yet know if devices will be 64-bit capable.
2777          * Assume that they will — if they turn out not to be, then we can
2778          * take them out of the 1:1 domain later.
2779          */
2780         if (!startup) {
2781                 /*
2782                  * If the device's dma_mask is less than the system's memory
2783                  * size then this is not a candidate for identity mapping.
2784                  */
2785                 u64 dma_mask = *dev->dma_mask;
2786
2787                 if (dev->coherent_dma_mask &&
2788                     dev->coherent_dma_mask < dma_mask)
2789                         dma_mask = dev->coherent_dma_mask;
2790
2791                 return dma_mask >= dma_get_required_mask(dev);
2792         }
2793
2794         return 1;
2795 }
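
/*
 * Illustrative example of the runtime check above: on a machine with 8GiB of
 * RAM, dma_get_required_mask() typically returns DMA_BIT_MASK(33), so a
 * device limited to a 32-bit DMA mask fails the comparison and is not
 * considered for identity mapping, while a 64-bit capable device passes.
 */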
2796
2797 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2798 {
2799         int ret;
2800
2801         if (!iommu_should_identity_map(dev, 1))
2802                 return 0;
2803
2804         ret = domain_add_dev_info(si_domain, dev);
2805         if (!ret)
2806                 pr_info("%s identity mapping for device %s\n",
2807                         hw ? "Hardware" : "Software", dev_name(dev));
2808         else if (ret == -ENODEV)
2809                 /* device not associated with an iommu */
2810                 ret = 0;
2811
2812         return ret;
2813 }
2814
2815
2816 static int __init iommu_prepare_static_identity_mapping(int hw)
2817 {
2818         struct pci_dev *pdev = NULL;
2819         struct dmar_drhd_unit *drhd;
2820         struct intel_iommu *iommu;
2821         struct device *dev;
2822         int i;
2823         int ret = 0;
2824
2825         for_each_pci_dev(pdev) {
2826                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2827                 if (ret)
2828                         return ret;
2829         }
2830
2831         for_each_active_iommu(iommu, drhd)
2832                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2833                         struct acpi_device_physical_node *pn;
2834                         struct acpi_device *adev;
2835
2836                         if (dev->bus != &acpi_bus_type)
2837                                 continue;
2838
2839                         adev = to_acpi_device(dev);
2840                         mutex_lock(&adev->physical_node_lock);
2841                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2842                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2843                                 if (ret)
2844                                         break;
2845                         }
2846                         mutex_unlock(&adev->physical_node_lock);
2847                         if (ret)
2848                                 return ret;
2849                 }
2850
2851         return 0;
2852 }
2853
2854 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2855 {
2856         /*
2857          * Start from a sane iommu hardware state.
2858          * If queued invalidation was already initialized by us
2859          * (for example, while enabling interrupt remapping) then
2860          * things are already rolling from a sane state.
2861          */
2862         if (!iommu->qi) {
2863                 /*
2864                  * Clear any previous faults.
2865                  */
2866                 dmar_fault(-1, iommu);
2867                 /*
2868                  * Disable queued invalidation if supported and already enabled
2869                  * before OS handover.
2870                  */
2871                 dmar_disable_qi(iommu);
2872         }
2873
2874         if (dmar_enable_qi(iommu)) {
2875                 /*
2876                  * Queued invalidation is not enabled, use register-based invalidation
2877                  */
2878                 iommu->flush.flush_context = __iommu_flush_context;
2879                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2880                 pr_info("%s: Using Register based invalidation\n",
2881                         iommu->name);
2882         } else {
2883                 iommu->flush.flush_context = qi_flush_context;
2884                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2885                 pr_info("%s: Using Queued invalidation\n", iommu->name);
2886         }
2887 }
2888
2889 static int copy_context_table(struct intel_iommu *iommu,
2890                               struct root_entry *old_re,
2891                               struct context_entry **tbl,
2892                               int bus, bool ext)
2893 {
2894         int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2895         struct context_entry *new_ce = NULL, ce;
2896         struct context_entry *old_ce = NULL;
2897         struct root_entry re;
2898         phys_addr_t old_ce_phys;
2899
2900         tbl_idx = ext ? bus * 2 : bus;
2901         memcpy(&re, old_re, sizeof(re));
2902
2903         for (devfn = 0; devfn < 256; devfn++) {
2904                 /* First calculate the correct index */
2905                 idx = (ext ? devfn * 2 : devfn) % 256;
2906
2907                 if (idx == 0) {
2908                         /* First save what we may have and clean up */
2909                         if (new_ce) {
2910                                 tbl[tbl_idx] = new_ce;
2911                                 __iommu_flush_cache(iommu, new_ce,
2912                                                     VTD_PAGE_SIZE);
2913                                 pos = 1;
2914                         }
2915
2916                         if (old_ce)
2917                                 iounmap(old_ce);
2918
2919                         ret = 0;
2920                         if (devfn < 0x80)
2921                                 old_ce_phys = root_entry_lctp(&re);
2922                         else
2923                                 old_ce_phys = root_entry_uctp(&re);
2924
2925                         if (!old_ce_phys) {
2926                                 if (ext && devfn == 0) {
2927                                         /* No LCTP, try UCTP */
2928                                         devfn = 0x7f;
2929                                         continue;
2930                                 } else {
2931                                         goto out;
2932                                 }
2933                         }
2934
2935                         ret = -ENOMEM;
2936                         old_ce = memremap(old_ce_phys, PAGE_SIZE,
2937                                         MEMREMAP_WB);
2938                         if (!old_ce)
2939                                 goto out;
2940
2941                         new_ce = alloc_pgtable_page(iommu->node);
2942                         if (!new_ce)
2943                                 goto out_unmap;
2944
2945                         ret = 0;
2946                 }
2947
2948                 /* Now copy the context entry */
2949                 memcpy(&ce, old_ce + idx, sizeof(ce));
2950
2951                 if (!__context_present(&ce))
2952                         continue;
2953
2954                 did = context_domain_id(&ce);
2955                 if (did >= 0 && did < cap_ndoms(iommu->cap))
2956                         set_bit(did, iommu->domain_ids);
2957
2958                 /*
2959                  * We need a marker for copied context entries. This
2960                  * marker needs to work for the old format as well as
2961                  * for extended context entries.
2962                  *
2963                  * Bit 67 of the context entry is used. In the old
2964                  * format this bit is available to software, in the
2965                  * extended format it is the PGE bit, but PGE is ignored
2966                  * by HW if PASIDs are disabled (and thus still
2967                  * available).
2968                  *
2969                  * So disable PASIDs first and then mark the entry
2970                  * copied. This means that we don't copy PASID
2971                  * translations from the old kernel, but this is fine as
2972                  * faults there are not fatal.
2973                  */
2974                 context_clear_pasid_enable(&ce);
2975                 context_set_copied(&ce);
2976
2977                 new_ce[idx] = ce;
2978         }
2979
2980         tbl[tbl_idx + pos] = new_ce;
2981
2982         __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2983
2984 out_unmap:
2985         memunmap(old_ce);
2986
2987 out:
2988         return ret;
2989 }
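
/*
 * Layout note (illustrative): with extended context entries (ext == true)
 * each bus has two context tables, so the copied tables land at
 * tbl[bus * 2] for devfn 0x00-0x7f (lower context-table pointer) and
 * tbl[bus * 2 + 1] for devfn 0x80-0xff (upper context-table pointer); in the
 * legacy format there is a single table at tbl[bus].
 */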
2990
2991 static int copy_translation_tables(struct intel_iommu *iommu)
2992 {
2993         struct context_entry **ctxt_tbls;
2994         struct root_entry *old_rt;
2995         phys_addr_t old_rt_phys;
2996         int ctxt_table_entries;
2997         unsigned long flags;
2998         u64 rtaddr_reg;
2999         int bus, ret;
3000         bool new_ext, ext;
3001
3002         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3003         ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3004         new_ext    = !!ecap_ecs(iommu->ecap);
3005
3006         /*
3007          * The RTT bit can only be changed when translation is disabled,
3008          * but disabling translation means opening a window for data
3009          * corruption. So bail out and don't copy anything if we would
3010          * have to change the bit.
3011          */
3012         if (new_ext != ext)
3013                 return -EINVAL;
3014
3015         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3016         if (!old_rt_phys)
3017                 return -EINVAL;
3018
3019         old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3020         if (!old_rt)
3021                 return -ENOMEM;
3022
3023         /* This is too big for the stack - allocate it from slab */
3024         ctxt_table_entries = ext ? 512 : 256;
3025         ret = -ENOMEM;
3026         ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3027         if (!ctxt_tbls)
3028                 goto out_unmap;
3029
3030         for (bus = 0; bus < 256; bus++) {
3031                 ret = copy_context_table(iommu, &old_rt[bus],
3032                                          ctxt_tbls, bus, ext);
3033                 if (ret) {
3034                         pr_err("%s: Failed to copy context table for bus %d\n",
3035                                 iommu->name, bus);
3036                         continue;
3037                 }
3038         }
3039
3040         spin_lock_irqsave(&iommu->lock, flags);
3041
3042         /* Context tables are copied, now write them to the root_entry table */
3043         for (bus = 0; bus < 256; bus++) {
3044                 int idx = ext ? bus * 2 : bus;
3045                 u64 val;
3046
3047                 if (ctxt_tbls[idx]) {
3048                         val = virt_to_phys(ctxt_tbls[idx]) | 1;
3049                         iommu->root_entry[bus].lo = val;
3050                 }
3051
3052                 if (!ext || !ctxt_tbls[idx + 1])
3053                         continue;
3054
3055                 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3056                 iommu->root_entry[bus].hi = val;
3057         }
3058
3059         spin_unlock_irqrestore(&iommu->lock, flags);
3060
3061         kfree(ctxt_tbls);
3062
3063         __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3064
3065         ret = 0;
3066
3067 out_unmap:
3068         memunmap(old_rt);
3069
3070         return ret;
3071 }
3072
3073 static int __init init_dmars(void)
3074 {
3075         struct dmar_drhd_unit *drhd;
3076         struct dmar_rmrr_unit *rmrr;
3077         bool copied_tables = false;
3078         struct device *dev;
3079         struct intel_iommu *iommu;
3080         int i, ret;
3081
3082         /*
3083          * for each drhd
3084          *    allocate root
3085          *    initialize and program root entry to not present
3086          * endfor
3087          */
3088         for_each_drhd_unit(drhd) {
3089                 /*
3090                  * lock not needed as this is only incremented in the single
3091                  * threaded kernel __init code path; all other accesses are
3092                  * read only
3093                  */
3094                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3095                         g_num_of_iommus++;
3096                         continue;
3097                 }
3098                 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3099         }
3100
3101         /* Preallocate enough resources for IOMMU hot-addition */
3102         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3103                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3104
3105         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3106                         GFP_KERNEL);
3107         if (!g_iommus) {
3108                 pr_err("Allocating global iommu array failed\n");
3109                 ret = -ENOMEM;
3110                 goto error;
3111         }
3112
3113         deferred_flush = kzalloc(g_num_of_iommus *
3114                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3115         if (!deferred_flush) {
3116                 ret = -ENOMEM;
3117                 goto free_g_iommus;
3118         }
3119
3120         for_each_active_iommu(iommu, drhd) {
3121                 g_iommus[iommu->seq_id] = iommu;
3122
3123                 intel_iommu_init_qi(iommu);
3124
3125                 ret = iommu_init_domains(iommu);
3126                 if (ret)
3127                         goto free_iommu;
3128
3129                 init_translation_status(iommu);
3130
3131                 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3132                         iommu_disable_translation(iommu);
3133                         clear_translation_pre_enabled(iommu);
3134                         pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3135                                 iommu->name);
3136                 }
3137
3138                 /*
3139                  * TBD:
3140                  * we could share the same root & context tables
3141                  * among all IOMMUs. Need to split it later.
3142                  */
3143                 ret = iommu_alloc_root_entry(iommu);
3144                 if (ret)
3145                         goto free_iommu;
3146
3147                 if (translation_pre_enabled(iommu)) {
3148                         pr_info("Translation already enabled - trying to copy translation structures\n");
3149
3150                         ret = copy_translation_tables(iommu);
3151                         if (ret) {
3152                                 /*
3153                                  * We found the IOMMU with translation
3154                                  * enabled - but failed to copy over the
3155                                  * old root-entry table. Try to proceed
3156                                  * by disabling translation now and
3157                                  * allocating a clean root-entry table.
3158                                  * This might cause DMAR faults, but
3159                                  * probably the dump will still succeed.
3160                                  */
3161                                 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3162                                        iommu->name);
3163                                 iommu_disable_translation(iommu);
3164                                 clear_translation_pre_enabled(iommu);
3165                         } else {
3166                                 pr_info("Copied translation tables from previous kernel for %s\n",
3167                                         iommu->name);
3168                                 copied_tables = true;
3169                         }
3170                 }
3171
3172                 if (!ecap_pass_through(iommu->ecap))
3173                         hw_pass_through = 0;
3174 #ifdef CONFIG_INTEL_IOMMU_SVM
3175                 if (pasid_enabled(iommu))
3176                         intel_svm_alloc_pasid_tables(iommu);
3177 #endif
3178         }
3179
3180         /*
3181          * Now that qi is enabled on all iommus, set the root entry and flush
3182          * caches. This is required on some Intel X58 chipsets, otherwise the
3183          * flush_context function will loop forever and the boot hangs.
3184          */
3185         for_each_active_iommu(iommu, drhd) {
3186                 iommu_flush_write_buffer(iommu);
3187                 iommu_set_root_entry(iommu);
3188                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3189                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3190         }
3191
3192         if (iommu_pass_through)
3193                 iommu_identity_mapping |= IDENTMAP_ALL;
3194
3195 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3196         iommu_identity_mapping |= IDENTMAP_GFX;
3197 #endif
3198
3199         if (iommu_identity_mapping) {
3200                 ret = si_domain_init(hw_pass_through);
3201                 if (ret)
3202                         goto free_iommu;
3203         }
3204
3205         check_tylersburg_isoch();
3206
3207         /*
3208          * If we copied translations from a previous kernel in the kdump
3209          * case, we can not assign the devices to domains now, as that
3210          * would eliminate the old mappings. So skip this part and defer
3211          * the assignment to device driver initialization time.
3212          */
3213         if (copied_tables)
3214                 goto domains_done;
3215
3216         /*
3217          * If pass through is not set or not enabled, set up context entries
3218          * for identity mappings for RMRR, gfx, and ISA, and possibly fall back
3219          * to a static identity mapping if iommu_identity_mapping is set.
3220          */
3221         if (iommu_identity_mapping) {
3222                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3223                 if (ret) {
3224                         pr_crit("Failed to setup IOMMU pass-through\n");
3225                         goto free_iommu;
3226                 }
3227         }
3228         /*
3229          * For each rmrr
3230          *   for each dev attached to rmrr
3231          *   do
3232          *     locate drhd for dev, alloc domain for dev
3233          *     allocate free domain
3234          *     allocate page table entries for rmrr
3235          *     if context not allocated for bus
3236          *           allocate and init context
3237          *           set present in root table for this bus
3238          *     init context with domain, translation etc
3239          *    endfor
3240          * endfor
3241          */
3242         pr_info("Setting RMRR:\n");
3243         for_each_rmrr_units(rmrr) {
3244                 /* some BIOSes list non-existent devices in the DMAR table. */
3245                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3246                                           i, dev) {
3247                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
3248                         if (ret)
3249                                 pr_err("Mapping reserved region failed\n");
3250                 }
3251         }
3252
3253         iommu_prepare_isa();
3254
3255 domains_done:
3256
3257         /*
3258          * for each drhd
3259          *   enable fault log
3260          *   global invalidate context cache
3261          *   global invalidate iotlb
3262          *   enable translation
3263          */
3264         for_each_iommu(iommu, drhd) {
3265                 if (drhd->ignored) {
3266                         /*
3267                          * we always have to disable PMRs or DMA may fail on
3268                          * this device
3269                          */
3270                         if (force_on)
3271                                 iommu_disable_protect_mem_regions(iommu);
3272                         continue;
3273                 }
3274
3275                 iommu_flush_write_buffer(iommu);
3276
3277 #ifdef CONFIG_INTEL_IOMMU_SVM
3278                 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3279                         ret = intel_svm_enable_prq(iommu);
3280                         if (ret)
3281                                 goto free_iommu;
3282                 }
3283 #endif
3284                 ret = dmar_set_interrupt(iommu);
3285                 if (ret)
3286                         goto free_iommu;
3287
3288                 if (!translation_pre_enabled(iommu))
3289                         iommu_enable_translation(iommu);
3290
3291                 iommu_disable_protect_mem_regions(iommu);
3292         }
3293
3294         return 0;
3295
3296 free_iommu:
3297         for_each_active_iommu(iommu, drhd) {
3298                 disable_dmar_iommu(iommu);
3299                 free_dmar_iommu(iommu);
3300         }
3301         kfree(deferred_flush);
3302 free_g_iommus:
3303         kfree(g_iommus);
3304 error:
3305         return ret;
3306 }
3307
3308 /* This takes a number of _MM_ pages, not VTD pages */
3309 static struct iova *intel_alloc_iova(struct device *dev,
3310                                      struct dmar_domain *domain,
3311                                      unsigned long nrpages, uint64_t dma_mask)
3312 {
3313         struct iova *iova = NULL;
3314
3315         /* Restrict dma_mask to the width that the iommu can handle */
3316         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3317         /* Ensure we reserve the whole size-aligned region */
3318         nrpages = __roundup_pow_of_two(nrpages);
3319
3320         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3321                 /*
3322                  * First try to allocate an io virtual address in
3323                  * DMA_BIT_MASK(32) and if that fails then try allocating
3324                  * from higher range
3325                  */
3326                 iova = alloc_iova(&domain->iovad, nrpages,
3327                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
3328                 if (iova)
3329                         return iova;
3330         }
3331         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3332         if (unlikely(!iova)) {
3333                 pr_err("Allocating %ld-page iova for %s failed\n",
3334                        nrpages, dev_name(dev));
3335                 return NULL;
3336         }
3337
3338         return iova;
3339 }
3340
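/*
 * Find or allocate a DMA domain for @dev and set up identity mappings for
 * any RMRR regions whose device scope includes this device.
 */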
3341 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3342 {
3343         struct dmar_rmrr_unit *rmrr;
3344         struct dmar_domain *domain;
3345         struct device *i_dev;
3346         int i, ret;
3347
3348         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3349         if (!domain) {
3350                 pr_err("Allocating domain for %s failed\n",
3351                        dev_name(dev));
3352                 return NULL;
3353         }
3354
3355         /* We have a new domain - setup possible RMRRs for the device */
3356         rcu_read_lock();
3357         for_each_rmrr_units(rmrr) {
3358                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3359                                           i, i_dev) {
3360                         if (i_dev != dev)
3361                                 continue;
3362
3363                         ret = domain_prepare_identity_map(dev, domain,
3364                                                           rmrr->base_address,
3365                                                           rmrr->end_address);
3366                         if (ret)
3367                                 dev_err(dev, "Mapping reserved region failed\n");
3368                 }
3369         }
3370         rcu_read_unlock();
3371
3372         return domain;
3373 }
3374
3375 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3376 {
3377         struct device_domain_info *info;
3378
3379         /* No lock here, assumes no domain exit in normal case */
3380         info = dev->archdata.iommu;
3381         if (likely(info))
3382                 return info->domain;
3383
3384         return __get_valid_domain_for_dev(dev);
3385 }
3386
3387 /* Check if the dev needs to go through the non-identity map and unmap process. */
3388 static int iommu_no_mapping(struct device *dev)
3389 {
3390         int found;
3391
3392         if (iommu_dummy(dev))
3393                 return 1;
3394
3395         if (!iommu_identity_mapping)
3396                 return 0;
3397
3398         found = identity_mapping(dev);
3399         if (found) {
3400                 if (iommu_should_identity_map(dev, 0))
3401                         return 1;
3402                 else {
3403                         /*
3404                          * 32 bit DMA device is removed from si_domain and falls
3405                          * back to non-identity mapping.
3406                          */
3407                         dmar_remove_one_dev_info(si_domain, dev);
3408                         pr_info("32bit %s uses non-identity mapping\n",
3409                                 dev_name(dev));
3410                         return 0;
3411                 }
3412         } else {
3413                 /*
3414                          * A 64 bit DMA device detached from a VM is put back into
3415                          * si_domain for identity mapping.
3416                  */
3417                 if (iommu_should_identity_map(dev, 0)) {
3418                         int ret;
3419                         ret = domain_add_dev_info(si_domain, dev);
3420                         if (!ret) {
3421                                 pr_info("64bit %s uses identity mapping\n",
3422                                         dev_name(dev));
3423                                 return 1;
3424                         }
3425                 }
3426         }
3427
3428         return 0;
3429 }
3430
3431 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3432                                      size_t size, int dir, u64 dma_mask)
3433 {
3434         struct dmar_domain *domain;
3435         phys_addr_t start_paddr;
3436         struct iova *iova;
3437         int prot = 0;
3438         int ret;
3439         struct intel_iommu *iommu;
3440         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3441
3442         BUG_ON(dir == DMA_NONE);
3443
3444         if (iommu_no_mapping(dev))
3445                 return paddr;
3446
3447         domain = get_valid_domain_for_dev(dev);
3448         if (!domain)
3449                 return 0;
3450
3451         iommu = domain_get_iommu(domain);
3452         size = aligned_nrpages(paddr, size);
3453
3454         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3455         if (!iova)
3456                 goto error;
3457
3458         /*
3459          * Check if DMAR supports zero-length reads on write only
3460          * mappings.
3461          */
3462         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3463                         !cap_zlr(iommu->cap))
3464                 prot |= DMA_PTE_READ;
3465         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3466                 prot |= DMA_PTE_WRITE;
3467         /*
3468          * paddr - (paddr + size) might be a partial page; we should map the whole
3469          * page.  Note: if two parts of one page are separately mapped, we
3470          * might have two guest_addr mappings to the same host paddr, but this
3471          * is not a big problem
3472          */
3473         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3474                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3475         if (ret)
3476                 goto error;
3477
3478         /* it's a non-present to present mapping. Only flush if caching mode */
3479         if (cap_caching_mode(iommu->cap))
3480                 iommu_flush_iotlb_psi(iommu, domain,
3481                                       mm_to_dma_pfn(iova->pfn_lo),
3482                                       size, 0, 1);
3483         else
3484                 iommu_flush_write_buffer(iommu);
3485
3486         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3487         start_paddr += paddr & ~PAGE_MASK;
3488         return start_paddr;
3489
3490 error:
3491         if (iova)
3492                 __free_iova(&domain->iovad, iova);
3493         pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3494                 dev_name(dev), size, (unsigned long long)paddr, dir);
3495         return 0;
3496 }
3497
3498 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3499                                  unsigned long offset, size_t size,
3500                                  enum dma_data_direction dir,
3501                                  struct dma_attrs *attrs)
3502 {
3503         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3504                                   dir, *dev->dma_mask);
3505 }
3506
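/*
 * Flush all pending deferred unmaps.  In caching mode each queued IOVA
 * range is invalidated individually (global flushes make emulation
 * expensive); otherwise one global IOTLB flush plus per-range device-IOTLB
 * flushes are used.  The queued IOVAs and page freelists are then released.
 */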
3507 static void flush_unmaps(void)
3508 {
3509         int i, j;
3510
3511         timer_on = 0;
3512
3513         /* just flush them all */
3514         for (i = 0; i < g_num_of_iommus; i++) {
3515                 struct intel_iommu *iommu = g_iommus[i];
3516                 if (!iommu)
3517                         continue;
3518
3519                 if (!deferred_flush[i].next)
3520                         continue;
3521
3522                 /* In caching mode, global flushes make emulation expensive */
3523                 if (!cap_caching_mode(iommu->cap))
3524                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3525                                          DMA_TLB_GLOBAL_FLUSH);
3526                 for (j = 0; j < deferred_flush[i].next; j++) {
3527                         unsigned long mask;
3528                         struct iova *iova = deferred_flush[i].iova[j];
3529                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3530
3531                         /* On real hardware multiple invalidations are expensive */
3532                         if (cap_caching_mode(iommu->cap))
3533                                 iommu_flush_iotlb_psi(iommu, domain,
3534                                         iova->pfn_lo, iova_size(iova),
3535                                         !deferred_flush[i].freelist[j], 0);
3536                         else {
3537                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3538                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3539                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3540                         }
3541                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3542                         if (deferred_flush[i].freelist[j])
3543                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3544                 }
3545                 deferred_flush[i].next = 0;
3546         }
3547
3548         list_size = 0;
3549 }
3550
3551 static void flush_unmaps_timeout(unsigned long data)
3552 {
3553         unsigned long flags;
3554
3555         spin_lock_irqsave(&async_umap_flush_lock, flags);
3556         flush_unmaps();
3557         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3558 }
3559
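/*
 * Queue an unmapped IOVA range for deferred invalidation.  The queue is
 * drained when it reaches HIGH_WATER_MARK entries or when the 10ms unmap
 * timer fires, whichever comes first.
 */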
3560 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3561 {
3562         unsigned long flags;
3563         int next, iommu_id;
3564         struct intel_iommu *iommu;
3565
3566         spin_lock_irqsave(&async_umap_flush_lock, flags);
3567         if (list_size == HIGH_WATER_MARK)
3568                 flush_unmaps();
3569
3570         iommu = domain_get_iommu(dom);
3571         iommu_id = iommu->seq_id;
3572
3573         next = deferred_flush[iommu_id].next;
3574         deferred_flush[iommu_id].domain[next] = dom;
3575         deferred_flush[iommu_id].iova[next] = iova;
3576         deferred_flush[iommu_id].freelist[next] = freelist;
3577         deferred_flush[iommu_id].next++;
3578
3579         if (!timer_on) {
3580                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3581                 timer_on = 1;
3582         }
3583         list_size++;
3584         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3585 }
3586
3587 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3588 {
3589         struct dmar_domain *domain;
3590         unsigned long start_pfn, last_pfn;
3591         struct iova *iova;
3592         struct intel_iommu *iommu;
3593         struct page *freelist;
3594
3595         if (iommu_no_mapping(dev))
3596                 return;
3597
3598         domain = find_domain(dev);
3599         BUG_ON(!domain);
3600
3601         iommu = domain_get_iommu(domain);
3602
3603         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3604         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3605                       (unsigned long long)dev_addr))
3606                 return;
3607
3608         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3609         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3610
3611         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3612                  dev_name(dev), start_pfn, last_pfn);
3613
3614         freelist = domain_unmap(domain, start_pfn, last_pfn);
3615
3616         if (intel_iommu_strict) {
3617                 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3618                                       last_pfn - start_pfn + 1, !freelist, 0);
3619                 /* free iova */
3620                 __free_iova(&domain->iovad, iova);
3621                 dma_free_pagelist(freelist);
3622         } else {
3623                 add_unmap(domain, iova, freelist);
3624                 /*
3625                  * queue up the release of the unmap to save roughly 1/6th of the
3626                  * CPU time otherwise used up by the iotlb flush operation...
3627                  */
3628         }
3629 }
3630
3631 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3632                              size_t size, enum dma_data_direction dir,
3633                              struct dma_attrs *attrs)
3634 {
3635         intel_unmap(dev, dev_addr);
3636 }
3637
3638 static void *intel_alloc_coherent(struct device *dev, size_t size,
3639                                   dma_addr_t *dma_handle, gfp_t flags,
3640                                   struct dma_attrs *attrs)
3641 {
3642         struct page *page = NULL;
3643         int order;
3644
3645         size = PAGE_ALIGN(size);
3646         order = get_order(size);
3647
3648         if (!iommu_no_mapping(dev))
3649                 flags &= ~(GFP_DMA | GFP_DMA32);
3650         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3651                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3652                         flags |= GFP_DMA;
3653                 else
3654                         flags |= GFP_DMA32;
3655         }
3656
3657         if (gfpflags_allow_blocking(flags)) {
3658                 unsigned int count = size >> PAGE_SHIFT;
3659
3660                 page = dma_alloc_from_contiguous(dev, count, order);
3661                 if (page && iommu_no_mapping(dev) &&
3662                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3663                         dma_release_from_contiguous(dev, page, count);
3664                         page = NULL;
3665                 }
3666         }
3667
3668         if (!page)
3669                 page = alloc_pages(flags, order);
3670         if (!page)
3671                 return NULL;
3672         memset(page_address(page), 0, size);
3673
3674         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3675                                          DMA_BIDIRECTIONAL,
3676                                          dev->coherent_dma_mask);
3677         if (*dma_handle)
3678                 return page_address(page);
3679         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3680                 __free_pages(page, order);
3681
3682         return NULL;
3683 }
3684
3685 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3686                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3687 {
3688         int order;
3689         struct page *page = virt_to_page(vaddr);
3690
3691         size = PAGE_ALIGN(size);
3692         order = get_order(size);
3693
3694         intel_unmap(dev, dma_handle);
3695         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3696                 __free_pages(page, order);
3697 }
3698
3699 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3700                            int nelems, enum dma_data_direction dir,
3701                            struct dma_attrs *attrs)
3702 {
3703         intel_unmap(dev, sglist[0].dma_address);
3704 }
3705
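/*
 * For devices that bypass the IOMMU, simply hand back the physical
 * addresses of the scatterlist pages.
 */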
3706 static int intel_nontranslate_map_sg(struct device *hddev,
3707         struct scatterlist *sglist, int nelems, int dir)
3708 {
3709         int i;
3710         struct scatterlist *sg;
3711
3712         for_each_sg(sglist, sg, nelems, i) {
3713                 BUG_ON(!sg_page(sg));
3714                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3715                 sg->dma_length = sg->length;
3716         }
3717         return nelems;
3718 }
3719
3720 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3721                         enum dma_data_direction dir, struct dma_attrs *attrs)
3722 {
3723         int i;
3724         struct dmar_domain *domain;
3725         size_t size = 0;
3726         int prot = 0;
3727         struct iova *iova = NULL;
3728         int ret;
3729         struct scatterlist *sg;
3730         unsigned long start_vpfn;
3731         struct intel_iommu *iommu;
3732
3733         BUG_ON(dir == DMA_NONE);
3734         if (iommu_no_mapping(dev))
3735                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3736
3737         domain = get_valid_domain_for_dev(dev);
3738         if (!domain)
3739                 return 0;
3740
3741         iommu = domain_get_iommu(domain);
3742
3743         for_each_sg(sglist, sg, nelems, i)
3744                 size += aligned_nrpages(sg->offset, sg->length);
3745
3746         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3747                                 *dev->dma_mask);
3748         if (!iova) {
3749                 sglist->dma_length = 0;
3750                 return 0;
3751         }
3752
3753         /*
3754          * Check if DMAR supports zero-length reads on write only
3755          * mappings.
3756          */
3757         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3758                         !cap_zlr(iommu->cap))
3759                 prot |= DMA_PTE_READ;
3760         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3761                 prot |= DMA_PTE_WRITE;
3762
3763         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3764
3765         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3766         if (unlikely(ret)) {
3767                 dma_pte_free_pagetable(domain, start_vpfn,
3768                                        start_vpfn + size - 1);
3769                 __free_iova(&domain->iovad, iova);
3770                 return 0;
3771         }
3772
3773         /* it's a non-present to present mapping. Only flush if caching mode */
3774         if (cap_caching_mode(iommu->cap))
3775                 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
3776         else
3777                 iommu_flush_write_buffer(iommu);
3778
3779         return nelems;
3780 }
3781
3782 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3783 {
3784         return !dma_addr;
3785 }
3786
3787 struct dma_map_ops intel_dma_ops = {
3788         .alloc = intel_alloc_coherent,
3789         .free = intel_free_coherent,
3790         .map_sg = intel_map_sg,
3791         .unmap_sg = intel_unmap_sg,
3792         .map_page = intel_map_page,
3793         .unmap_page = intel_unmap_page,
3794         .mapping_error = intel_mapping_error,
3795 };
3796
3797 static inline int iommu_domain_cache_init(void)
3798 {
3799         int ret = 0;
3800
3801         iommu_domain_cache = kmem_cache_create("iommu_domain",
3802                                          sizeof(struct dmar_domain),
3803                                          0,
3804                                          SLAB_HWCACHE_ALIGN,
3806                                          NULL);
3807         if (!iommu_domain_cache) {
3808                 pr_err("Couldn't create iommu_domain cache\n");
3809                 ret = -ENOMEM;
3810         }
3811
3812         return ret;
3813 }
3814
3815 static inline int iommu_devinfo_cache_init(void)
3816 {
3817         int ret = 0;
3818
3819         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3820                                          sizeof(struct device_domain_info),
3821                                          0,
3822                                          SLAB_HWCACHE_ALIGN,
3823                                          NULL);
3824         if (!iommu_devinfo_cache) {
3825                 pr_err("Couldn't create devinfo cache\n");
3826                 ret = -ENOMEM;
3827         }
3828
3829         return ret;
3830 }
3831
3832 static int __init iommu_init_mempool(void)
3833 {
3834         int ret;
3835         ret = iova_cache_get();
3836         if (ret)
3837                 return ret;
3838
3839         ret = iommu_domain_cache_init();
3840         if (ret)
3841                 goto domain_error;
3842
3843         ret = iommu_devinfo_cache_init();
3844         if (!ret)
3845                 return ret;
3846
3847         kmem_cache_destroy(iommu_domain_cache);
3848 domain_error:
3849         iova_cache_put();
3850
3851         return -ENOMEM;
3852 }
3853
3854 static void __init iommu_exit_mempool(void)
3855 {
3856         kmem_cache_destroy(iommu_devinfo_cache);
3857         kmem_cache_destroy(iommu_domain_cache);
3858         iova_cache_put();
3859 }
3860
3861 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3862 {
3863         struct dmar_drhd_unit *drhd;
3864         u32 vtbar;
3865         int rc;
3866
3867         /* We know that this device on this chipset has its own IOMMU.
3868          * If we find it under a different IOMMU, then the BIOS is lying
3869          * to us. Hope that the IOMMU for this device is actually
3870          * disabled, and it needs no translation...
3871          */
3872         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3873         if (rc) {
3874                 /* "can't" happen */
3875                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3876                 return;
3877         }
3878         vtbar &= 0xffff0000;
3879
3880         /* we know that this iommu should be at offset 0xa000 from vtbar */
3881         drhd = dmar_find_matched_drhd_unit(pdev);
3882         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3883                             TAINT_FIRMWARE_WORKAROUND,
3884                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3885                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3886 }
3887 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3888
3889 static void __init init_no_remapping_devices(void)
3890 {
3891         struct dmar_drhd_unit *drhd;
3892         struct device *dev;
3893         int i;
3894
3895         for_each_drhd_unit(drhd) {
3896                 if (!drhd->include_all) {
3897                         for_each_active_dev_scope(drhd->devices,
3898                                                   drhd->devices_cnt, i, dev)
3899                                 break;
3900                         /* ignore DMAR unit if no devices exist */
3901                         if (i == drhd->devices_cnt)
3902                                 drhd->ignored = 1;
3903                 }
3904         }
3905
3906         for_each_active_drhd_unit(drhd) {
3907                 if (drhd->include_all)
3908                         continue;
3909
3910                 for_each_active_dev_scope(drhd->devices,
3911                                           drhd->devices_cnt, i, dev)
3912                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3913                                 break;
3914                 if (i < drhd->devices_cnt)
3915                         continue;
3916
3917                 /* This IOMMU has *only* gfx devices. Either bypass it or
3918                    set the gfx_mapped flag, as appropriate */
3919                 if (dmar_map_gfx) {
3920                         intel_iommu_gfx_mapped = 1;
3921                 } else {
3922                         drhd->ignored = 1;
3923                         for_each_active_dev_scope(drhd->devices,
3924                                                   drhd->devices_cnt, i, dev)
3925                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3926                 }
3927         }
3928 }
3929
3930 #ifdef CONFIG_SUSPEND
3931 static int init_iommu_hw(void)
3932 {
3933         struct dmar_drhd_unit *drhd;
3934         struct intel_iommu *iommu = NULL;
3935
3936         for_each_active_iommu(iommu, drhd)
3937                 if (iommu->qi)
3938                         dmar_reenable_qi(iommu);
3939
3940         for_each_iommu(iommu, drhd) {
3941                 if (drhd->ignored) {
3942                         /*
3943                          * we always have to disable PMRs or DMA may fail on
3944                          * this device
3945                          */
3946                         if (force_on)
3947                                 iommu_disable_protect_mem_regions(iommu);
3948                         continue;
3949                 }
3950
3951                 iommu_flush_write_buffer(iommu);
3952
3953                 iommu_set_root_entry(iommu);
3954
3955                 iommu->flush.flush_context(iommu, 0, 0, 0,
3956                                            DMA_CCMD_GLOBAL_INVL);
3957                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3958                 iommu_enable_translation(iommu);
3959                 iommu_disable_protect_mem_regions(iommu);
3960         }
3961
3962         return 0;
3963 }
3964
3965 static void iommu_flush_all(void)
3966 {
3967         struct dmar_drhd_unit *drhd;
3968         struct intel_iommu *iommu;
3969
3970         for_each_active_iommu(iommu, drhd) {
3971                 iommu->flush.flush_context(iommu, 0, 0, 0,
3972                                            DMA_CCMD_GLOBAL_INVL);
3973                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3974                                          DMA_TLB_GLOBAL_FLUSH);
3975         }
3976 }
3977
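/*
 * Save the fault-event registers of every active IOMMU and disable
 * translation before suspend; iommu_resume() restores the saved state.
 */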
3978 static int iommu_suspend(void)
3979 {
3980         struct dmar_drhd_unit *drhd;
3981         struct intel_iommu *iommu = NULL;
3982         unsigned long flag;
3983
3984         for_each_active_iommu(iommu, drhd) {
3985                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3986                                                  GFP_ATOMIC);
3987                 if (!iommu->iommu_state)
3988                         goto nomem;
3989         }
3990
3991         iommu_flush_all();
3992
3993         for_each_active_iommu(iommu, drhd) {
3994                 iommu_disable_translation(iommu);
3995
3996                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3997
3998                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3999                         readl(iommu->reg + DMAR_FECTL_REG);
4000                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4001                         readl(iommu->reg + DMAR_FEDATA_REG);
4002                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4003                         readl(iommu->reg + DMAR_FEADDR_REG);
4004                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4005                         readl(iommu->reg + DMAR_FEUADDR_REG);
4006
4007                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4008         }
4009         return 0;
4010
4011 nomem:
4012         for_each_active_iommu(iommu, drhd)
4013                 kfree(iommu->iommu_state);
4014
4015         return -ENOMEM;
4016 }
4017
4018 static void iommu_resume(void)
4019 {
4020         struct dmar_drhd_unit *drhd;
4021         struct intel_iommu *iommu = NULL;
4022         unsigned long flag;
4023
4024         if (init_iommu_hw()) {
4025                 if (force_on)
4026                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4027                 else
4028                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4029                 return;
4030         }
4031
4032         for_each_active_iommu(iommu, drhd) {
4033
4034                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4035
4036                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4037                         iommu->reg + DMAR_FECTL_REG);
4038                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4039                         iommu->reg + DMAR_FEDATA_REG);
4040                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4041                         iommu->reg + DMAR_FEADDR_REG);
4042                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4043                         iommu->reg + DMAR_FEUADDR_REG);
4044
4045                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4046         }
4047
4048         for_each_active_iommu(iommu, drhd)
4049                 kfree(iommu->iommu_state);
4050 }
4051
4052 static struct syscore_ops iommu_syscore_ops = {
4053         .resume         = iommu_resume,
4054         .suspend        = iommu_suspend,
4055 };
4056
4057 static void __init init_iommu_pm_ops(void)
4058 {
4059         register_syscore_ops(&iommu_syscore_ops);
4060 }
4061
4062 #else
4063 static inline void init_iommu_pm_ops(void) {}
4064 #endif  /* CONFIG_SUSPEND */
4065
4066
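/*
 * Parse one Reserved Memory Region Reporting (RMRR) structure from the ACPI
 * DMAR table and add it, together with its device scope, to the global
 * dmar_rmrr_units list.
 */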
4067 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4068 {
4069         struct acpi_dmar_reserved_memory *rmrr;
4070         struct dmar_rmrr_unit *rmrru;
4071
4072         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4073         if (!rmrru)
4074                 return -ENOMEM;
4075
4076         rmrru->hdr = header;
4077         rmrr = (struct acpi_dmar_reserved_memory *)header;
4078         rmrru->base_address = rmrr->base_address;
4079         rmrru->end_address = rmrr->end_address;
4080         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4081                                 ((void *)rmrr) + rmrr->header.length,
4082                                 &rmrru->devices_cnt);
4083         if (rmrru->devices_cnt && rmrru->devices == NULL) {
4084                 kfree(rmrru);
4085                 return -ENOMEM;
4086         }
4087
4088         list_add(&rmrru->list, &dmar_rmrr_units);
4089
4090         return 0;
4091 }
4092
4093 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4094 {
4095         struct dmar_atsr_unit *atsru;
4096         struct acpi_dmar_atsr *tmp;
4097
4098         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4099                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4100                 if (atsr->segment != tmp->segment)
4101                         continue;
4102                 if (atsr->header.length != tmp->header.length)
4103                         continue;
4104                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4105                         return atsru;
4106         }
4107
4108         return NULL;
4109 }
4110
4111 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4112 {
4113         struct acpi_dmar_atsr *atsr;
4114         struct dmar_atsr_unit *atsru;
4115
4116         if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4117                 return 0;
4118
4119         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4120         atsru = dmar_find_atsr(atsr);
4121         if (atsru)
4122                 return 0;
4123
4124         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4125         if (!atsru)
4126                 return -ENOMEM;
4127
4128         /*
4129          * If memory is allocated from slab by ACPI _DSM method, we need to
4130          * copy the memory content because the memory buffer will be freed
4131          * on return.
4132          */
4133         atsru->hdr = (void *)(atsru + 1);
4134         memcpy(atsru->hdr, hdr, hdr->length);
4135         atsru->include_all = atsr->flags & 0x1;
4136         if (!atsru->include_all) {
4137                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4138                                 (void *)atsr + atsr->header.length,
4139                                 &atsru->devices_cnt);
4140                 if (atsru->devices_cnt && atsru->devices == NULL) {
4141                         kfree(atsru);
4142                         return -ENOMEM;
4143                 }
4144         }
4145
4146         list_add_rcu(&atsru->list, &dmar_atsr_units);
4147
4148         return 0;
4149 }
4150
4151 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4152 {
4153         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4154         kfree(atsru);
4155 }
4156
4157 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4158 {
4159         struct acpi_dmar_atsr *atsr;
4160         struct dmar_atsr_unit *atsru;
4161
4162         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4163         atsru = dmar_find_atsr(atsr);
4164         if (atsru) {
4165                 list_del_rcu(&atsru->list);
4166                 synchronize_rcu();
4167                 intel_iommu_free_atsr(atsru);
4168         }
4169
4170         return 0;
4171 }
4172
4173 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4174 {
4175         int i;
4176         struct device *dev;
4177         struct acpi_dmar_atsr *atsr;
4178         struct dmar_atsr_unit *atsru;
4179
4180         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4181         atsru = dmar_find_atsr(atsr);
4182         if (!atsru)
4183                 return 0;
4184
4185         if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4186                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4187                                           i, dev)
4188                         return -EBUSY;
4189         }
4190
4191         return 0;
4192 }
4193
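/*
 * Bring up a hot-added DMAR unit: check its capabilities against the
 * features already in use, allocate domains and a root entry, and enable
 * translation unless the unit is ignored.
 */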
4194 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4195 {
4196         int sp, ret = 0;
4197         struct intel_iommu *iommu = dmaru->iommu;
4198
4199         if (g_iommus[iommu->seq_id])
4200                 return 0;
4201
4202         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4203                 pr_warn("%s: Doesn't support hardware pass through.\n",
4204                         iommu->name);
4205                 return -ENXIO;
4206         }
4207         if (!ecap_sc_support(iommu->ecap) &&
4208             domain_update_iommu_snooping(iommu)) {
4209                 pr_warn("%s: Doesn't support snooping.\n",
4210                         iommu->name);
4211                 return -ENXIO;
4212         }
4213         sp = domain_update_iommu_superpage(iommu) - 1;
4214         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4215                 pr_warn("%s: Doesn't support large page.\n",
4216                         iommu->name);
4217                 return -ENXIO;
4218         }
4219
4220         /*
4221          * Disable translation if already enabled prior to OS handover.
4222          */
4223         if (iommu->gcmd & DMA_GCMD_TE)
4224                 iommu_disable_translation(iommu);
4225
4226         g_iommus[iommu->seq_id] = iommu;
4227         ret = iommu_init_domains(iommu);
4228         if (ret == 0)
4229                 ret = iommu_alloc_root_entry(iommu);
4230         if (ret)
4231                 goto out;
4232
4233 #ifdef CONFIG_INTEL_IOMMU_SVM
4234         if (pasid_enabled(iommu))
4235                 intel_svm_alloc_pasid_tables(iommu);
4236 #endif
4237
4238         if (dmaru->ignored) {
4239                 /*
4240                  * we always have to disable PMRs or DMA may fail on this device
4241                  */
4242                 if (force_on)
4243                         iommu_disable_protect_mem_regions(iommu);
4244                 return 0;
4245         }
4246
4247         intel_iommu_init_qi(iommu);
4248         iommu_flush_write_buffer(iommu);
4249
4250 #ifdef CONFIG_INTEL_IOMMU_SVM
4251         if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4252                 ret = intel_svm_enable_prq(iommu);
4253                 if (ret)
4254                         goto disable_iommu;
4255         }
4256 #endif
4257         ret = dmar_set_interrupt(iommu);
4258         if (ret)
4259                 goto disable_iommu;
4260
4261         iommu_set_root_entry(iommu);
4262         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4263         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4264         iommu_enable_translation(iommu);
4265
4266         iommu_disable_protect_mem_regions(iommu);
4267         return 0;
4268
4269 disable_iommu:
4270         disable_dmar_iommu(iommu);
4271 out:
4272         free_dmar_iommu(iommu);
4273         return ret;
4274 }
4275
4276 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4277 {
4278         int ret = 0;
4279         struct intel_iommu *iommu = dmaru->iommu;
4280
4281         if (!intel_iommu_enabled)
4282                 return 0;
4283         if (iommu == NULL)
4284                 return -EINVAL;
4285
4286         if (insert) {
4287                 ret = intel_iommu_add(dmaru);
4288         } else {
4289                 disable_dmar_iommu(iommu);
4290                 free_dmar_iommu(iommu);
4291         }
4292
4293         return ret;
4294 }
4295
4296 static void intel_iommu_free_dmars(void)
4297 {
4298         struct dmar_rmrr_unit *rmrru, *rmrr_n;
4299         struct dmar_atsr_unit *atsru, *atsr_n;
4300
4301         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4302                 list_del(&rmrru->list);
4303                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4304                 kfree(rmrru);
4305         }
4306
4307         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4308                 list_del(&atsru->list);
4309                 intel_iommu_free_atsr(atsru);
4310         }
4311 }
4312
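/*
 * Decide whether ATS may be used for @dev by walking up to its PCIe root
 * port and looking that port up in the ATSR structures.  Returns 1 if ATS
 * is allowed, 0 otherwise.
 */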
4313 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4314 {
4315         int i, ret = 1;
4316         struct pci_bus *bus;
4317         struct pci_dev *bridge = NULL;
4318         struct device *tmp;
4319         struct acpi_dmar_atsr *atsr;
4320         struct dmar_atsr_unit *atsru;
4321
4322         dev = pci_physfn(dev);
4323         for (bus = dev->bus; bus; bus = bus->parent) {
4324                 bridge = bus->self;
4325                 /* If it's an integrated device, allow ATS */
4326                 if (!bridge)
4327                         return 1;
4328                 /* Connected via non-PCIe: no ATS */
4329                 if (!pci_is_pcie(bridge) ||
4330                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4331                         return 0;
4332                 /* If we found the root port, look it up in the ATSR */
4333                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4334                         break;
4335         }
4336
4337         rcu_read_lock();
4338         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4339                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4340                 if (atsr->segment != pci_domain_nr(dev->bus))
4341                         continue;
4342
4343                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4344                         if (tmp == &bridge->dev)
4345                                 goto out;
4346
4347                 if (atsru->include_all)
4348                         goto out;
4349         }
4350         ret = 0;
4351 out:
4352         rcu_read_unlock();
4353
4354         return ret;
4355 }
4356
4357 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4358 {
4359         int ret = 0;
4360         struct dmar_rmrr_unit *rmrru;
4361         struct dmar_atsr_unit *atsru;
4362         struct acpi_dmar_atsr *atsr;
4363         struct acpi_dmar_reserved_memory *rmrr;
4364
4365         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4366                 return 0;
4367
4368         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4369                 rmrr = container_of(rmrru->hdr,
4370                                     struct acpi_dmar_reserved_memory, header);
4371                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4372                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4373                                 ((void *)rmrr) + rmrr->header.length,
4374                                 rmrr->segment, rmrru->devices,
4375                                 rmrru->devices_cnt);
4376                         if (ret < 0)
4377                                 return ret;
4378                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4379                         dmar_remove_dev_scope(info, rmrr->segment,
4380                                 rmrru->devices, rmrru->devices_cnt);
4381                 }
4382         }
4383
4384         list_for_each_entry(atsru, &dmar_atsr_units, list) {
4385                 if (atsru->include_all)
4386                         continue;
4387
4388                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4389                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4390                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4391                                         (void *)atsr + atsr->header.length,
4392                                         atsr->segment, atsru->devices,
4393                                         atsru->devices_cnt);
4394                         if (ret > 0)
4395                                 break;
4396                         else if (ret < 0)
4397                                 return ret;
4398                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4399                         if (dmar_remove_dev_scope(info, atsr->segment,
4400                                         atsru->devices, atsru->devices_cnt))
4401                                 break;
4402                 }
4403         }
4404
4405         return 0;
4406 }
4407
4408 /*
4409  * Here we only respond to a device being unbound from its driver.
4410  *
4411  * An added device is not attached to its DMAR domain here yet. That will
4412  * happen when the device is mapped to an iova.
4413  */
4414 static int device_notifier(struct notifier_block *nb,
4415                                   unsigned long action, void *data)
4416 {
4417         struct device *dev = data;
4418         struct dmar_domain *domain;
4419
4420         if (iommu_dummy(dev))
4421                 return 0;
4422
4423         if (action != BUS_NOTIFY_REMOVED_DEVICE)
4424                 return 0;
4425
4426         domain = find_domain(dev);
4427         if (!domain)
4428                 return 0;
4429
4430         dmar_remove_one_dev_info(domain, dev);
4431         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4432                 domain_exit(domain);
4433
4434         return 0;
4435 }
4436
4437 static struct notifier_block device_nb = {
4438         .notifier_call = device_notifier,
4439 };
4440
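/*
 * Keep the static identity mapping (si_domain) in sync with memory hotplug:
 * map ranges that are coming online and tear down the mappings of ranges
 * that go offline.
 */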
4441 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4442                                        unsigned long val, void *v)
4443 {
4444         struct memory_notify *mhp = v;
4445         unsigned long long start, end;
4446         unsigned long start_vpfn, last_vpfn;
4447
4448         switch (val) {
4449         case MEM_GOING_ONLINE:
4450                 start = mhp->start_pfn << PAGE_SHIFT;
4451                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4452                 if (iommu_domain_identity_map(si_domain, start, end)) {
4453                         pr_warn("Failed to build identity map for [%llx-%llx]\n",
4454                                 start, end);
4455                         return NOTIFY_BAD;
4456                 }
4457                 break;
4458
4459         case MEM_OFFLINE:
4460         case MEM_CANCEL_ONLINE:
4461                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4462                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4463                 while (start_vpfn <= last_vpfn) {
4464                         struct iova *iova;
4465                         struct dmar_drhd_unit *drhd;
4466                         struct intel_iommu *iommu;
4467                         struct page *freelist;
4468
4469                         iova = find_iova(&si_domain->iovad, start_vpfn);
4470                         if (iova == NULL) {
4471                                 pr_debug("Failed to get IOVA for PFN %lx\n",
4472                                          start_vpfn);
4473                                 break;
4474                         }
4475
4476                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4477                                                      start_vpfn, last_vpfn);
4478                         if (iova == NULL) {
4479                                 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4480                                         start_vpfn, last_vpfn);
4481                                 return NOTIFY_BAD;
4482                         }
4483
4484                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4485                                                iova->pfn_hi);
4486
4487                         rcu_read_lock();
4488                         for_each_active_iommu(iommu, drhd)
4489                                 iommu_flush_iotlb_psi(iommu, si_domain,
4490                                         iova->pfn_lo, iova_size(iova),
4491                                         !freelist, 0);
4492                         rcu_read_unlock();
4493                         dma_free_pagelist(freelist);
4494
4495                         start_vpfn = iova->pfn_hi + 1;
4496                         free_iova_mem(iova);
4497                 }
4498                 break;
4499         }
4500
4501         return NOTIFY_OK;
4502 }
4503
4504 static struct notifier_block intel_iommu_memory_nb = {
4505         .notifier_call = intel_iommu_memory_notifier,
4506         .priority = 0
4507 };
4508
4509
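/*
 * Sysfs attributes exported for each DMAR unit: hardware version, register
 * base address, the capability/extended-capability registers and domain
 * counts.  They are grouped under the "intel-iommu" attribute group and
 * attached to the iommu class device created in intel_iommu_init() below.
 */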
4510 static ssize_t intel_iommu_show_version(struct device *dev,
4511                                         struct device_attribute *attr,
4512                                         char *buf)
4513 {
4514         struct intel_iommu *iommu = dev_get_drvdata(dev);
4515         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4516         return sprintf(buf, "%d:%d\n",
4517                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4518 }
4519 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4520
4521 static ssize_t intel_iommu_show_address(struct device *dev,
4522                                         struct device_attribute *attr,
4523                                         char *buf)
4524 {
4525         struct intel_iommu *iommu = dev_get_drvdata(dev);
4526         return sprintf(buf, "%llx\n", iommu->reg_phys);
4527 }
4528 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4529
4530 static ssize_t intel_iommu_show_cap(struct device *dev,
4531                                     struct device_attribute *attr,
4532                                     char *buf)
4533 {
4534         struct intel_iommu *iommu = dev_get_drvdata(dev);
4535         return sprintf(buf, "%llx\n", iommu->cap);
4536 }
4537 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4538
4539 static ssize_t intel_iommu_show_ecap(struct device *dev,
4540                                     struct device_attribute *attr,
4541                                     char *buf)
4542 {
4543         struct intel_iommu *iommu = dev_get_drvdata(dev);
4544         return sprintf(buf, "%llx\n", iommu->ecap);
4545 }
4546 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4547
4548 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4549                                       struct device_attribute *attr,
4550                                       char *buf)
4551 {
4552         struct intel_iommu *iommu = dev_get_drvdata(dev);
4553         return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4554 }
4555 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4556
4557 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4558                                            struct device_attribute *attr,
4559                                            char *buf)
4560 {
4561         struct intel_iommu *iommu = dev_get_drvdata(dev);
4562         return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4563                                                   cap_ndoms(iommu->cap)));
4564 }
4565 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4566
4567 static struct attribute *intel_iommu_attrs[] = {
4568         &dev_attr_version.attr,
4569         &dev_attr_address.attr,
4570         &dev_attr_cap.attr,
4571         &dev_attr_ecap.attr,
4572         &dev_attr_domains_supported.attr,
4573         &dev_attr_domains_used.attr,
4574         NULL,
4575 };
4576
4577 static struct attribute_group intel_iommu_group = {
4578         .name = "intel-iommu",
4579         .attrs = intel_iommu_attrs,
4580 };
4581
4582 const struct attribute_group *intel_iommu_groups[] = {
4583         &intel_iommu_group,
4584         NULL,
4585 };
4586
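/*
 * Top-level driver initialization: parse the DMAR/ATSR tables and device
 * scopes, set up the reserved IOVA ranges, program every DMAR unit via
 * init_dmars(), install intel_dma_ops as the DMA API backend and register
 * the bus, memory-hotplug and PM hooks.  Failures are fatal only for a
 * TXT/tboot launch (force_on), where running without VT-d would defeat the
 * measured launch.
 */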
4587 int __init intel_iommu_init(void)
4588 {
4589         int ret = -ENODEV;
4590         struct dmar_drhd_unit *drhd;
4591         struct intel_iommu *iommu;
4592
4593         /* VT-d is required for a TXT/tboot launch, so enforce that */
4594         force_on = tboot_force_iommu();
4595
4596         if (iommu_init_mempool()) {
4597                 if (force_on)
4598                         panic("tboot: Failed to initialize iommu memory\n");
4599                 return -ENOMEM;
4600         }
4601
4602         down_write(&dmar_global_lock);
4603         if (dmar_table_init()) {
4604                 if (force_on)
4605                         panic("tboot: Failed to initialize DMAR table\n");
4606                 goto out_free_dmar;
4607         }
4608
4609         if (dmar_dev_scope_init() < 0) {
4610                 if (force_on)
4611                         panic("tboot: Failed to initialize DMAR device scope\n");
4612                 goto out_free_dmar;
4613         }
4614
4615         if (no_iommu || dmar_disabled)
4616                 goto out_free_dmar;
4617
4618         if (list_empty(&dmar_rmrr_units))
4619                 pr_info("No RMRR found\n");
4620
4621         if (list_empty(&dmar_atsr_units))
4622                 pr_info("No ATSR found\n");
4623
4624         if (dmar_init_reserved_ranges()) {
4625                 if (force_on)
4626                         panic("tboot: Failed to reserve iommu ranges\n");
4627                 goto out_free_reserved_range;
4628         }
4629
4630         init_no_remapping_devices();
4631
4632         ret = init_dmars();
4633         if (ret) {
4634                 if (force_on)
4635                         panic("tboot: Failed to initialize DMARs\n");
4636                 pr_err("Initialization failed\n");
4637                 goto out_free_reserved_range;
4638         }
4639         up_write(&dmar_global_lock);
4640         pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4641
4642         init_timer(&unmap_timer);
4643 #ifdef CONFIG_SWIOTLB
4644         swiotlb = 0;
4645 #endif
4646         dma_ops = &intel_dma_ops;
4647
4648         init_iommu_pm_ops();
4649
4650         for_each_active_iommu(iommu, drhd)
4651                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4652                                                        intel_iommu_groups,
4653                                                        "%s", iommu->name);
4654
4655         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4656         bus_register_notifier(&pci_bus_type, &device_nb);
4657         if (si_domain && !hw_pass_through)
4658                 register_memory_notifier(&intel_iommu_memory_nb);
4659
4660         intel_iommu_enabled = 1;
4661
4662         return 0;
4663
4664 out_free_reserved_range:
4665         put_iova_domain(&reserved_iova_list);
4666 out_free_dmar:
4667         intel_iommu_free_dmars();
4668         up_write(&dmar_global_lock);
4669         iommu_exit_mempool();
4670         return ret;
4671 }
4672
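/*
 * pci_for_each_dma_alias() callback used by domain_context_clear() below to
 * wipe the context entry of every bus/devfn the device may use as a DMA
 * alias.
 */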
4673 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4674 {
4675         struct intel_iommu *iommu = opaque;
4676
4677         domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4678         return 0;
4679 }
4680
4681 /*
4682  * NB - intel-iommu lacks any sort of reference counting for the users of
4683  * dependent devices.  If multiple endpoints have intersecting dependent
4684  * devices, unbinding the driver from any one of them will possibly leave
4685  * the others unable to operate.
4686  */
4687 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4688 {
4689         if (!iommu || !dev || !dev_is_pci(dev))
4690                 return;
4691
4692         pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4693 }
4694
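/*
 * Tear down a single device_domain_info: disable the device's IOTLB (ATS),
 * clear its context entries (and those of any DMA aliases), unlink it from
 * its domain and drop the domain's reference on the IOMMU.  The caller must
 * hold device_domain_lock.
 */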
4695 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4696 {
4697         struct intel_iommu *iommu;
4698         unsigned long flags;
4699
4700         assert_spin_locked(&device_domain_lock);
4701
4702         if (WARN_ON(!info))
4703                 return;
4704
4705         iommu = info->iommu;
4706
4707         if (info->dev) {
4708                 iommu_disable_dev_iotlb(info);
4709                 domain_context_clear(iommu, info->dev);
4710         }
4711
4712         unlink_domain_info(info);
4713
4714         spin_lock_irqsave(&iommu->lock, flags);
4715         domain_detach_iommu(info->domain, iommu);
4716         spin_unlock_irqrestore(&iommu->lock, flags);
4717
4718         free_devinfo_mem(info);
4719 }
4720
4721 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4722                                      struct device *dev)
4723 {
4724         struct device_domain_info *info;
4725         unsigned long flags;
4726
4727         spin_lock_irqsave(&device_domain_lock, flags);
4728         info = dev->archdata.iommu;
4729         __dmar_remove_one_dev_info(info);
4730         spin_unlock_irqrestore(&device_domain_lock, flags);
4731 }
4732
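/*
 * Minimal setup for a domain created through the IOMMU API (see
 * intel_iommu_domain_alloc() below): initialize its IOVA allocator, reserve
 * the special ranges, derive the AGAW from the requested guest width and
 * allocate the top-level page directory.
 */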
4733 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4734 {
4735         int adjust_width;
4736
4737         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4738                         DMA_32BIT_PFN);
4739         domain_reserve_special_ranges(domain);
4740
4741         /* calculate AGAW */
4742         domain->gaw = guest_width;
4743         adjust_width = guestwidth_to_adjustwidth(guest_width);
4744         domain->agaw = width_to_agaw(adjust_width);
4745
4746         domain->iommu_coherency = 0;
4747         domain->iommu_snooping = 0;
4748         domain->iommu_superpage = 0;
4749         domain->max_addr = 0;
4750
4751         /* always allocate the top pgd */
4752         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4753         if (!domain->pgd)
4754                 return -ENOMEM;
4755         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4756         return 0;
4757 }
4758
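/*
 * IOMMU API entry points.  Only unmanaged (VM-style) domains are supported;
 * a freshly allocated domain gets the default address width and an aperture
 * sized to match.
 */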
4759 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4760 {
4761         struct dmar_domain *dmar_domain;
4762         struct iommu_domain *domain;
4763
4764         if (type != IOMMU_DOMAIN_UNMANAGED)
4765                 return NULL;
4766
4767         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4768         if (!dmar_domain) {
4769                 pr_err("Can't allocate dmar_domain\n");
4770                 return NULL;
4771         }
4772         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4773                 pr_err("Domain initialization failed\n");
4774                 domain_exit(dmar_domain);
4775                 return NULL;
4776         }
4777         domain_update_iommu_cap(dmar_domain);
4778
4779         domain = &dmar_domain->domain;
4780         domain->geometry.aperture_start = 0;
4781         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4782         domain->geometry.force_aperture = true;
4783
4784         return domain;
4785 }
4786
4787 static void intel_iommu_domain_free(struct iommu_domain *domain)
4788 {
4789         domain_exit(to_dmar_domain(domain));
4790 }
4791
4792 static int intel_iommu_attach_device(struct iommu_domain *domain,
4793                                      struct device *dev)
4794 {
4795         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4796         struct intel_iommu *iommu;
4797         int addr_width;
4798         u8 bus, devfn;
4799
4800         if (device_is_rmrr_locked(dev)) {
4801                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4802                 return -EPERM;
4803         }
4804
4805         /* normally dev is not mapped */
4806         if (unlikely(domain_context_mapped(dev))) {
4807                 struct dmar_domain *old_domain;
4808
4809                 old_domain = find_domain(dev);
4810                 if (old_domain) {
4811                         rcu_read_lock();
4812                         dmar_remove_one_dev_info(old_domain, dev);
4813                         rcu_read_unlock();
4814
4815                         if (!domain_type_is_vm_or_si(old_domain) &&
4816                              list_empty(&old_domain->devices))
4817                                 domain_exit(old_domain);
4818                 }
4819         }
4820
4821         iommu = device_to_iommu(dev, &bus, &devfn);
4822         if (!iommu)
4823                 return -ENODEV;
4824
4825         /* check if this iommu agaw is sufficient for max mapped address */
4826         addr_width = agaw_to_width(iommu->agaw);
4827         if (addr_width > cap_mgaw(iommu->cap))
4828                 addr_width = cap_mgaw(iommu->cap);
4829
4830         if (dmar_domain->max_addr > (1LL << addr_width)) {
4831                 pr_err("%s: iommu width (%d) is not "
4832                        "sufficient for the mapped address (%llx)\n",
4833                        __func__, addr_width, dmar_domain->max_addr);
4834                 return -EFAULT;
4835         }
4836         dmar_domain->gaw = addr_width;
4837
4838         /*
4839          * Knock out extra levels of page tables if necessary
4840          */
4841         while (iommu->agaw < dmar_domain->agaw) {
4842                 struct dma_pte *pte;
4843
4844                 pte = dmar_domain->pgd;
4845                 if (dma_pte_present(pte)) {
4846                         dmar_domain->pgd = (struct dma_pte *)
4847                                 phys_to_virt(dma_pte_addr(pte));
4848                         free_pgtable_page(pte);
4849                 }
4850                 dmar_domain->agaw--;
4851         }
4852
4853         return domain_add_dev_info(dmar_domain, dev);
4854 }
4855
4856 static void intel_iommu_detach_device(struct iommu_domain *domain,
4857                                       struct device *dev)
4858 {
4859         dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
4860 }
4861
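/*
 * Map/unmap callbacks for the generic IOMMU API.  intel_iommu_map() grows
 * the domain's max_addr (bounded by the domain's gaw) before installing the
 * PTEs; intel_iommu_unmap() may be asked to tear down more than 'size'
 * bytes when the IOVA falls inside a superpage mapping, and returns the
 * size actually unmapped.
 */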
4862 static int intel_iommu_map(struct iommu_domain *domain,
4863                            unsigned long iova, phys_addr_t hpa,
4864                            size_t size, int iommu_prot)
4865 {
4866         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4867         u64 max_addr;
4868         int prot = 0;
4869         int ret;
4870
4871         if (iommu_prot & IOMMU_READ)
4872                 prot |= DMA_PTE_READ;
4873         if (iommu_prot & IOMMU_WRITE)
4874                 prot |= DMA_PTE_WRITE;
4875         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4876                 prot |= DMA_PTE_SNP;
4877
4878         max_addr = iova + size;
4879         if (dmar_domain->max_addr < max_addr) {
4880                 u64 end;
4881
4882                 /* check if minimum agaw is sufficient for mapped address */
4883                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4884                 if (end < max_addr) {
4885                         pr_err("%s: iommu width (%d) is not "
4886                                "sufficient for the mapped address (%llx)\n",
4887                                __func__, dmar_domain->gaw, max_addr);
4888                         return -EFAULT;
4889                 }
4890                 dmar_domain->max_addr = max_addr;
4891         }
4892         /* Round up the size to the next multiple of VTD_PAGE_SIZE, if it and
4893            the low bits of hpa would take us onto the next page */
4894         size = aligned_nrpages(hpa, size);
4895         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4896                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4897         return ret;
4898 }
4899
4900 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4901                                 unsigned long iova, size_t size)
4902 {
4903         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4904         struct page *freelist = NULL;
4905         struct intel_iommu *iommu;
4906         unsigned long start_pfn, last_pfn;
4907         unsigned int npages;
4908         int iommu_id, level = 0;
4909
4910         /* Cope with horrid API which requires us to unmap more than the
4911            size argument if it happens to be a large-page mapping. */
4912         BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4913
4914         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4915                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4916
4917         start_pfn = iova >> VTD_PAGE_SHIFT;
4918         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4919
4920         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4921
4922         npages = last_pfn - start_pfn + 1;
4923
4924         for_each_domain_iommu(iommu_id, dmar_domain) {
4925                 iommu = g_iommus[iommu_id];
4926
4927                 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
4928                                       start_pfn, npages, !freelist, 0);
4929         }
4930
4931         dma_free_pagelist(freelist);
4932
4933         if (dmar_domain->max_addr == iova + size)
4934                 dmar_domain->max_addr = iova;
4935
4936         return size;
4937 }
4938
4939 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4940                                             dma_addr_t iova)
4941 {
4942         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4943         struct dma_pte *pte;
4944         int level = 0;
4945         u64 phys = 0;
4946
4947         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4948         if (pte)
4949                 phys = dma_pte_addr(pte);
4950
4951         return phys;
4952 }
4953
4954 static bool intel_iommu_capable(enum iommu_cap cap)
4955 {
4956         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4957                 return domain_update_iommu_snooping(NULL) == 1;
4958         if (cap == IOMMU_CAP_INTR_REMAP)
4959                 return irq_remapping_enabled == 1;
4960
4961         return false;
4962 }
4963
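/*
 * add_device/remove_device callbacks: link a newly discovered device to its
 * DMAR unit's sysfs node and place it into an IOMMU group (shared with any
 * DMA aliases, per pci_device_group) so that the isolation granularity is
 * visible to users such as VFIO.
 */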
4964 static int intel_iommu_add_device(struct device *dev)
4965 {
4966         struct intel_iommu *iommu;
4967         struct iommu_group *group;
4968         u8 bus, devfn;
4969
4970         iommu = device_to_iommu(dev, &bus, &devfn);
4971         if (!iommu)
4972                 return -ENODEV;
4973
4974         iommu_device_link(iommu->iommu_dev, dev);
4975
4976         group = iommu_group_get_for_dev(dev);
4977
4978         if (IS_ERR(group))
4979                 return PTR_ERR(group);
4980
4981         iommu_group_put(group);
4982         return 0;
4983 }
4984
4985 static void intel_iommu_remove_device(struct device *dev)
4986 {
4987         struct intel_iommu *iommu;
4988         u8 bus, devfn;
4989
4990         iommu = device_to_iommu(dev, &bus, &devfn);
4991         if (!iommu)
4992                 return;
4993
4994         iommu_group_remove_device(dev);
4995
4996         iommu_device_unlink(iommu->iommu_dev, dev);
4997 }
4998
4999 #ifdef CONFIG_INTEL_IOMMU_SVM
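/*
 * Enable PASID (and, where the device supports it, device-IOTLB) handling
 * in the device's context entry so that Shared Virtual Memory can issue
 * requests-with-PASID.  Takes device_domain_lock and the iommu lock, so the
 * caller must hold neither.
 */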
5000 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5001 {
5002         struct device_domain_info *info;
5003         struct context_entry *context;
5004         struct dmar_domain *domain;
5005         unsigned long flags;
5006         u64 ctx_lo;
5007         int ret;
5008
5009         domain = get_valid_domain_for_dev(sdev->dev);
5010         if (!domain)
5011                 return -EINVAL;
5012
5013         spin_lock_irqsave(&device_domain_lock, flags);
5014         spin_lock(&iommu->lock);
5015
5016         ret = -EINVAL;
5017         info = sdev->dev->archdata.iommu;
5018         if (!info || !info->pasid_supported)
5019                 goto out;
5020
5021         context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5022         if (WARN_ON(!context))
5023                 goto out;
5024
5025         ctx_lo = context[0].lo;
5026
5027         sdev->did = domain->iommu_did[iommu->seq_id];
5028         sdev->sid = PCI_DEVID(info->bus, info->devfn);
5029
5030         if (!(ctx_lo & CONTEXT_PASIDE)) {
5031                 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5032                 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
5033                 wmb();
5034                 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5035                  * extended to permit requests-with-PASID if the PASIDE bit
5036                  * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
5037                  * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5038                  * are unconditionally blocked. Which makes less sense.
5039                  * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5040                  * "guest mode" translation types depending on whether ATS
5041                  * is available or not. Annoyingly, we can't use the new
5042                  * modes *unless* PASIDE is set. */
5043                 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5044                         ctx_lo &= ~CONTEXT_TT_MASK;
5045                         if (info->ats_supported)
5046                                 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5047                         else
5048                                 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5049                 }
5050                 ctx_lo |= CONTEXT_PASIDE;
5051                 if (iommu->pasid_state_table)
5052                         ctx_lo |= CONTEXT_DINVE;
5053                 if (info->pri_supported)
5054                         ctx_lo |= CONTEXT_PRS;
5055                 context[0].lo = ctx_lo;
5056                 wmb();
5057                 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5058                                            DMA_CCMD_MASK_NOBIT,
5059                                            DMA_CCMD_DEVICE_INVL);
5060         }
5061
5062         /* Enable PASID support in the device, if it wasn't already */
5063         if (!info->pasid_enabled)
5064                 iommu_enable_dev_iotlb(info);
5065
5066         if (info->ats_enabled) {
5067                 sdev->dev_iotlb = 1;
5068                 sdev->qdep = info->ats_qdep;
5069                 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5070                         sdev->qdep = 0;
5071         }
5072         ret = 0;
5073
5074  out:
5075         spin_unlock(&iommu->lock);
5076         spin_unlock_irqrestore(&device_domain_lock, flags);
5077
5078         return ret;
5079 }
5080
5081 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5082 {
5083         struct intel_iommu *iommu;
5084         u8 bus, devfn;
5085
5086         if (iommu_dummy(dev)) {
5087                 dev_warn(dev,
5088                          "No IOMMU translation for device; cannot enable SVM\n");
5089                 return NULL;
5090         }
5091
5092         iommu = device_to_iommu(dev, &bus, &devfn);
5093         if (!iommu) {
5094                 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5095                 return NULL;
5096         }
5097
5098         if (!iommu->pasid_table) {
5099                 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5100                 return NULL;
5101         }
5102
5103         return iommu;
5104 }
5105 #endif /* CONFIG_INTEL_IOMMU_SVM */
5106
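/*
 * Callback table registered with the IOMMU core via bus_set_iommu() in
 * intel_iommu_init().  Illustrative only (not code from this driver): a
 * VFIO-style consumer would reach these callbacks through the generic
 * IOMMU API, roughly:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *	int ret = iommu_attach_device(dom, &pdev->dev);
 *	if (!ret)
 *		ret = iommu_map(dom, iova, phys, size,
 *				IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(dom, iova, size);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 *
 * where pdev, iova, phys and size are placeholders supplied by the caller.
 */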
5107 static const struct iommu_ops intel_iommu_ops = {
5108         .capable        = intel_iommu_capable,
5109         .domain_alloc   = intel_iommu_domain_alloc,
5110         .domain_free    = intel_iommu_domain_free,
5111         .attach_dev     = intel_iommu_attach_device,
5112         .detach_dev     = intel_iommu_detach_device,
5113         .map            = intel_iommu_map,
5114         .unmap          = intel_iommu_unmap,
5115         .map_sg         = default_iommu_map_sg,
5116         .iova_to_phys   = intel_iommu_iova_to_phys,
5117         .add_device     = intel_iommu_add_device,
5118         .remove_device  = intel_iommu_remove_device,
5119         .device_group   = pci_device_group,
5120         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
5121 };
5122
5123 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5124 {
5125         /* G4x/GM45 integrated gfx dmar support is totally busted. */
5126         pr_info("Disabling IOMMU for graphics on this chipset\n");
5127         dmar_map_gfx = 0;
5128 }
5129
5130 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5131 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5132 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5133 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5134 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5135 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5136 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5137
5138 static void quirk_iommu_rwbf(struct pci_dev *dev)
5139 {
5140         /*
5141          * Mobile 4 Series Chipset neglects to set RWBF capability,
5142          * but needs it. Same seems to hold for the desktop versions.
5143          */
5144         pr_info("Forcing write-buffer flush capability\n");
5145         rwbf_quirk = 1;
5146 }
5147
5148 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5149 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5150 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5151 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5152 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5153 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5154 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5155
5156 #define GGC 0x52
5157 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
5158 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
5159 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
5160 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
5161 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
5162 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
5163 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
5164 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
5165
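/*
 * GGC is the graphics control register in the host bridge's config space on
 * these Calpella/Ironlake parts; the fields above report how much stolen
 * memory the BIOS set aside and whether a VT-d-capable (shadow) GTT was
 * allocated.  Without one the IOMMU cannot safely translate for the
 * integrated graphics device, so the quirk below clears dmar_map_gfx.
 */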
5166 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5167 {
5168         unsigned short ggc;
5169
5170         if (pci_read_config_word(dev, GGC, &ggc))
5171                 return;
5172
5173         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5174                 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5175                 dmar_map_gfx = 0;
5176         } else if (dmar_map_gfx) {
5177                 /* we have to ensure the gfx device is idle before we flush */
5178                 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5179                 intel_iommu_strict = 1;
5180         }
5181 }
5182 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5183 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5184 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5185 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5186
5187 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5188    ISOCH DMAR unit for the Azalia sound device, but not give it any
5189    TLB entries, which causes it to deadlock. Check for that.  We do
5190    this in a function called from init_dmars(), instead of in a PCI
5191    quirk, because we don't want to print the obnoxious "BIOS broken"
5192    message if VT-d is actually disabled.
5193 */
5194 static void __init check_tylersburg_isoch(void)
5195 {
5196         struct pci_dev *pdev;
5197         uint32_t vtisochctrl;
5198
5199         /* If there's no Azalia in the system anyway, forget it. */
5200         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5201         if (!pdev)
5202                 return;
5203         pci_dev_put(pdev);
5204
5205         /* System Management Registers. Might be hidden, in which case
5206            we can't do the sanity check. But that's OK, because the
5207            known-broken BIOSes _don't_ actually hide it, so far. */
5208         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5209         if (!pdev)
5210                 return;
5211
5212         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5213                 pci_dev_put(pdev);
5214                 return;
5215         }
5216
5217         pci_dev_put(pdev);
5218
5219         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5220         if (vtisochctrl & 1)
5221                 return;
5222
5223         /* Drop all bits other than the number of TLB entries */
5224         vtisochctrl &= 0x1c;
5225
5226         /* If we have the recommended number of TLB entries (16), fine. */
5227         if (vtisochctrl == 0x10)
5228                 return;
5229
5230         /* Zero TLB entries? You get to ride the short bus to school. */
5231         if (!vtisochctrl) {
5232                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5233                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5234                      dmi_get_system_info(DMI_BIOS_VENDOR),
5235                      dmi_get_system_info(DMI_BIOS_VERSION),
5236                      dmi_get_system_info(DMI_PRODUCT_VERSION));
5237                 iommu_identity_mapping |= IDENTMAP_AZALIA;
5238                 return;
5239         }
5240
5241         pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5242                 vtisochctrl);
5243 }