firefly-linux-kernel-4.4.55.git: drivers/iommu/intel-iommu.c
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  *          Joerg Roedel <jroedel@suse.de>
19  */
20
21 #define pr_fmt(fmt)     "DMAR: " fmt
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/memory.h>
36 #include <linux/timer.h>
37 #include <linux/io.h>
38 #include <linux/iova.h>
39 #include <linux/iommu.h>
40 #include <linux/intel-iommu.h>
41 #include <linux/syscore_ops.h>
42 #include <linux/tboot.h>
43 #include <linux/dmi.h>
44 #include <linux/pci-ats.h>
45 #include <linux/memblock.h>
46 #include <linux/dma-contiguous.h>
47 #include <linux/crash_dump.h>
48 #include <asm/irq_remapping.h>
49 #include <asm/cacheflush.h>
50 #include <asm/iommu.h>
51
52 #include "irq_remapping.h"
53
54 #define ROOT_SIZE               VTD_PAGE_SIZE
55 #define CONTEXT_SIZE            VTD_PAGE_SIZE
56
57 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
58 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
59 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
60 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
61
62 #define IOAPIC_RANGE_START      (0xfee00000)
63 #define IOAPIC_RANGE_END        (0xfeefffff)
64 #define IOVA_START_ADDR         (0x1000)
65
66 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
67
68 #define MAX_AGAW_WIDTH 64
69 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
70
71 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
72 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
73
74 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
75    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
76 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
77                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
78 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
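/*
 * For example, with the default 48-bit guest address width and
 * VTD_PAGE_SHIFT == 12, __DOMAIN_MAX_PFN(48) is 2^36 - 1.  A 64-bit
 * build keeps that value in DOMAIN_MAX_PFN(48); a 32-bit build clamps
 * it to ULONG_MAX via the min_t() above, which is what makes plain
 * 'unsigned long' PFNs safe.
 */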
79
80 /* IO virtual address start page frame number */
81 #define IOVA_START_PFN          (1)
82
83 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
84 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
85 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
86
87 /* page table handling */
88 #define LEVEL_STRIDE            (9)
89 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
90
91 /*
92  * This bitmap is used to advertise the page sizes our hardware supports
93  * to the IOMMU core, which will then use this information to split
94  * physically contiguous memory regions it is mapping into page sizes
95  * that we support.
96  *
97  * Traditionally the IOMMU core just handed us the mappings directly,
98  * after making sure the size is an order of a 4KiB page and that the
99  * mapping has natural alignment.
100  *
101  * To retain this behavior, we currently advertise that we support
102  * all page sizes that are an order of 4KiB.
103  *
104  * If at some point we'd like to utilize the IOMMU core's new behavior,
105  * we could change this to advertise the real page sizes we support.
106  */
107 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
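/*
 * ~0xFFFUL clears bits 0-11 and leaves every higher bit set, so each
 * power-of-two size from 4KiB upwards (4KiB, 8KiB, ..., 2MiB, 1GiB, ...)
 * is advertised -- i.e. exactly the "all orders of 4KiB" behaviour
 * described above.
 */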
108
109 static inline int agaw_to_level(int agaw)
110 {
111         return agaw + 2;
112 }
113
114 static inline int agaw_to_width(int agaw)
115 {
116         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
117 }
118
119 static inline int width_to_agaw(int width)
120 {
121         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
122 }
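/*
 * For example, the default 48-bit width gives width_to_agaw(48) =
 * DIV_ROUND_UP(18, 9) = 2, agaw_to_level(2) = 4 (a 4-level page table)
 * and agaw_to_width(2) = 48.  An agaw of 3 selects a 5-level table
 * covering 57 bits.
 */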
123
124 static inline unsigned int level_to_offset_bits(int level)
125 {
126         return (level - 1) * LEVEL_STRIDE;
127 }
128
129 static inline int pfn_level_offset(unsigned long pfn, int level)
130 {
131         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
132 }
133
134 static inline unsigned long level_mask(int level)
135 {
136         return -1UL << level_to_offset_bits(level);
137 }
138
139 static inline unsigned long level_size(int level)
140 {
141         return 1UL << level_to_offset_bits(level);
142 }
143
144 static inline unsigned long align_to_level(unsigned long pfn, int level)
145 {
146         return (pfn + level_size(level) - 1) & level_mask(level);
147 }
148
149 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
150 {
151         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
152 }
153
154 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
155    are never going to work. */
156 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
157 {
158         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
159 }
160
161 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
162 {
163         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
164 }
165 static inline unsigned long page_to_dma_pfn(struct page *pg)
166 {
167         return mm_to_dma_pfn(page_to_pfn(pg));
168 }
169 static inline unsigned long virt_to_dma_pfn(void *p)
170 {
171         return page_to_dma_pfn(virt_to_page(p));
172 }
173
174 /* global iommu list, set NULL for ignored DMAR units */
175 static struct intel_iommu **g_iommus;
176
177 static void __init check_tylersburg_isoch(void);
178 static int rwbf_quirk;
179
180 /*
181  * set to 1 to panic kernel if can't successfully enable VT-d
182  * (used when kernel is launched w/ TXT)
183  */
184 static int force_on = 0;
185
186 /*
187  * 0: Present
188  * 1-11: Reserved
189  * 12-63: Context Ptr (12 - (haw-1))
190  * 64-127: Reserved
191  */
192 struct root_entry {
193         u64     lo;
194         u64     hi;
195 };
196 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
197
198 /*
199  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
200  * if marked present.
201  */
202 static phys_addr_t root_entry_lctp(struct root_entry *re)
203 {
204         if (!(re->lo & 1))
205                 return 0;
206
207         return re->lo & VTD_PAGE_MASK;
208 }
209
210 /*
211  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
212  * if marked present.
213  */
214 static phys_addr_t root_entry_uctp(struct root_entry *re)
215 {
216         if (!(re->hi & 1))
217                 return 0;
218
219         return re->hi & VTD_PAGE_MASK;
220 }
221 /*
222  * low 64 bits:
223  * 0: present
224  * 1: fault processing disable
225  * 2-3: translation type
226  * 12-63: address space root
227  * high 64 bits:
228  * 0-2: address width
229  * 3-6: aval
230  * 8-23: domain id
231  */
232 struct context_entry {
233         u64 lo;
234         u64 hi;
235 };
236
237 static inline void context_clear_pasid_enable(struct context_entry *context)
238 {
239         context->lo &= ~(1ULL << 11);
240 }
241
242 static inline bool context_pasid_enabled(struct context_entry *context)
243 {
244         return !!(context->lo & (1ULL << 11));
245 }
246
247 static inline void context_set_copied(struct context_entry *context)
248 {
249         context->hi |= (1ull << 3);
250 }
251
252 static inline bool context_copied(struct context_entry *context)
253 {
254         return !!(context->hi & (1ULL << 3));
255 }
256
257 static inline bool __context_present(struct context_entry *context)
258 {
259         return (context->lo & 1);
260 }
261
262 static inline bool context_present(struct context_entry *context)
263 {
264         return context_pasid_enabled(context) ?
265              __context_present(context) :
266              __context_present(context) && !context_copied(context);
267 }
268
269 static inline void context_set_present(struct context_entry *context)
270 {
271         context->lo |= 1;
272 }
273
274 static inline void context_set_fault_enable(struct context_entry *context)
275 {
276         context->lo &= (((u64)-1) << 2) | 1;
277 }
278
279 static inline void context_set_translation_type(struct context_entry *context,
280                                                 unsigned long value)
281 {
282         context->lo &= (((u64)-1) << 4) | 3;
283         context->lo |= (value & 3) << 2;
284 }
285
286 static inline void context_set_address_root(struct context_entry *context,
287                                             unsigned long value)
288 {
289         context->lo &= ~VTD_PAGE_MASK;
290         context->lo |= value & VTD_PAGE_MASK;
291 }
292
293 static inline void context_set_address_width(struct context_entry *context,
294                                              unsigned long value)
295 {
296         context->hi |= value & 7;
297 }
298
299 static inline void context_set_domain_id(struct context_entry *context,
300                                          unsigned long value)
301 {
302         context->hi |= (value & ((1 << 16) - 1)) << 8;
303 }
304
305 static inline int context_domain_id(struct context_entry *c)
306 {
307         return((c->hi >> 8) & 0xffff);
308 }
309
310 static inline void context_clear_entry(struct context_entry *context)
311 {
312         context->lo = 0;
313         context->hi = 0;
314 }
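/*
 * As a rough sketch (the exact sequence lives in the context-mapping
 * code later in this file), a context entry is typically composed with
 * the helpers above along these lines:
 *
 *	context_set_domain_id(context, did);
 *	context_set_address_width(context, agaw);
 *	context_set_address_root(context, virt_to_phys(pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 */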
315
316 /*
317  * 0: readable
318  * 1: writable
319  * 2-6: reserved
320  * 7: super page
321  * 8-10: available
322  * 11: snoop behavior
323  * 12-63: Host physical address
324  */
325 struct dma_pte {
326         u64 val;
327 };
328
329 static inline void dma_clear_pte(struct dma_pte *pte)
330 {
331         pte->val = 0;
332 }
333
334 static inline u64 dma_pte_addr(struct dma_pte *pte)
335 {
336 #ifdef CONFIG_64BIT
337         return pte->val & VTD_PAGE_MASK;
338 #else
339         /* Must have a full atomic 64-bit read */
340         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
341 #endif
342 }
343
344 static inline bool dma_pte_present(struct dma_pte *pte)
345 {
346         return (pte->val & 3) != 0;
347 }
348
349 static inline bool dma_pte_superpage(struct dma_pte *pte)
350 {
351         return (pte->val & DMA_PTE_LARGE_PAGE);
352 }
353
354 static inline int first_pte_in_page(struct dma_pte *pte)
355 {
356         return !((unsigned long)pte & ~VTD_PAGE_MASK);
357 }
358
359 /*
360  * This domain is a statically identity mapping domain.
361  *      1. This domain creates a static 1:1 mapping to all usable memory.
362  *      2. It maps to each iommu if successful.
363  *      3. Each iommu maps to this domain if successful.
364  */
365 static struct dmar_domain *si_domain;
366 static int hw_pass_through = 1;
367
368 /*
369  * Domain represents a virtual machine; more than one device
370  * across iommus may be owned by one domain, e.g. a kvm guest.
371  */
372 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
373
374 /* si_domain contains multiple devices */
375 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
376
377 #define for_each_domain_iommu(idx, domain)                      \
378         for (idx = 0; idx < g_num_of_iommus; idx++)             \
379                 if (domain->iommu_refcnt[idx])
380
381 struct dmar_domain {
382         int     nid;                    /* node id */
383
384         unsigned        iommu_refcnt[DMAR_UNITS_SUPPORTED];
385                                         /* Refcount of devices per iommu */
386
387
388         u16             iommu_did[DMAR_UNITS_SUPPORTED];
389                                         /* Domain ids per IOMMU. Use u16 since
390                                          * domain ids are 16 bit wide according
391                                          * to VT-d spec, section 9.3 */
392
393         struct list_head devices;       /* all devices' list */
394         struct iova_domain iovad;       /* iova's that belong to this domain */
395
396         struct dma_pte  *pgd;           /* virtual address */
397         int             gaw;            /* max guest address width */
398
399         /* adjusted guest address width, 0 is level 2 30-bit */
400         int             agaw;
401
402         int             flags;          /* flags to find out type of domain */
403
404         int             iommu_coherency;/* indicate coherency of iommu access */
405         int             iommu_snooping; /* indicate snooping control feature*/
406         int             iommu_count;    /* reference count of iommu */
407         int             iommu_superpage;/* Level of superpages supported:
408                                            0 == 4KiB (no superpages), 1 == 2MiB,
409                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
410         u64             max_addr;       /* maximum mapped address */
411
412         struct iommu_domain domain;     /* generic domain data structure for
413                                            iommu core */
414 };
415
416 /* PCI domain-device relationship */
417 struct device_domain_info {
418         struct list_head link;  /* link to domain siblings */
419         struct list_head global; /* link to global list */
420         u8 bus;                 /* PCI bus number */
421         u8 devfn;               /* PCI devfn number */
422         u8 pasid_supported:3;
423         u8 pasid_enabled:1;
424         u8 pri_supported:1;
425         u8 pri_enabled:1;
426         u8 ats_supported:1;
427         u8 ats_enabled:1;
428         u8 ats_qdep;
429         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
430         struct intel_iommu *iommu; /* IOMMU used by this device */
431         struct dmar_domain *domain; /* pointer to domain */
432 };
433
434 struct dmar_rmrr_unit {
435         struct list_head list;          /* list of rmrr units   */
436         struct acpi_dmar_header *hdr;   /* ACPI header          */
437         u64     base_address;           /* reserved base address*/
438         u64     end_address;            /* reserved end address */
439         struct dmar_dev_scope *devices; /* target devices */
440         int     devices_cnt;            /* target device count */
441 };
442
443 struct dmar_atsr_unit {
444         struct list_head list;          /* list of ATSR units */
445         struct acpi_dmar_header *hdr;   /* ACPI header */
446         struct dmar_dev_scope *devices; /* target devices */
447         int devices_cnt;                /* target device count */
448         u8 include_all:1;               /* include all ports */
449 };
450
451 static LIST_HEAD(dmar_atsr_units);
452 static LIST_HEAD(dmar_rmrr_units);
453
454 #define for_each_rmrr_units(rmrr) \
455         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
456
457 static void flush_unmaps_timeout(unsigned long data);
458
459 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
460
461 #define HIGH_WATER_MARK 250
462 struct deferred_flush_tables {
463         int next;
464         struct iova *iova[HIGH_WATER_MARK];
465         struct dmar_domain *domain[HIGH_WATER_MARK];
466         struct page *freelist[HIGH_WATER_MARK];
467 };
468
469 static struct deferred_flush_tables *deferred_flush;
470
471 /* number of registered intel_iommus; used to size the g_iommus array */
472 static int g_num_of_iommus;
473
474 static DEFINE_SPINLOCK(async_umap_flush_lock);
475 static LIST_HEAD(unmaps_to_do);
476
477 static int timer_on;
478 static long list_size;
479
480 static void domain_exit(struct dmar_domain *domain);
481 static void domain_remove_dev_info(struct dmar_domain *domain);
482 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
483                                      struct device *dev);
484 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
485 static void domain_context_clear(struct intel_iommu *iommu,
486                                  struct device *dev);
487 static int domain_detach_iommu(struct dmar_domain *domain,
488                                struct intel_iommu *iommu);
489
490 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
491 int dmar_disabled = 0;
492 #else
493 int dmar_disabled = 1;
494 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
495
496 int intel_iommu_enabled = 0;
497 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
498
499 static int dmar_map_gfx = 1;
500 static int dmar_forcedac;
501 static int intel_iommu_strict;
502 static int intel_iommu_superpage = 1;
503 static int intel_iommu_ecs = 1;
504 static int intel_iommu_pasid28;
505 static int iommu_identity_mapping;
506
507 #define IDENTMAP_ALL            1
508 #define IDENTMAP_GFX            2
509 #define IDENTMAP_AZALIA         4
510
511 /* Broadwell and Skylake have broken ECS support — normal so-called "second
512  * level" translation of DMA requests-without-PASID doesn't actually happen
513  * unless you also set the NESTE bit in an extended context-entry. Which of
514  * course means that SVM doesn't work because it's trying to do nested
515  * translation of the physical addresses it finds in the process page tables,
516  * through the IOVA->phys mapping found in the "second level" page tables.
517  *
518  * The VT-d specification was retroactively changed to change the definition
519  * of the capability bits and pretend that Broadwell/Skylake never happened...
520  * but unfortunately the wrong bit was changed. It's ECS which is broken, but
521  * for some reason it was the PASID capability bit which was redefined (from
522  * bit 28 on BDW/SKL to bit 40 in future).
523  *
524  * So our test for ECS needs to eschew those implementations which set the old
525  * PASID capability bit 28, since those are the ones on which ECS is broken.
526  * Unless we are working around the 'pasid28' limitations, that is, by putting
527  * the device into passthrough mode for normal DMA and thus masking the bug.
528  */
529 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
530                             (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
531 /* PASID support is thus enabled if ECS is enabled and *either* of the old
532  * or new capability bits is set. */
533 #define pasid_enabled(iommu) (ecs_enabled(iommu) &&                     \
534                               (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
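/*
 * Concretely: a BDW/SKL unit that advertises the pre-production bit-28
 * PASID capability ends up with ecs_enabled() == false (and therefore
 * no PASID support) unless the user explicitly boots with
 * intel_iommu=pasid28 to opt back in.
 */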
535
536 int intel_iommu_gfx_mapped;
537 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
538
539 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
540 static DEFINE_SPINLOCK(device_domain_lock);
541 static LIST_HEAD(device_domain_list);
542
543 static const struct iommu_ops intel_iommu_ops;
544
545 static bool translation_pre_enabled(struct intel_iommu *iommu)
546 {
547         return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
548 }
549
550 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
551 {
552         iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
553 }
554
555 static void init_translation_status(struct intel_iommu *iommu)
556 {
557         u32 gsts;
558
559         gsts = readl(iommu->reg + DMAR_GSTS_REG);
560         if (gsts & DMA_GSTS_TES)
561                 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
562 }
563
564 /* Convert generic 'struct iommu_domain' to private struct dmar_domain */
565 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
566 {
567         return container_of(dom, struct dmar_domain, domain);
568 }
569
570 static int __init intel_iommu_setup(char *str)
571 {
572         if (!str)
573                 return -EINVAL;
574         while (*str) {
575                 if (!strncmp(str, "on", 2)) {
576                         dmar_disabled = 0;
577                         pr_info("IOMMU enabled\n");
578                 } else if (!strncmp(str, "off", 3)) {
579                         dmar_disabled = 1;
580                         pr_info("IOMMU disabled\n");
581                 } else if (!strncmp(str, "igfx_off", 8)) {
582                         dmar_map_gfx = 0;
583                         pr_info("Disable GFX device mapping\n");
584                 } else if (!strncmp(str, "forcedac", 8)) {
585                         pr_info("Forcing DAC for PCI devices\n");
586                         dmar_forcedac = 1;
587                 } else if (!strncmp(str, "strict", 6)) {
588                         pr_info("Disable batched IOTLB flush\n");
589                         intel_iommu_strict = 1;
590                 } else if (!strncmp(str, "sp_off", 6)) {
591                         pr_info("Disable supported super page\n");
592                         intel_iommu_superpage = 0;
593                 } else if (!strncmp(str, "ecs_off", 7)) {
594                         printk(KERN_INFO
595                                 "Intel-IOMMU: disable extended context table support\n");
596                         intel_iommu_ecs = 0;
597                 } else if (!strncmp(str, "pasid28", 7)) {
598                         printk(KERN_INFO
599                                 "Intel-IOMMU: enable pre-production PASID support\n");
600                         intel_iommu_pasid28 = 1;
601                         iommu_identity_mapping |= IDENTMAP_GFX;
602                 }
603
604                 str += strcspn(str, ",");
605                 while (*str == ',')
606                         str++;
607         }
608         return 0;
609 }
610 __setup("intel_iommu=", intel_iommu_setup);
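/*
 * The options above are comma separated, so booting with e.g.
 * "intel_iommu=on,sp_off,strict" enables the IOMMU, disables superpage
 * use and switches to strict (unbatched) IOTLB flushing in one go.
 */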
611
612 static struct kmem_cache *iommu_domain_cache;
613 static struct kmem_cache *iommu_devinfo_cache;
614
615 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
616 {
617         struct dmar_domain **domains;
618         int idx = did >> 8;
619
620         domains = iommu->domains[idx];
621         if (!domains)
622                 return NULL;
623
624         return domains[did & 0xff];
625 }
626
627 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
628                              struct dmar_domain *domain)
629 {
630         struct dmar_domain **domains;
631         int idx = did >> 8;
632
633         if (!iommu->domains[idx]) {
634                 size_t size = 256 * sizeof(struct dmar_domain *);
635                 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
636         }
637
638         domains = iommu->domains[idx];
639         if (WARN_ON(!domains))
640                 return;
641         else
642                 domains[did & 0xff] = domain;
643 }
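/*
 * The 16-bit domain id is split in two: the high byte picks one of 256
 * lazily allocated second-level arrays and the low byte picks the slot
 * within it, so e.g. did 0x1234 lives in iommu->domains[0x12][0x34].
 */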
644
645 static inline void *alloc_pgtable_page(int node)
646 {
647         struct page *page;
648         void *vaddr = NULL;
649
650         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
651         if (page)
652                 vaddr = page_address(page);
653         return vaddr;
654 }
655
656 static inline void free_pgtable_page(void *vaddr)
657 {
658         free_page((unsigned long)vaddr);
659 }
660
661 static inline void *alloc_domain_mem(void)
662 {
663         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
664 }
665
666 static void free_domain_mem(void *vaddr)
667 {
668         kmem_cache_free(iommu_domain_cache, vaddr);
669 }
670
671 static inline void * alloc_devinfo_mem(void)
672 {
673         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
674 }
675
676 static inline void free_devinfo_mem(void *vaddr)
677 {
678         kmem_cache_free(iommu_devinfo_cache, vaddr);
679 }
680
681 static inline int domain_type_is_vm(struct dmar_domain *domain)
682 {
683         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
684 }
685
686 static inline int domain_type_is_si(struct dmar_domain *domain)
687 {
688         return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
689 }
690
691 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
692 {
693         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
694                                 DOMAIN_FLAG_STATIC_IDENTITY);
695 }
696
697 static inline int domain_pfn_supported(struct dmar_domain *domain,
698                                        unsigned long pfn)
699 {
700         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
701
702         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
703 }
704
705 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
706 {
707         unsigned long sagaw;
708         int agaw = -1;
709
710         sagaw = cap_sagaw(iommu->cap);
711         for (agaw = width_to_agaw(max_gaw);
712              agaw >= 0; agaw--) {
713                 if (test_bit(agaw, &sagaw))
714                         break;
715         }
716
717         return agaw;
718 }
719
720 /*
721  * Calculate max SAGAW for each iommu.
722  */
723 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
724 {
725         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
726 }
727
728 /*
729  * Calculate agaw for each iommu.
730  * "SAGAW" may be different across iommus; use a default agaw, and
731  * fall back to a smaller supported agaw for iommus that don't support the default.
732  */
733 int iommu_calculate_agaw(struct intel_iommu *iommu)
734 {
735         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
736 }
737
738 /* This function only returns a single iommu in a domain */
739 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
740 {
741         int iommu_id;
742
743         /* si_domain and vm domain should not get here. */
744         BUG_ON(domain_type_is_vm_or_si(domain));
745         for_each_domain_iommu(iommu_id, domain)
746                 break;
747
748         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
749                 return NULL;
750
751         return g_iommus[iommu_id];
752 }
753
754 static void domain_update_iommu_coherency(struct dmar_domain *domain)
755 {
756         struct dmar_drhd_unit *drhd;
757         struct intel_iommu *iommu;
758         bool found = false;
759         int i;
760
761         domain->iommu_coherency = 1;
762
763         for_each_domain_iommu(i, domain) {
764                 found = true;
765                 if (!ecap_coherent(g_iommus[i]->ecap)) {
766                         domain->iommu_coherency = 0;
767                         break;
768                 }
769         }
770         if (found)
771                 return;
772
773         /* No hardware attached; use lowest common denominator */
774         rcu_read_lock();
775         for_each_active_iommu(iommu, drhd) {
776                 if (!ecap_coherent(iommu->ecap)) {
777                         domain->iommu_coherency = 0;
778                         break;
779                 }
780         }
781         rcu_read_unlock();
782 }
783
784 static int domain_update_iommu_snooping(struct intel_iommu *skip)
785 {
786         struct dmar_drhd_unit *drhd;
787         struct intel_iommu *iommu;
788         int ret = 1;
789
790         rcu_read_lock();
791         for_each_active_iommu(iommu, drhd) {
792                 if (iommu != skip) {
793                         if (!ecap_sc_support(iommu->ecap)) {
794                                 ret = 0;
795                                 break;
796                         }
797                 }
798         }
799         rcu_read_unlock();
800
801         return ret;
802 }
803
804 static int domain_update_iommu_superpage(struct intel_iommu *skip)
805 {
806         struct dmar_drhd_unit *drhd;
807         struct intel_iommu *iommu;
808         int mask = 0xf;
809
810         if (!intel_iommu_superpage) {
811                 return 0;
812         }
813
814         /* set iommu_superpage to the smallest common denominator */
815         rcu_read_lock();
816         for_each_active_iommu(iommu, drhd) {
817                 if (iommu != skip) {
818                         mask &= cap_super_page_val(iommu->cap);
819                         if (!mask)
820                                 break;
821                 }
822         }
823         rcu_read_unlock();
824
825         return fls(mask);
826 }
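/*
 * cap_super_page_val() is a 4-bit field: bit 0 = 2MiB, bit 1 = 1GiB,
 * bit 2 = 512GiB, bit 3 = 1TiB pages.  fls() of the intersection gives
 * the largest level every unit can handle, e.g. a common mask of 0x3
 * returns 2 (up to 1GiB pages) and 0x1 returns 1 (2MiB only), matching
 * the iommu_superpage encoding documented in struct dmar_domain.
 */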
827
828 /* Some capabilities may be different across iommus */
829 static void domain_update_iommu_cap(struct dmar_domain *domain)
830 {
831         domain_update_iommu_coherency(domain);
832         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
833         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
834 }
835
836 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
837                                                        u8 bus, u8 devfn, int alloc)
838 {
839         struct root_entry *root = &iommu->root_entry[bus];
840         struct context_entry *context;
841         u64 *entry;
842
843         entry = &root->lo;
844         if (ecs_enabled(iommu)) {
845                 if (devfn >= 0x80) {
846                         devfn -= 0x80;
847                         entry = &root->hi;
848                 }
849                 devfn *= 2;
850         }
851         if (*entry & 1)
852                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
853         else {
854                 unsigned long phy_addr;
855                 if (!alloc)
856                         return NULL;
857
858                 context = alloc_pgtable_page(iommu->node);
859                 if (!context)
860                         return NULL;
861
862                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
863                 phy_addr = virt_to_phys((void *)context);
864                 *entry = phy_addr | 1;
865                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
866         }
867         return &context[devfn];
868 }
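/*
 * In extended-context (ECS) mode each half of the 128-bit root entry
 * covers half a bus: root->lo maps devfns 0x00-0x7f and root->hi maps
 * 0x80-0xff.  Extended context entries are twice the size of legacy
 * ones, so the devfn is doubled above to land on the first 128-bit
 * half of the right 256-bit entry in the 4KiB context table.
 */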
869
870 static int iommu_dummy(struct device *dev)
871 {
872         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
873 }
874
875 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
876 {
877         struct dmar_drhd_unit *drhd = NULL;
878         struct intel_iommu *iommu;
879         struct device *tmp;
880         struct pci_dev *ptmp, *pdev = NULL;
881         u16 segment = 0;
882         int i;
883
884         if (iommu_dummy(dev))
885                 return NULL;
886
887         if (dev_is_pci(dev)) {
888                 struct pci_dev *pf_pdev;
889
890                 pdev = to_pci_dev(dev);
891                 /* VFs aren't listed in scope tables; we need to look up
892                  * the PF instead to find the IOMMU. */
893                 pf_pdev = pci_physfn(pdev);
894                 dev = &pf_pdev->dev;
895                 segment = pci_domain_nr(pdev->bus);
896         } else if (has_acpi_companion(dev))
897                 dev = &ACPI_COMPANION(dev)->dev;
898
899         rcu_read_lock();
900         for_each_active_iommu(iommu, drhd) {
901                 if (pdev && segment != drhd->segment)
902                         continue;
903
904                 for_each_active_dev_scope(drhd->devices,
905                                           drhd->devices_cnt, i, tmp) {
906                         if (tmp == dev) {
907                                 /* For a VF use its original BDF# not that of the PF
908                                  * which we used for the IOMMU lookup. Strictly speaking
909                                  * we could do this for all PCI devices; we only need to
910                                  * get the BDF# from the scope table for ACPI matches. */
911                                 if (pdev->is_virtfn)
912                                         goto got_pdev;
913
914                                 *bus = drhd->devices[i].bus;
915                                 *devfn = drhd->devices[i].devfn;
916                                 goto out;
917                         }
918
919                         if (!pdev || !dev_is_pci(tmp))
920                                 continue;
921
922                         ptmp = to_pci_dev(tmp);
923                         if (ptmp->subordinate &&
924                             ptmp->subordinate->number <= pdev->bus->number &&
925                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
926                                 goto got_pdev;
927                 }
928
929                 if (pdev && drhd->include_all) {
930                 got_pdev:
931                         *bus = pdev->bus->number;
932                         *devfn = pdev->devfn;
933                         goto out;
934                 }
935         }
936         iommu = NULL;
937  out:
938         rcu_read_unlock();
939
940         return iommu;
941 }
942
943 static void domain_flush_cache(struct dmar_domain *domain,
944                                void *addr, int size)
945 {
946         if (!domain->iommu_coherency)
947                 clflush_cache_range(addr, size);
948 }
949
950 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
951 {
952         struct context_entry *context;
953         int ret = 0;
954         unsigned long flags;
955
956         spin_lock_irqsave(&iommu->lock, flags);
957         context = iommu_context_addr(iommu, bus, devfn, 0);
958         if (context)
959                 ret = context_present(context);
960         spin_unlock_irqrestore(&iommu->lock, flags);
961         return ret;
962 }
963
964 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
965 {
966         struct context_entry *context;
967         unsigned long flags;
968
969         spin_lock_irqsave(&iommu->lock, flags);
970         context = iommu_context_addr(iommu, bus, devfn, 0);
971         if (context) {
972                 context_clear_entry(context);
973                 __iommu_flush_cache(iommu, context, sizeof(*context));
974         }
975         spin_unlock_irqrestore(&iommu->lock, flags);
976 }
977
978 static void free_context_table(struct intel_iommu *iommu)
979 {
980         int i;
981         unsigned long flags;
982         struct context_entry *context;
983
984         spin_lock_irqsave(&iommu->lock, flags);
985         if (!iommu->root_entry) {
986                 goto out;
987         }
988         for (i = 0; i < ROOT_ENTRY_NR; i++) {
989                 context = iommu_context_addr(iommu, i, 0, 0);
990                 if (context)
991                         free_pgtable_page(context);
992
993                 if (!ecs_enabled(iommu))
994                         continue;
995
996                 context = iommu_context_addr(iommu, i, 0x80, 0);
997                 if (context)
998                         free_pgtable_page(context);
999
1000         }
1001         free_pgtable_page(iommu->root_entry);
1002         iommu->root_entry = NULL;
1003 out:
1004         spin_unlock_irqrestore(&iommu->lock, flags);
1005 }
1006
1007 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
1008                                       unsigned long pfn, int *target_level)
1009 {
1010         struct dma_pte *parent, *pte = NULL;
1011         int level = agaw_to_level(domain->agaw);
1012         int offset;
1013
1014         BUG_ON(!domain->pgd);
1015
1016         if (!domain_pfn_supported(domain, pfn))
1017                 /* Address beyond IOMMU's addressing capabilities. */
1018                 return NULL;
1019
1020         parent = domain->pgd;
1021
1022         while (1) {
1023                 void *tmp_page;
1024
1025                 offset = pfn_level_offset(pfn, level);
1026                 pte = &parent[offset];
1027                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1028                         break;
1029                 if (level == *target_level)
1030                         break;
1031
1032                 if (!dma_pte_present(pte)) {
1033                         uint64_t pteval;
1034
1035                         tmp_page = alloc_pgtable_page(domain->nid);
1036
1037                         if (!tmp_page)
1038                                 return NULL;
1039
1040                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1041                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1042                         if (cmpxchg64(&pte->val, 0ULL, pteval))
1043                                 /* Someone else set it while we were thinking; use theirs. */
1044                                 free_pgtable_page(tmp_page);
1045                         else
1046                                 domain_flush_cache(domain, pte, sizeof(*pte));
1047                 }
1048                 if (level == 1)
1049                         break;
1050
1051                 parent = phys_to_virt(dma_pte_addr(pte));
1052                 level--;
1053         }
1054
1055         if (!*target_level)
1056                 *target_level = level;
1057
1058         return pte;
1059 }
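/*
 * Worked example: with a 4-level table (agaw 2), pfn_level_offset()
 * takes pfn bits 27-35 at level 4, 18-26 at level 3, 9-17 at level 2
 * and 0-8 at level 1 (LEVEL_STRIDE bits per level).  Missing
 * intermediate tables are allocated on the way down and installed with
 * cmpxchg64(), so a racing walker simply frees its duplicate page.
 */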
1060
1061
1062 /* return address's pte at specific level */
1063 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1064                                          unsigned long pfn,
1065                                          int level, int *large_page)
1066 {
1067         struct dma_pte *parent, *pte = NULL;
1068         int total = agaw_to_level(domain->agaw);
1069         int offset;
1070
1071         parent = domain->pgd;
1072         while (level <= total) {
1073                 offset = pfn_level_offset(pfn, total);
1074                 pte = &parent[offset];
1075                 if (level == total)
1076                         return pte;
1077
1078                 if (!dma_pte_present(pte)) {
1079                         *large_page = total;
1080                         break;
1081                 }
1082
1083                 if (dma_pte_superpage(pte)) {
1084                         *large_page = total;
1085                         return pte;
1086                 }
1087
1088                 parent = phys_to_virt(dma_pte_addr(pte));
1089                 total--;
1090         }
1091         return NULL;
1092 }
1093
1094 /* clear last level pte; a tlb flush should follow */
1095 static void dma_pte_clear_range(struct dmar_domain *domain,
1096                                 unsigned long start_pfn,
1097                                 unsigned long last_pfn)
1098 {
1099         unsigned int large_page = 1;
1100         struct dma_pte *first_pte, *pte;
1101
1102         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1103         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1104         BUG_ON(start_pfn > last_pfn);
1105
1106         /* we don't need lock here; nobody else touches the iova range */
1107         do {
1108                 large_page = 1;
1109                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1110                 if (!pte) {
1111                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1112                         continue;
1113                 }
1114                 do {
1115                         dma_clear_pte(pte);
1116                         start_pfn += lvl_to_nr_pages(large_page);
1117                         pte++;
1118                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1119
1120                 domain_flush_cache(domain, first_pte,
1121                                    (void *)pte - (void *)first_pte);
1122
1123         } while (start_pfn && start_pfn <= last_pfn);
1124 }
1125
1126 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1127                                struct dma_pte *pte, unsigned long pfn,
1128                                unsigned long start_pfn, unsigned long last_pfn)
1129 {
1130         pfn = max(start_pfn, pfn);
1131         pte = &pte[pfn_level_offset(pfn, level)];
1132
1133         do {
1134                 unsigned long level_pfn;
1135                 struct dma_pte *level_pte;
1136
1137                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1138                         goto next;
1139
1140                 level_pfn = pfn & level_mask(level - 1);
1141                 level_pte = phys_to_virt(dma_pte_addr(pte));
1142
1143                 if (level > 2)
1144                         dma_pte_free_level(domain, level - 1, level_pte,
1145                                            level_pfn, start_pfn, last_pfn);
1146
1147                 /* If range covers entire pagetable, free it */
1148                 if (!(start_pfn > level_pfn ||
1149                       last_pfn < level_pfn + level_size(level) - 1)) {
1150                         dma_clear_pte(pte);
1151                         domain_flush_cache(domain, pte, sizeof(*pte));
1152                         free_pgtable_page(level_pte);
1153                 }
1154 next:
1155                 pfn += level_size(level);
1156         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1157 }
1158
1159 /* free page table pages. last level pte should already be cleared */
1160 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1161                                    unsigned long start_pfn,
1162                                    unsigned long last_pfn)
1163 {
1164         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1165         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1166         BUG_ON(start_pfn > last_pfn);
1167
1168         dma_pte_clear_range(domain, start_pfn, last_pfn);
1169
1170         /* We don't need lock here; nobody else touches the iova range */
1171         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1172                            domain->pgd, 0, start_pfn, last_pfn);
1173
1174         /* free pgd */
1175         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1176                 free_pgtable_page(domain->pgd);
1177                 domain->pgd = NULL;
1178         }
1179 }
1180
1181 /* When a page at a given level is being unlinked from its parent, we don't
1182    need to *modify* it at all. All we need to do is make a list of all the
1183    pages which can be freed just as soon as we've flushed the IOTLB and we
1184    know the hardware page-walk will no longer touch them.
1185    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1186    be freed. */
1187 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1188                                             int level, struct dma_pte *pte,
1189                                             struct page *freelist)
1190 {
1191         struct page *pg;
1192
1193         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1194         pg->freelist = freelist;
1195         freelist = pg;
1196
1197         if (level == 1)
1198                 return freelist;
1199
1200         pte = page_address(pg);
1201         do {
1202                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1203                         freelist = dma_pte_list_pagetables(domain, level - 1,
1204                                                            pte, freelist);
1205                 pte++;
1206         } while (!first_pte_in_page(pte));
1207
1208         return freelist;
1209 }
1210
1211 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1212                                         struct dma_pte *pte, unsigned long pfn,
1213                                         unsigned long start_pfn,
1214                                         unsigned long last_pfn,
1215                                         struct page *freelist)
1216 {
1217         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1218
1219         pfn = max(start_pfn, pfn);
1220         pte = &pte[pfn_level_offset(pfn, level)];
1221
1222         do {
1223                 unsigned long level_pfn;
1224
1225                 if (!dma_pte_present(pte))
1226                         goto next;
1227
1228                 level_pfn = pfn & level_mask(level);
1229
1230                 /* If range covers entire pagetable, free it */
1231                 if (start_pfn <= level_pfn &&
1232                     last_pfn >= level_pfn + level_size(level) - 1) {
1233                         /* These subordinate page tables are going away entirely. Don't
1234                            bother to clear them; we're just going to *free* them. */
1235                         if (level > 1 && !dma_pte_superpage(pte))
1236                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1237
1238                         dma_clear_pte(pte);
1239                         if (!first_pte)
1240                                 first_pte = pte;
1241                         last_pte = pte;
1242                 } else if (level > 1) {
1243                         /* Recurse down into a level that isn't *entirely* obsolete */
1244                         freelist = dma_pte_clear_level(domain, level - 1,
1245                                                        phys_to_virt(dma_pte_addr(pte)),
1246                                                        level_pfn, start_pfn, last_pfn,
1247                                                        freelist);
1248                 }
1249 next:
1250                 pfn += level_size(level);
1251         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1252
1253         if (first_pte)
1254                 domain_flush_cache(domain, first_pte,
1255                                    (void *)++last_pte - (void *)first_pte);
1256
1257         return freelist;
1258 }
1259
1260 /* We can't just free the pages because the IOMMU may still be walking
1261    the page tables, and may have cached the intermediate levels. The
1262    pages can only be freed after the IOTLB flush has been done. */
1263 static struct page *domain_unmap(struct dmar_domain *domain,
1264                                  unsigned long start_pfn,
1265                                  unsigned long last_pfn)
1266 {
1267         struct page *freelist = NULL;
1268
1269         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1270         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1271         BUG_ON(start_pfn > last_pfn);
1272
1273         /* we don't need lock here; nobody else touches the iova range */
1274         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1275                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1276
1277         /* free pgd */
1278         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1279                 struct page *pgd_page = virt_to_page(domain->pgd);
1280                 pgd_page->freelist = freelist;
1281                 freelist = pgd_page;
1282
1283                 domain->pgd = NULL;
1284         }
1285
1286         return freelist;
1287 }
1288
1289 static void dma_free_pagelist(struct page *freelist)
1290 {
1291         struct page *pg;
1292
1293         while ((pg = freelist)) {
1294                 freelist = pg->freelist;
1295                 free_pgtable_page(page_address(pg));
1296         }
1297 }
1298
1299 /* iommu handling */
1300 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1301 {
1302         struct root_entry *root;
1303         unsigned long flags;
1304
1305         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1306         if (!root) {
1307                 pr_err("Allocating root entry for %s failed\n",
1308                         iommu->name);
1309                 return -ENOMEM;
1310         }
1311
1312         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1313
1314         spin_lock_irqsave(&iommu->lock, flags);
1315         iommu->root_entry = root;
1316         spin_unlock_irqrestore(&iommu->lock, flags);
1317
1318         return 0;
1319 }
1320
1321 static void iommu_set_root_entry(struct intel_iommu *iommu)
1322 {
1323         u64 addr;
1324         u32 sts;
1325         unsigned long flag;
1326
1327         addr = virt_to_phys(iommu->root_entry);
1328         if (ecs_enabled(iommu))
1329                 addr |= DMA_RTADDR_RTT;
1330
1331         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1332         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1333
1334         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1335
1336         /* Make sure hardware completes it */
1337         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1338                       readl, (sts & DMA_GSTS_RTPS), sts);
1339
1340         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1341 }
1342
1343 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1344 {
1345         u32 val;
1346         unsigned long flag;
1347
1348         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1349                 return;
1350
1351         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1352         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1353
1354         /* Make sure hardware completes it */
1355         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1356                       readl, (!(val & DMA_GSTS_WBFS)), val);
1357
1358         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1359 }
1360
1361 /* return value determines whether we need a write buffer flush */
1362 static void __iommu_flush_context(struct intel_iommu *iommu,
1363                                   u16 did, u16 source_id, u8 function_mask,
1364                                   u64 type)
1365 {
1366         u64 val = 0;
1367         unsigned long flag;
1368
1369         switch (type) {
1370         case DMA_CCMD_GLOBAL_INVL:
1371                 val = DMA_CCMD_GLOBAL_INVL;
1372                 break;
1373         case DMA_CCMD_DOMAIN_INVL:
1374                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1375                 break;
1376         case DMA_CCMD_DEVICE_INVL:
1377                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1378                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1379                 break;
1380         default:
1381                 BUG();
1382         }
1383         val |= DMA_CCMD_ICC;
1384
1385         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1386         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1387
1388         /* Make sure hardware completes it */
1389         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1390                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1391
1392         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1393 }
1394
1395 /* return value determines whether we need a write buffer flush */
1396 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1397                                 u64 addr, unsigned int size_order, u64 type)
1398 {
1399         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1400         u64 val = 0, val_iva = 0;
1401         unsigned long flag;
1402
1403         switch (type) {
1404         case DMA_TLB_GLOBAL_FLUSH:
1405                 /* global flush doesn't need set IVA_REG */
1406                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1407                 break;
1408         case DMA_TLB_DSI_FLUSH:
1409                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1410                 break;
1411         case DMA_TLB_PSI_FLUSH:
1412                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1413                 /* IH bit is passed in as part of address */
1414                 val_iva = size_order | addr;
1415                 break;
1416         default:
1417                 BUG();
1418         }
1419         /* Note: set drain read/write */
1420 #if 0
1421         /*
1422          * This is probably meant to be super secure. Looks like we can
1423          * ignore it without any impact.
1424          */
1425         if (cap_read_drain(iommu->cap))
1426                 val |= DMA_TLB_READ_DRAIN;
1427 #endif
1428         if (cap_write_drain(iommu->cap))
1429                 val |= DMA_TLB_WRITE_DRAIN;
1430
1431         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1432         /* Note: Only uses first TLB reg currently */
1433         if (val_iva)
1434                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1435         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1436
1437         /* Make sure hardware completes it */
1438         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1439                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1440
1441         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1442
1443         /* check IOTLB invalidation granularity */
1444         if (DMA_TLB_IAIG(val) == 0)
1445                 pr_err("Flush IOTLB failed\n");
1446         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1447                 pr_debug("TLB flush request %Lx, actual %Lx\n",
1448                         (unsigned long long)DMA_TLB_IIRG(type),
1449                         (unsigned long long)DMA_TLB_IAIG(val));
1450 }
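/*
 * For a page-selective (PSI) flush the size_order is OR-ed into the
 * low bits of the page-aligned address written to the IVA register,
 * e.g. size_order 9 invalidates a naturally aligned 2^9-page (2MiB)
 * region starting at 'addr'.
 */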
1451
1452 static struct device_domain_info *
1453 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1454                          u8 bus, u8 devfn)
1455 {
1456         struct device_domain_info *info;
1457
1458         assert_spin_locked(&device_domain_lock);
1459
1460         if (!iommu->qi)
1461                 return NULL;
1462
1463         list_for_each_entry(info, &domain->devices, link)
1464                 if (info->iommu == iommu && info->bus == bus &&
1465                     info->devfn == devfn) {
1466                         if (info->ats_supported && info->dev)
1467                                 return info;
1468                         break;
1469                 }
1470
1471         return NULL;
1472 }
1473
1474 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1475 {
1476         struct pci_dev *pdev;
1477
1478         if (!info || !dev_is_pci(info->dev))
1479                 return;
1480
1481         pdev = to_pci_dev(info->dev);
1482
1483 #ifdef CONFIG_INTEL_IOMMU_SVM
1484         /* The PCIe spec, in its wisdom, declares that the behaviour of
1485            the device if you enable PASID support after ATS support is
1486            undefined. So always enable PASID support on devices which
1487            have it, even if we can't yet know if we're ever going to
1488            use it. */
1489         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1490                 info->pasid_enabled = 1;
1491
1492         if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1493                 info->pri_enabled = 1;
1494 #endif
1495         if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1496                 info->ats_enabled = 1;
1497                 info->ats_qdep = pci_ats_queue_depth(pdev);
1498         }
1499 }
1500
1501 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1502 {
1503         struct pci_dev *pdev;
1504
1505         if (!dev_is_pci(info->dev))
1506                 return;
1507
1508         pdev = to_pci_dev(info->dev);
1509
1510         if (info->ats_enabled) {
1511                 pci_disable_ats(pdev);
1512                 info->ats_enabled = 0;
1513         }
1514 #ifdef CONFIG_INTEL_IOMMU_SVM
1515         if (info->pri_enabled) {
1516                 pci_disable_pri(pdev);
1517                 info->pri_enabled = 0;
1518         }
1519         if (info->pasid_enabled) {
1520                 pci_disable_pasid(pdev);
1521                 info->pasid_enabled = 0;
1522         }
1523 #endif
1524 }
1525
1526 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1527                                   u64 addr, unsigned mask)
1528 {
1529         u16 sid, qdep;
1530         unsigned long flags;
1531         struct device_domain_info *info;
1532
1533         spin_lock_irqsave(&device_domain_lock, flags);
1534         list_for_each_entry(info, &domain->devices, link) {
1535                 if (!info->ats_enabled)
1536                         continue;
1537
1538                 sid = info->bus << 8 | info->devfn;
1539                 qdep = info->ats_qdep;
1540                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1541         }
1542         spin_unlock_irqrestore(&device_domain_lock, flags);
1543 }
1544
1545 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1546                                   struct dmar_domain *domain,
1547                                   unsigned long pfn, unsigned int pages,
1548                                   int ih, int map)
1549 {
1550         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1551         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1552         u16 did = domain->iommu_did[iommu->seq_id];
1553
1554         BUG_ON(pages == 0);
1555
1556         if (ih)
1557                 ih = 1 << 6;
1558         /*
1559          * Fall back to a domain-selective flush if there is no PSI support
1560          * or the size is too big.
1561          * PSI requires the page size to be 2 ^ x, and the base address to be
1562          * naturally aligned to that size.
1563          */
1564         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1565                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1566                                                 DMA_TLB_DSI_FLUSH);
1567         else
1568                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1569                                                 DMA_TLB_PSI_FLUSH);
1570
1571         /*
1572          * In caching mode, changes of pages from non-present to present require
1573          * flush. However, device IOTLB doesn't need to be flushed in this case.
1574          */
1575         if (!cap_caching_mode(iommu->cap) || !map)
1576                 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1577                                       addr, mask);
1578 }
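
/*
 * Editor's sketch (illustrative, not part of the driver): how a page count
 * becomes the address-mask used for a page-selective invalidation. PSI can
 * only describe a naturally aligned 2^mask-page region, so the count is
 * first rounded up to a power of two. A plain-C equivalent of the mask
 * computation above, with a made-up page count:
 *
 *     unsigned long pages = 5;
 *     unsigned int mask = 0;
 *
 *     while ((1UL << mask) < pages)  // round up to the next power of two
 *             mask++;                // pages = 5  ->  mask = 3 (8 pages)
 *
 * With VTD_PAGE_SHIFT == 12 that describes a 32KiB region; if mask exceeds
 * cap_max_amask_val() the code above falls back to a domain-selective flush.
 */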
1579
1580 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1581 {
1582         u32 pmen;
1583         unsigned long flags;
1584
1585         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1586         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1587         pmen &= ~DMA_PMEN_EPM;
1588         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1589
1590         /* wait for the protected region status bit to clear */
1591         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1592                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1593
1594         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1595 }
1596
1597 static void iommu_enable_translation(struct intel_iommu *iommu)
1598 {
1599         u32 sts;
1600         unsigned long flags;
1601
1602         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1603         iommu->gcmd |= DMA_GCMD_TE;
1604         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1605
1606         /* Make sure hardware completes it */
1607         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1608                       readl, (sts & DMA_GSTS_TES), sts);
1609
1610         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1611 }
1612
1613 static void iommu_disable_translation(struct intel_iommu *iommu)
1614 {
1615         u32 sts;
1616         unsigned long flag;
1617
1618         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1619         iommu->gcmd &= ~DMA_GCMD_TE;
1620         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1621
1622         /* Make sure hardware completes it */
1623         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1624                       readl, (!(sts & DMA_GSTS_TES)), sts);
1625
1626         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1627 }
1628
1629
1630 static int iommu_init_domains(struct intel_iommu *iommu)
1631 {
1632         u32 ndomains, nlongs;
1633         size_t size;
1634
1635         ndomains = cap_ndoms(iommu->cap);
1636         pr_debug("%s: Number of Domains supported <%d>\n",
1637                  iommu->name, ndomains);
1638         nlongs = BITS_TO_LONGS(ndomains);
1639
1640         spin_lock_init(&iommu->lock);
1641
1642         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1643         if (!iommu->domain_ids) {
1644                 pr_err("%s: Allocating domain id array failed\n",
1645                        iommu->name);
1646                 return -ENOMEM;
1647         }
1648
1649         size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1650         iommu->domains = kzalloc(size, GFP_KERNEL);
1651
1652         if (iommu->domains) {
1653                 size = 256 * sizeof(struct dmar_domain *);
1654                 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1655         }
1656
1657         if (!iommu->domains || !iommu->domains[0]) {
1658                 pr_err("%s: Allocating domain array failed\n",
1659                        iommu->name);
1660                 kfree(iommu->domain_ids);
1661                 kfree(iommu->domains);
1662                 iommu->domain_ids = NULL;
1663                 iommu->domains    = NULL;
1664                 return -ENOMEM;
1665         }
1666
1669         /*
1670          * If Caching mode is set, then invalid translations are tagged
1671          * with domain-id 0, hence we need to pre-allocate it. We also
1672          * use domain-id 0 as a marker for non-allocated domain-id, so
1673          * make sure it is not used for a real domain.
1674          */
1675         set_bit(0, iommu->domain_ids);
1676
1677         return 0;
1678 }
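
/*
 * Editor's note (an assumption based on set_iommu_domain() and
 * get_iommu_domain(), which live elsewhere in this file): iommu->domains is
 * a two-level array so that the full cap_ndoms() space does not have to be
 * allocated up front. Group 0 (the first 256 ids) is allocated here and
 * further 256-entry groups are allocated on demand, so a lookup is
 * effectively:
 *
 *     struct dmar_domain *d = iommu->domains[did >> 8][did & 0xff];
 */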
1679
1680 static void disable_dmar_iommu(struct intel_iommu *iommu)
1681 {
1682         struct device_domain_info *info, *tmp;
1683         unsigned long flags;
1684
1685         if (!iommu->domains || !iommu->domain_ids)
1686                 return;
1687
1688 again:
1689         spin_lock_irqsave(&device_domain_lock, flags);
1690         list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1691                 struct dmar_domain *domain;
1692
1693                 if (info->iommu != iommu)
1694                         continue;
1695
1696                 if (!info->dev || !info->domain)
1697                         continue;
1698
1699                 domain = info->domain;
1700
1701                 __dmar_remove_one_dev_info(info);
1702
1703                 if (!domain_type_is_vm_or_si(domain)) {
1704                         /*
1705                          * The domain_exit() function can't be called under
1706                          * device_domain_lock, as it takes this lock itself.
1707                          * So release the lock here and re-run the loop
1708                          * afterwards.
1709                          */
1710                         spin_unlock_irqrestore(&device_domain_lock, flags);
1711                         domain_exit(domain);
1712                         goto again;
1713                 }
1714         }
1715         spin_unlock_irqrestore(&device_domain_lock, flags);
1716
1717         if (iommu->gcmd & DMA_GCMD_TE)
1718                 iommu_disable_translation(iommu);
1719 }
1720
1721 static void free_dmar_iommu(struct intel_iommu *iommu)
1722 {
1723         if ((iommu->domains) && (iommu->domain_ids)) {
1724                 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1725                 int i;
1726
1727                 for (i = 0; i < elems; i++)
1728                         kfree(iommu->domains[i]);
1729                 kfree(iommu->domains);
1730                 kfree(iommu->domain_ids);
1731                 iommu->domains = NULL;
1732                 iommu->domain_ids = NULL;
1733         }
1734
1735         g_iommus[iommu->seq_id] = NULL;
1736
1737         /* free context mapping */
1738         free_context_table(iommu);
1739
1740 #ifdef CONFIG_INTEL_IOMMU_SVM
1741         if (pasid_enabled(iommu)) {
1742                 if (ecap_prs(iommu->ecap))
1743                         intel_svm_finish_prq(iommu);
1744                 intel_svm_free_pasid_tables(iommu);
1745         }
1746 #endif
1747 }
1748
1749 static struct dmar_domain *alloc_domain(int flags)
1750 {
1751         struct dmar_domain *domain;
1752
1753         domain = alloc_domain_mem();
1754         if (!domain)
1755                 return NULL;
1756
1757         memset(domain, 0, sizeof(*domain));
1758         domain->nid = -1;
1759         domain->flags = flags;
1760         INIT_LIST_HEAD(&domain->devices);
1761
1762         return domain;
1763 }
1764
1765 /* Must be called with device_domain_lock and iommu->lock held */
1766 static int domain_attach_iommu(struct dmar_domain *domain,
1767                                struct intel_iommu *iommu)
1768 {
1769         unsigned long ndomains;
1770         int num;
1771
1772         assert_spin_locked(&device_domain_lock);
1773         assert_spin_locked(&iommu->lock);
1774
1775         domain->iommu_refcnt[iommu->seq_id] += 1;
1776         domain->iommu_count += 1;
1777         if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1778                 ndomains = cap_ndoms(iommu->cap);
1779                 num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1780
1781                 if (num >= ndomains) {
1782                         pr_err("%s: No free domain ids\n", iommu->name);
1783                         domain->iommu_refcnt[iommu->seq_id] -= 1;
1784                         domain->iommu_count -= 1;
1785                         return -ENOSPC;
1786                 }
1787
1788                 set_bit(num, iommu->domain_ids);
1789                 set_iommu_domain(iommu, num, domain);
1790
1791                 domain->iommu_did[iommu->seq_id] = num;
1792                 domain->nid                      = iommu->node;
1793
1794                 domain_update_iommu_cap(domain);
1795         }
1796
1797         return 0;
1798 }
1799
1800 static int domain_detach_iommu(struct dmar_domain *domain,
1801                                struct intel_iommu *iommu)
1802 {
1803         int num, count = INT_MAX;
1804
1805         assert_spin_locked(&device_domain_lock);
1806         assert_spin_locked(&iommu->lock);
1807
1808         domain->iommu_refcnt[iommu->seq_id] -= 1;
1809         count = --domain->iommu_count;
1810         if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1811                 num = domain->iommu_did[iommu->seq_id];
1812                 clear_bit(num, iommu->domain_ids);
1813                 set_iommu_domain(iommu, num, NULL);
1814
1815                 domain_update_iommu_cap(domain);
1816                 domain->iommu_did[iommu->seq_id] = 0;
1817         }
1818
1819         return count;
1820 }
1821
1822 static struct iova_domain reserved_iova_list;
1823 static struct lock_class_key reserved_rbtree_key;
1824
1825 static int dmar_init_reserved_ranges(void)
1826 {
1827         struct pci_dev *pdev = NULL;
1828         struct iova *iova;
1829         int i;
1830
1831         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1832                         DMA_32BIT_PFN);
1833
1834         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1835                 &reserved_rbtree_key);
1836
1837         /* IOAPIC ranges shouldn't be accessed by DMA */
1838         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1839                 IOVA_PFN(IOAPIC_RANGE_END));
1840         if (!iova) {
1841                 pr_err("Reserve IOAPIC range failed\n");
1842                 return -ENODEV;
1843         }
1844
1845         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1846         for_each_pci_dev(pdev) {
1847                 struct resource *r;
1848
1849                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1850                         r = &pdev->resource[i];
1851                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1852                                 continue;
1853                         iova = reserve_iova(&reserved_iova_list,
1854                                             IOVA_PFN(r->start),
1855                                             IOVA_PFN(r->end));
1856                         if (!iova) {
1857                                 pr_err("Reserve iova failed\n");
1858                                 return -ENODEV;
1859                         }
1860                 }
1861         }
1862         return 0;
1863 }
1864
1865 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1866 {
1867         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1868 }
1869
1870 static inline int guestwidth_to_adjustwidth(int gaw)
1871 {
1872         int agaw;
1873         int r = (gaw - 12) % 9;
1874
1875         if (r == 0)
1876                 agaw = gaw;
1877         else
1878                 agaw = gaw + 9 - r;
1879         if (agaw > 64)
1880                 agaw = 64;
1881         return agaw;
1882 }
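
/*
 * Editor's sketch (illustrative): the adjusted width is the guest address
 * width rounded up to a value expressible with whole 9-bit page-table
 * levels above the 12-bit page offset, i.e. 12 + 9 * n, capped at 64:
 *
 *     guestwidth_to_adjustwidth(48) == 48   // (48 - 12) % 9 == 0
 *     guestwidth_to_adjustwidth(40) == 48   // rounded up by 8 bits
 *     guestwidth_to_adjustwidth(57) == 57   // 5-level case
 *
 * width_to_agaw() then turns the result into the AGAW encoding that is
 * checked against cap_sagaw() in domain_init() below.
 */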
1883
1884 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1885                        int guest_width)
1886 {
1887         int adjust_width, agaw;
1888         unsigned long sagaw;
1889
1890         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1891                         DMA_32BIT_PFN);
1892         domain_reserve_special_ranges(domain);
1893
1894         /* calculate AGAW */
1895         if (guest_width > cap_mgaw(iommu->cap))
1896                 guest_width = cap_mgaw(iommu->cap);
1897         domain->gaw = guest_width;
1898         adjust_width = guestwidth_to_adjustwidth(guest_width);
1899         agaw = width_to_agaw(adjust_width);
1900         sagaw = cap_sagaw(iommu->cap);
1901         if (!test_bit(agaw, &sagaw)) {
1902                 /* hardware doesn't support it, choose a bigger one */
1903                 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1904                 agaw = find_next_bit(&sagaw, 5, agaw);
1905                 if (agaw >= 5)
1906                         return -ENODEV;
1907         }
1908         domain->agaw = agaw;
1909
1910         if (ecap_coherent(iommu->ecap))
1911                 domain->iommu_coherency = 1;
1912         else
1913                 domain->iommu_coherency = 0;
1914
1915         if (ecap_sc_support(iommu->ecap))
1916                 domain->iommu_snooping = 1;
1917         else
1918                 domain->iommu_snooping = 0;
1919
1920         if (intel_iommu_superpage)
1921                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1922         else
1923                 domain->iommu_superpage = 0;
1924
1925         domain->nid = iommu->node;
1926
1927         /* always allocate the top pgd */
1928         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1929         if (!domain->pgd)
1930                 return -ENOMEM;
1931         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1932         return 0;
1933 }
1934
1935 static void domain_exit(struct dmar_domain *domain)
1936 {
1937         struct page *freelist = NULL;
1938
1939         /* Domain 0 is reserved, so don't process it */
1940         if (!domain)
1941                 return;
1942
1943         /* Flush any lazy unmaps that may reference this domain */
1944         if (!intel_iommu_strict)
1945                 flush_unmaps_timeout(0);
1946
1947         /* Remove associated devices and clear attached or cached domains */
1948         rcu_read_lock();
1949         domain_remove_dev_info(domain);
1950         rcu_read_unlock();
1951
1952         /* destroy iovas */
1953         put_iova_domain(&domain->iovad);
1954
1955         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1956
1957         dma_free_pagelist(freelist);
1958
1959         free_domain_mem(domain);
1960 }
1961
1962 static int domain_context_mapping_one(struct dmar_domain *domain,
1963                                       struct intel_iommu *iommu,
1964                                       u8 bus, u8 devfn)
1965 {
1966         u16 did = domain->iommu_did[iommu->seq_id];
1967         int translation = CONTEXT_TT_MULTI_LEVEL;
1968         struct device_domain_info *info = NULL;
1969         struct context_entry *context;
1970         unsigned long flags;
1971         struct dma_pte *pgd;
1972         int ret, agaw;
1973
1974         WARN_ON(did == 0);
1975
1976         if (hw_pass_through && domain_type_is_si(domain))
1977                 translation = CONTEXT_TT_PASS_THROUGH;
1978
1979         pr_debug("Set context mapping for %02x:%02x.%d\n",
1980                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1981
1982         BUG_ON(!domain->pgd);
1983
1984         spin_lock_irqsave(&device_domain_lock, flags);
1985         spin_lock(&iommu->lock);
1986
1987         ret = -ENOMEM;
1988         context = iommu_context_addr(iommu, bus, devfn, 1);
1989         if (!context)
1990                 goto out_unlock;
1991
1992         ret = 0;
1993         if (context_present(context))
1994                 goto out_unlock;
1995
1996         /*
1997          * For kdump cases, old valid entries may be cached due to the
1998          * in-flight DMA and copied pgtable, but there is no unmapping
1999          * behaviour for them, thus we need an explicit cache flush for
2000          * the newly-mapped device. For kdump, at this point, the device
2001          * is supposed to finish reset at its driver probe stage, so no
2002          * is supposed to have finished reset at its driver probe stage, so no
2003          * in-flight DMA will exist, and we don't need to worry about it
2004          * hereafter.
2005         if (context_copied(context)) {
2006                 u16 did_old = context_domain_id(context);
2007
2008                 if (did_old >= 0 && did_old < cap_ndoms(iommu->cap))
2009                         iommu->flush.flush_context(iommu, did_old,
2010                                                    (((u16)bus) << 8) | devfn,
2011                                                    DMA_CCMD_MASK_NOBIT,
2012                                                    DMA_CCMD_DEVICE_INVL);
2013         }
2014
2015         pgd = domain->pgd;
2016
2017         context_clear_entry(context);
2018         context_set_domain_id(context, did);
2019
2020         /*
2021          * Skip top levels of page tables for iommu which has less agaw
2022          * than default.  Unnecessary for PT mode.
2023          */
2024         if (translation != CONTEXT_TT_PASS_THROUGH) {
2025                 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
2026                         ret = -ENOMEM;
2027                         pgd = phys_to_virt(dma_pte_addr(pgd));
2028                         if (!dma_pte_present(pgd))
2029                                 goto out_unlock;
2030                 }
2031
2032                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2033                 if (info && info->ats_supported)
2034                         translation = CONTEXT_TT_DEV_IOTLB;
2035                 else
2036                         translation = CONTEXT_TT_MULTI_LEVEL;
2037
2038                 context_set_address_root(context, virt_to_phys(pgd));
2039                 context_set_address_width(context, iommu->agaw);
2040         } else {
2041                 /*
2042                  * In pass through mode, AW must be programmed to
2043                  * indicate the largest AGAW value supported by
2044                  * hardware. And ASR is ignored by hardware.
2045                  */
2046                 context_set_address_width(context, iommu->msagaw);
2047         }
2048
2049         context_set_translation_type(context, translation);
2050         context_set_fault_enable(context);
2051         context_set_present(context);
2052         domain_flush_cache(domain, context, sizeof(*context));
2053
2054         /*
2055          * It's a non-present to present mapping. If hardware doesn't cache
2056          * non-present entries we only need to flush the write-buffer. If it
2057          * _does_ cache non-present entries, then it does so in the special
2058          * domain #0, which we have to flush:
2059          */
2060         if (cap_caching_mode(iommu->cap)) {
2061                 iommu->flush.flush_context(iommu, 0,
2062                                            (((u16)bus) << 8) | devfn,
2063                                            DMA_CCMD_MASK_NOBIT,
2064                                            DMA_CCMD_DEVICE_INVL);
2065                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2066         } else {
2067                 iommu_flush_write_buffer(iommu);
2068         }
2069         iommu_enable_dev_iotlb(info);
2070
2071         ret = 0;
2072
2073 out_unlock:
2074         spin_unlock(&iommu->lock);
2075         spin_unlock_irqrestore(&device_domain_lock, flags);
2076
2077         return ret;
2078 }
2079
2080 struct domain_context_mapping_data {
2081         struct dmar_domain *domain;
2082         struct intel_iommu *iommu;
2083 };
2084
2085 static int domain_context_mapping_cb(struct pci_dev *pdev,
2086                                      u16 alias, void *opaque)
2087 {
2088         struct domain_context_mapping_data *data = opaque;
2089
2090         return domain_context_mapping_one(data->domain, data->iommu,
2091                                           PCI_BUS_NUM(alias), alias & 0xff);
2092 }
2093
2094 static int
2095 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2096 {
2097         struct intel_iommu *iommu;
2098         u8 bus, devfn;
2099         struct domain_context_mapping_data data;
2100
2101         iommu = device_to_iommu(dev, &bus, &devfn);
2102         if (!iommu)
2103                 return -ENODEV;
2104
2105         if (!dev_is_pci(dev))
2106                 return domain_context_mapping_one(domain, iommu, bus, devfn);
2107
2108         data.domain = domain;
2109         data.iommu = iommu;
2110
2111         return pci_for_each_dma_alias(to_pci_dev(dev),
2112                                       &domain_context_mapping_cb, &data);
2113 }
2114
2115 static int domain_context_mapped_cb(struct pci_dev *pdev,
2116                                     u16 alias, void *opaque)
2117 {
2118         struct intel_iommu *iommu = opaque;
2119
2120         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2121 }
2122
2123 static int domain_context_mapped(struct device *dev)
2124 {
2125         struct intel_iommu *iommu;
2126         u8 bus, devfn;
2127
2128         iommu = device_to_iommu(dev, &bus, &devfn);
2129         if (!iommu)
2130                 return -ENODEV;
2131
2132         if (!dev_is_pci(dev))
2133                 return device_context_mapped(iommu, bus, devfn);
2134
2135         return !pci_for_each_dma_alias(to_pci_dev(dev),
2136                                        domain_context_mapped_cb, iommu);
2137 }
2138
2139 /* Returns the number of VTD pages, but aligned to MM page size */
2140 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2141                                             size_t size)
2142 {
2143         host_addr &= ~PAGE_MASK;
2144         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2145 }
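
/*
 * Editor's sketch (illustrative, assuming 4KiB MM pages so that PAGE_SHIFT
 * equals VTD_PAGE_SHIFT): only the offset within the MM page matters, and
 * the result is rounded up to whole MM pages before converting to VT-d
 * pages:
 *
 *     aligned_nrpages(0x1234, 0x2000) == 3
 *         // offset 0x234 + 0x2000 = 0x2234, PAGE_ALIGN -> 0x3000 -> 3 pages
 *     aligned_nrpages(0x1000, 0x2000) == 2
 *         // already page aligned, exactly two pages
 */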
2146
2147 /* Return largest possible superpage level for a given mapping */
2148 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2149                                           unsigned long iov_pfn,
2150                                           unsigned long phy_pfn,
2151                                           unsigned long pages)
2152 {
2153         int support, level = 1;
2154         unsigned long pfnmerge;
2155
2156         support = domain->iommu_superpage;
2157
2158         /* To use a large page, the virtual *and* physical addresses
2159            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2160            of them will mean we have to use smaller pages. So just
2161            merge them and check both at once. */
2162         pfnmerge = iov_pfn | phy_pfn;
2163
2164         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2165                 pages >>= VTD_STRIDE_SHIFT;
2166                 if (!pages)
2167                         break;
2168                 pfnmerge >>= VTD_STRIDE_SHIFT;
2169                 level++;
2170                 support--;
2171         }
2172         return level;
2173 }
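
/*
 * Editor's sketch (illustrative values): with VTD_STRIDE_SHIFT == 9, a
 * 2MiB superpage (level 2) is only reported when the low 9 bits of both
 * the IOVA pfn and the physical pfn are zero and at least 512 pages are
 * being mapped. Assuming domain->iommu_superpage >= 1:
 *
 *     hardware_largepage_caps(domain, 0x200, 0x1400, 512)  ->  2
 *     hardware_largepage_caps(domain, 0x201, 0x1400, 512)  ->  1  // misaligned
 *     hardware_largepage_caps(domain, 0x200, 0x1400, 256)  ->  1  // too short
 */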
2174
2175 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2176                             struct scatterlist *sg, unsigned long phys_pfn,
2177                             unsigned long nr_pages, int prot)
2178 {
2179         struct dma_pte *first_pte = NULL, *pte = NULL;
2180         phys_addr_t uninitialized_var(pteval);
2181         unsigned long sg_res = 0;
2182         unsigned int largepage_lvl = 0;
2183         unsigned long lvl_pages = 0;
2184
2185         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2186
2187         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2188                 return -EINVAL;
2189
2190         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2191
2192         if (!sg) {
2193                 sg_res = nr_pages;
2194                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2195         }
2196
2197         while (nr_pages > 0) {
2198                 uint64_t tmp;
2199
2200                 if (!sg_res) {
2201                         sg_res = aligned_nrpages(sg->offset, sg->length);
2202                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2203                         sg->dma_length = sg->length;
2204                         pteval = page_to_phys(sg_page(sg)) | prot;
2205                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2206                 }
2207
2208                 if (!pte) {
2209                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2210
2211                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2212                         if (!pte)
2213                                 return -ENOMEM;
2214                         /* It is a large page */
2215                         if (largepage_lvl > 1) {
2216                                 unsigned long nr_superpages, end_pfn;
2217
2218                                 pteval |= DMA_PTE_LARGE_PAGE;
2219                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2220
2221                                 nr_superpages = sg_res / lvl_pages;
2222                                 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2223
2224                                 /*
2225                                  * Ensure that old small page tables are
2226                                  * removed to make room for superpage(s).
2227                                  */
2228                                 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
2229                         } else {
2230                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2231                         }
2232
2233                 }
2234                 /* We don't need a lock here; nobody else
2235                  * touches this iova range
2236                  */
2237                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2238                 if (tmp) {
2239                         static int dumps = 5;
2240                         pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2241                                 iov_pfn, tmp, (unsigned long long)pteval);
2242                         if (dumps) {
2243                                 dumps--;
2244                                 debug_dma_dump_mappings(NULL);
2245                         }
2246                         WARN_ON(1);
2247                 }
2248
2249                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2250
2251                 BUG_ON(nr_pages < lvl_pages);
2252                 BUG_ON(sg_res < lvl_pages);
2253
2254                 nr_pages -= lvl_pages;
2255                 iov_pfn += lvl_pages;
2256                 phys_pfn += lvl_pages;
2257                 pteval += lvl_pages * VTD_PAGE_SIZE;
2258                 sg_res -= lvl_pages;
2259
2260                 /* If the next PTE would be the first in a new page, then we
2261                    need to flush the cache on the entries we've just written.
2262                    And then we'll need to recalculate 'pte', so clear it and
2263                    let it get set again in the if (!pte) block above.
2264
2265                    If we're done (!nr_pages) we need to flush the cache too.
2266
2267                    Also if we've been setting superpages, we may need to
2268                    recalculate 'pte' and switch back to smaller pages for the
2269                    end of the mapping, if the trailing size is not enough to
2270                    use another superpage (i.e. sg_res < lvl_pages). */
2271                 pte++;
2272                 if (!nr_pages || first_pte_in_page(pte) ||
2273                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2274                         domain_flush_cache(domain, first_pte,
2275                                            (void *)pte - (void *)first_pte);
2276                         pte = NULL;
2277                 }
2278
2279                 if (!sg_res && nr_pages)
2280                         sg = sg_next(sg);
2281         }
2282         return 0;
2283 }
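
/*
 * Editor's sketch (illustrative): lvl_to_nr_pages() is 2^(9 * (level - 1)),
 * so mapping 0x300 (768) pages with both the IOVA pfn and the physical pfn
 * 512-page (2MiB) aligned proceeds as:
 *
 *     level 2, lvl_pages = 512:  one superpage PTE, 256 pages left
 *     level 1, lvl_pages = 1:    256 ordinary 4KiB PTEs for the tail
 *
 * The "sg_res < lvl_pages" check above is what forces the switch back to
 * level 1 once the remainder no longer fills a whole superpage.
 */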
2284
2285 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2286                                     struct scatterlist *sg, unsigned long nr_pages,
2287                                     int prot)
2288 {
2289         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2290 }
2291
2292 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2293                                      unsigned long phys_pfn, unsigned long nr_pages,
2294                                      int prot)
2295 {
2296         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2297 }
2298
2299 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2300 {
2301         if (!iommu)
2302                 return;
2303
2304         clear_context_table(iommu, bus, devfn);
2305         iommu->flush.flush_context(iommu, 0, 0, 0,
2306                                            DMA_CCMD_GLOBAL_INVL);
2307         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2308 }
2309
2310 static inline void unlink_domain_info(struct device_domain_info *info)
2311 {
2312         assert_spin_locked(&device_domain_lock);
2313         list_del(&info->link);
2314         list_del(&info->global);
2315         if (info->dev)
2316                 info->dev->archdata.iommu = NULL;
2317 }
2318
2319 static void domain_remove_dev_info(struct dmar_domain *domain)
2320 {
2321         struct device_domain_info *info, *tmp;
2322         unsigned long flags;
2323
2324         spin_lock_irqsave(&device_domain_lock, flags);
2325         list_for_each_entry_safe(info, tmp, &domain->devices, link)
2326                 __dmar_remove_one_dev_info(info);
2327         spin_unlock_irqrestore(&device_domain_lock, flags);
2328 }
2329
2330 /*
2331  * find_domain
2332  * Note: we use struct device->archdata.iommu to store the info
2333  */
2334 static struct dmar_domain *find_domain(struct device *dev)
2335 {
2336         struct device_domain_info *info;
2337
2338         /* No lock here, assumes no domain exit in normal case */
2339         info = dev->archdata.iommu;
2340         if (info)
2341                 return info->domain;
2342         return NULL;
2343 }
2344
2345 static inline struct device_domain_info *
2346 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2347 {
2348         struct device_domain_info *info;
2349
2350         list_for_each_entry(info, &device_domain_list, global)
2351                 if (info->iommu->segment == segment && info->bus == bus &&
2352                     info->devfn == devfn)
2353                         return info;
2354
2355         return NULL;
2356 }
2357
2358 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2359                                                     int bus, int devfn,
2360                                                     struct device *dev,
2361                                                     struct dmar_domain *domain)
2362 {
2363         struct dmar_domain *found = NULL;
2364         struct device_domain_info *info;
2365         unsigned long flags;
2366         int ret;
2367
2368         info = alloc_devinfo_mem();
2369         if (!info)
2370                 return NULL;
2371
2372         info->bus = bus;
2373         info->devfn = devfn;
2374         info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2375         info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2376         info->ats_qdep = 0;
2377         info->dev = dev;
2378         info->domain = domain;
2379         info->iommu = iommu;
2380
2381         if (dev && dev_is_pci(dev)) {
2382                 struct pci_dev *pdev = to_pci_dev(info->dev);
2383
2384                 if (ecap_dev_iotlb_support(iommu->ecap) &&
2385                     pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2386                     dmar_find_matched_atsr_unit(pdev))
2387                         info->ats_supported = 1;
2388
2389                 if (ecs_enabled(iommu)) {
2390                         if (pasid_enabled(iommu)) {
2391                                 int features = pci_pasid_features(pdev);
2392                                 if (features >= 0)
2393                                         info->pasid_supported = features | 1;
2394                         }
2395
2396                         if (info->ats_supported && ecap_prs(iommu->ecap) &&
2397                             pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2398                                 info->pri_supported = 1;
2399                 }
2400         }
2401
2402         spin_lock_irqsave(&device_domain_lock, flags);
2403         if (dev)
2404                 found = find_domain(dev);
2405
2406         if (!found) {
2407                 struct device_domain_info *info2;
2408                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2409                 if (info2) {
2410                         found      = info2->domain;
2411                         info2->dev = dev;
2412                 }
2413         }
2414
2415         if (found) {
2416                 spin_unlock_irqrestore(&device_domain_lock, flags);
2417                 free_devinfo_mem(info);
2418                 /* Caller must free the original domain */
2419                 return found;
2420         }
2421
2422         spin_lock(&iommu->lock);
2423         ret = domain_attach_iommu(domain, iommu);
2424         spin_unlock(&iommu->lock);
2425
2426         if (ret) {
2427                 spin_unlock_irqrestore(&device_domain_lock, flags);
2428                 free_devinfo_mem(info);
2429                 return NULL;
2430         }
2431
2432         list_add(&info->link, &domain->devices);
2433         list_add(&info->global, &device_domain_list);
2434         if (dev)
2435                 dev->archdata.iommu = info;
2436         spin_unlock_irqrestore(&device_domain_lock, flags);
2437
2438         if (dev && domain_context_mapping(domain, dev)) {
2439                 pr_err("Domain context map for %s failed\n", dev_name(dev));
2440                 dmar_remove_one_dev_info(domain, dev);
2441                 return NULL;
2442         }
2443
2444         return domain;
2445 }
2446
2447 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2448 {
2449         *(u16 *)opaque = alias;
2450         return 0;
2451 }
2452
2453 /* domain is initialized */
2454 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2455 {
2456         struct device_domain_info *info = NULL;
2457         struct dmar_domain *domain, *tmp;
2458         struct intel_iommu *iommu;
2459         u16 req_id, dma_alias;
2460         unsigned long flags;
2461         u8 bus, devfn;
2462
2463         domain = find_domain(dev);
2464         if (domain)
2465                 return domain;
2466
2467         iommu = device_to_iommu(dev, &bus, &devfn);
2468         if (!iommu)
2469                 return NULL;
2470
2471         req_id = ((u16)bus << 8) | devfn;
2472
2473         if (dev_is_pci(dev)) {
2474                 struct pci_dev *pdev = to_pci_dev(dev);
2475
2476                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2477
2478                 spin_lock_irqsave(&device_domain_lock, flags);
2479                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2480                                                       PCI_BUS_NUM(dma_alias),
2481                                                       dma_alias & 0xff);
2482                 if (info) {
2483                         iommu = info->iommu;
2484                         domain = info->domain;
2485                 }
2486                 spin_unlock_irqrestore(&device_domain_lock, flags);
2487
2488                 /* DMA alias already has a domain, use it */
2489                 if (info)
2490                         goto found_domain;
2491         }
2492
2493         /* Allocate and initialize new domain for the device */
2494         domain = alloc_domain(0);
2495         if (!domain)
2496                 return NULL;
2497         if (domain_init(domain, iommu, gaw)) {
2498                 domain_exit(domain);
2499                 return NULL;
2500         }
2501
2502         /* register PCI DMA alias device */
2503         if (req_id != dma_alias && dev_is_pci(dev)) {
2504                 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2505                                                dma_alias & 0xff, NULL, domain);
2506
2507                 if (!tmp || tmp != domain) {
2508                         domain_exit(domain);
2509                         domain = tmp;
2510                 }
2511
2512                 if (!domain)
2513                         return NULL;
2514         }
2515
2516 found_domain:
2517         tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2518
2519         if (!tmp || tmp != domain) {
2520                 domain_exit(domain);
2521                 domain = tmp;
2522         }
2523
2524         return domain;
2525 }
2526
2527 static int iommu_domain_identity_map(struct dmar_domain *domain,
2528                                      unsigned long long start,
2529                                      unsigned long long end)
2530 {
2531         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2532         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2533
2534         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2535                           dma_to_mm_pfn(last_vpfn))) {
2536                 pr_err("Reserving iova failed\n");
2537                 return -ENOMEM;
2538         }
2539
2540         pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2541         /*
2542          * The RMRR range might overlap with the physical memory range,
2543          * so clear it first
2544          */
2545         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2546
2547         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2548                                   last_vpfn - first_vpfn + 1,
2549                                   DMA_PTE_READ|DMA_PTE_WRITE);
2550 }
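
/*
 * Editor's sketch (illustrative values): an identity map simply maps a pfn
 * range onto itself. For a hypothetical reserved region covering
 * 0x00100000-0x001fffff the call above reduces to:
 *
 *     first_vpfn = 0x00100000 >> VTD_PAGE_SHIFT;   // 0x100
 *     last_vpfn  = 0x001fffff >> VTD_PAGE_SHIFT;   // 0x1ff
 *     domain_pfn_mapping(domain, 0x100, 0x100, 0x100,
 *                        DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * i.e. 256 pages where the IOVA pfn equals the physical pfn.
 */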
2551
2552 static int domain_prepare_identity_map(struct device *dev,
2553                                        struct dmar_domain *domain,
2554                                        unsigned long long start,
2555                                        unsigned long long end)
2556 {
2557         /* For _hardware_ passthrough, don't bother. But for software
2558            passthrough, we do it anyway -- it may indicate a memory
2559            range which is reserved in E820 and so didn't get set up
2560            in si_domain to start with */
2561         if (domain == si_domain && hw_pass_through) {
2562                 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2563                         dev_name(dev), start, end);
2564                 return 0;
2565         }
2566
2567         pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2568                 dev_name(dev), start, end);
2569
2570         if (end < start) {
2571                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2572                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2573                         dmi_get_system_info(DMI_BIOS_VENDOR),
2574                         dmi_get_system_info(DMI_BIOS_VERSION),
2575                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2576                 return -EIO;
2577         }
2578
2579         if (end >> agaw_to_width(domain->agaw)) {
2580                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2581                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2582                      agaw_to_width(domain->agaw),
2583                      dmi_get_system_info(DMI_BIOS_VENDOR),
2584                      dmi_get_system_info(DMI_BIOS_VERSION),
2585                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2586                 return -EIO;
2587         }
2588
2589         return iommu_domain_identity_map(domain, start, end);
2590 }
2591
2592 static int iommu_prepare_identity_map(struct device *dev,
2593                                       unsigned long long start,
2594                                       unsigned long long end)
2595 {
2596         struct dmar_domain *domain;
2597         int ret;
2598
2599         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2600         if (!domain)
2601                 return -ENOMEM;
2602
2603         ret = domain_prepare_identity_map(dev, domain, start, end);
2604         if (ret)
2605                 domain_exit(domain);
2606
2607         return ret;
2608 }
2609
2610 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2611                                          struct device *dev)
2612 {
2613         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2614                 return 0;
2615         return iommu_prepare_identity_map(dev, rmrr->base_address,
2616                                           rmrr->end_address);
2617 }
2618
2619 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2620 static inline void iommu_prepare_isa(void)
2621 {
2622         struct pci_dev *pdev;
2623         int ret;
2624
2625         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2626         if (!pdev)
2627                 return;
2628
2629         pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2630         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2631
2632         if (ret)
2633                 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2634
2635         pci_dev_put(pdev);
2636 }
2637 #else
2638 static inline void iommu_prepare_isa(void)
2639 {
2640         return;
2641 }
2642 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2643
2644 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2645
2646 static int __init si_domain_init(int hw)
2647 {
2648         int nid, ret = 0;
2649
2650         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2651         if (!si_domain)
2652                 return -EFAULT;
2653
2654         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2655                 domain_exit(si_domain);
2656                 return -EFAULT;
2657         }
2658
2659         pr_debug("Identity mapping domain allocated\n");
2660
2661         if (hw)
2662                 return 0;
2663
2664         for_each_online_node(nid) {
2665                 unsigned long start_pfn, end_pfn;
2666                 int i;
2667
2668                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2669                         ret = iommu_domain_identity_map(si_domain,
2670                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2671                         if (ret)
2672                                 return ret;
2673                 }
2674         }
2675
2676         return 0;
2677 }
2678
2679 static int identity_mapping(struct device *dev)
2680 {
2681         struct device_domain_info *info;
2682
2683         if (likely(!iommu_identity_mapping))
2684                 return 0;
2685
2686         info = dev->archdata.iommu;
2687         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2688                 return (info->domain == si_domain);
2689
2690         return 0;
2691 }
2692
2693 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2694 {
2695         struct dmar_domain *ndomain;
2696         struct intel_iommu *iommu;
2697         u8 bus, devfn;
2698
2699         iommu = device_to_iommu(dev, &bus, &devfn);
2700         if (!iommu)
2701                 return -ENODEV;
2702
2703         ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2704         if (ndomain != domain)
2705                 return -EBUSY;
2706
2707         return 0;
2708 }
2709
2710 static bool device_has_rmrr(struct device *dev)
2711 {
2712         struct dmar_rmrr_unit *rmrr;
2713         struct device *tmp;
2714         int i;
2715
2716         rcu_read_lock();
2717         for_each_rmrr_units(rmrr) {
2718                 /*
2719                  * Return TRUE if this RMRR contains the device that
2720                  * is passed in.
2721                  */
2722                 for_each_active_dev_scope(rmrr->devices,
2723                                           rmrr->devices_cnt, i, tmp)
2724                         if (tmp == dev) {
2725                                 rcu_read_unlock();
2726                                 return true;
2727                         }
2728         }
2729         rcu_read_unlock();
2730         return false;
2731 }
2732
2733 /*
2734  * There are a couple cases where we need to restrict the functionality of
2735  * devices associated with RMRRs.  The first is when evaluating a device for
2736  * identity mapping because problems exist when devices are moved in and out
2737  * of domains and their respective RMRR information is lost.  This means that
2738  * a device with associated RMRRs will never be in a "passthrough" domain.
2739  * The second is use of the device through the IOMMU API.  This interface
2740  * expects to have full control of the IOVA space for the device.  We cannot
2741  * satisfy both the requirement that RMRR access is maintained and have an
2742  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2743  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2744  * We therefore prevent devices associated with an RMRR from participating in
2745  * the IOMMU API, which eliminates them from device assignment.
2746  *
2747  * In both cases we assume that PCI USB devices with RMRRs have them largely
2748  * for historical reasons and that the RMRR space is not actively used post
2749  * boot.  This exclusion may change if vendors begin to abuse it.
2750  *
2751  * The same exception is made for graphics devices, with the requirement that
2752  * any use of the RMRR regions will be torn down before assigning the device
2753  * to a guest.
2754  */
2755 static bool device_is_rmrr_locked(struct device *dev)
2756 {
2757         if (!device_has_rmrr(dev))
2758                 return false;
2759
2760         if (dev_is_pci(dev)) {
2761                 struct pci_dev *pdev = to_pci_dev(dev);
2762
2763                 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2764                         return false;
2765         }
2766
2767         return true;
2768 }
2769
2770 static int iommu_should_identity_map(struct device *dev, int startup)
2771 {
2772
2773         if (dev_is_pci(dev)) {
2774                 struct pci_dev *pdev = to_pci_dev(dev);
2775
2776                 if (device_is_rmrr_locked(dev))
2777                         return 0;
2778
2779                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2780                         return 1;
2781
2782                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2783                         return 1;
2784
2785                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2786                         return 0;
2787
2788                 /*
2789                  * We want to start off with all devices in the 1:1 domain, and
2790                  * take them out later if we find they can't access all of memory.
2791                  *
2792                  * However, we can't do this for PCI devices behind bridges,
2793                  * because all PCI devices behind the same bridge will end up
2794                  * with the same source-id on their transactions.
2795                  *
2796                  * Practically speaking, we can't change things around for these
2797                  * devices at run-time, because we can't be sure there'll be no
2798                  * DMA transactions in flight for any of their siblings.
2799                  *
2800                  * So PCI devices (unless they're on the root bus) as well as
2801                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2802                  * the 1:1 domain, just in _case_ one of their siblings turns out
2803                  * not to be able to map all of memory.
2804                  */
2805                 if (!pci_is_pcie(pdev)) {
2806                         if (!pci_is_root_bus(pdev->bus))
2807                                 return 0;
2808                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2809                                 return 0;
2810                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2811                         return 0;
2812         } else {
2813                 if (device_has_rmrr(dev))
2814                         return 0;
2815         }
2816
2817         /*
2818          * At boot time, we don't yet know if devices will be 64-bit capable.
2819          * Assume that they will — if they turn out not to be, then we can
2820          * take them out of the 1:1 domain later.
2821          */
2822         if (!startup) {
2823                 /*
2824                  * If the device's dma_mask is less than the system's memory
2825                  * size then this is not a candidate for identity mapping.
2826                  */
2827                 u64 dma_mask = *dev->dma_mask;
2828
2829                 if (dev->coherent_dma_mask &&
2830                     dev->coherent_dma_mask < dma_mask)
2831                         dma_mask = dev->coherent_dma_mask;
2832
2833                 return dma_mask >= dma_get_required_mask(dev);
2834         }
2835
2836         return 1;
2837 }
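
/*
 * Editor's note (illustrative numbers): for the hotplug (!startup) case the
 * decision above boils down to a mask comparison. On a host with 8GiB of
 * RAM, dma_get_required_mask() is typically DMA_BIT_MASK(33), so:
 *
 *     32-bit device:  DMA_BIT_MASK(32) >= DMA_BIT_MASK(33)  ->  0
 *     64-bit device:  DMA_BIT_MASK(64) >= DMA_BIT_MASK(33)  ->  1
 *
 * Only devices that can address all of memory stay identity mapped.
 */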
2838
2839 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2840 {
2841         int ret;
2842
2843         if (!iommu_should_identity_map(dev, 1))
2844                 return 0;
2845
2846         ret = domain_add_dev_info(si_domain, dev);
2847         if (!ret)
2848                 pr_info("%s identity mapping for device %s\n",
2849                         hw ? "Hardware" : "Software", dev_name(dev));
2850         else if (ret == -ENODEV)
2851                 /* device not associated with an iommu */
2852                 ret = 0;
2853
2854         return ret;
2855 }
2856
2857
2858 static int __init iommu_prepare_static_identity_mapping(int hw)
2859 {
2860         struct pci_dev *pdev = NULL;
2861         struct dmar_drhd_unit *drhd;
2862         struct intel_iommu *iommu;
2863         struct device *dev;
2864         int i;
2865         int ret = 0;
2866
2867         for_each_pci_dev(pdev) {
2868                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2869                 if (ret)
2870                         return ret;
2871         }
2872
2873         for_each_active_iommu(iommu, drhd)
2874                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2875                         struct acpi_device_physical_node *pn;
2876                         struct acpi_device *adev;
2877
2878                         if (dev->bus != &acpi_bus_type)
2879                                 continue;
2880
2881                         adev = to_acpi_device(dev);
2882                         mutex_lock(&adev->physical_node_lock);
2883                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2884                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2885                                 if (ret)
2886                                         break;
2887                         }
2888                         mutex_unlock(&adev->physical_node_lock);
2889                         if (ret)
2890                                 return ret;
2891                 }
2892
2893         return 0;
2894 }
2895
2896 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2897 {
2898         /*
2899          * Start from a sane iommu hardware state.
2900          * If queued invalidation was already initialized by us
2901          * (for example, while enabling interrupt-remapping) then
2902          * things are already rolling from a sane state.
2903          */
2904         if (!iommu->qi) {
2905                 /*
2906                  * Clear any previous faults.
2907                  */
2908                 dmar_fault(-1, iommu);
2909                 /*
2910                  * Disable queued invalidation if supported and already enabled
2911                  * before OS handover.
2912                  */
2913                 dmar_disable_qi(iommu);
2914         }
2915
2916         if (dmar_enable_qi(iommu)) {
2917                 /*
2918                  * Queued Invalidate not enabled, use Register Based Invalidate
2919                  */
2920                 iommu->flush.flush_context = __iommu_flush_context;
2921                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2922                 pr_info("%s: Using Register based invalidation\n",
2923                         iommu->name);
2924         } else {
2925                 iommu->flush.flush_context = qi_flush_context;
2926                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2927                 pr_info("%s: Using Queued invalidation\n", iommu->name);
2928         }
2929 }
2930
2931 static int copy_context_table(struct intel_iommu *iommu,
2932                               struct root_entry *old_re,
2933                               struct context_entry **tbl,
2934                               int bus, bool ext)
2935 {
2936         int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2937         struct context_entry *new_ce = NULL, ce;
2938         struct context_entry *old_ce = NULL;
2939         struct root_entry re;
2940         phys_addr_t old_ce_phys;
2941
2942         tbl_idx = ext ? bus * 2 : bus;
2943         memcpy(&re, old_re, sizeof(re));
2944
2945         for (devfn = 0; devfn < 256; devfn++) {
2946                 /* First calculate the correct index */
2947                 idx = (ext ? devfn * 2 : devfn) % 256;
2948
2949                 if (idx == 0) {
2950                         /* First save what we may have and clean up */
2951                         if (new_ce) {
2952                                 tbl[tbl_idx] = new_ce;
2953                                 __iommu_flush_cache(iommu, new_ce,
2954                                                     VTD_PAGE_SIZE);
2955                                 pos = 1;
2956                         }
2957
2958                         if (old_ce)
2959                                 iounmap(old_ce);
2960
2961                         ret = 0;
2962                         if (devfn < 0x80)
2963                                 old_ce_phys = root_entry_lctp(&re);
2964                         else
2965                                 old_ce_phys = root_entry_uctp(&re);
2966
2967                         if (!old_ce_phys) {
2968                                 if (ext && devfn == 0) {
2969                                         /* No LCTP, try UCTP */
2970                                         devfn = 0x7f;
2971                                         continue;
2972                                 } else {
2973                                         goto out;
2974                                 }
2975                         }
2976
2977                         ret = -ENOMEM;
2978                         old_ce = memremap(old_ce_phys, PAGE_SIZE,
2979                                         MEMREMAP_WB);
2980                         if (!old_ce)
2981                                 goto out;
2982
2983                         new_ce = alloc_pgtable_page(iommu->node);
2984                         if (!new_ce)
2985                                 goto out_unmap;
2986
2987                         ret = 0;
2988                 }
2989
2990                 /* Now copy the context entry */
2991                 memcpy(&ce, old_ce + idx, sizeof(ce));
2992
2993                 if (!__context_present(&ce))
2994                         continue;
2995
2996                 did = context_domain_id(&ce);
2997                 if (did >= 0 && did < cap_ndoms(iommu->cap))
2998                         set_bit(did, iommu->domain_ids);
2999
3000                 /*
3001                  * We need a marker for copied context entries. This
3002                  * marker needs to work for the old format as well as
3003                  * for extended context entries.
3004                  *
3005                  * Bit 67 of the context entry is used. In the old
3006                  * format this bit is available to software, in the
3007                  * extended format it is the PGE bit, but PGE is ignored
3008                  * by HW if PASIDs are disabled (and thus still
3009                  * available).
3010                  *
3011                  * So disable PASIDs first and then mark the entry
3012                  * copied. This means that we don't copy PASID
3013                  * translations from the old kernel, but this is fine as
3014                  * faults there are not fatal.
3015                  */
3016                 context_clear_pasid_enable(&ce);
3017                 context_set_copied(&ce);
3018
3019                 new_ce[idx] = ce;
3020         }
3021
3022         tbl[tbl_idx + pos] = new_ce;
3023
3024         __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3025
3026 out_unmap:
3027         memunmap(old_ce);
3028
3029 out:
3030         return ret;
3031 }
3032
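/*
 * Take over the translation structures left behind by the previous
 * (crashed) kernel: map its root table from DMAR_RTADDR_REG, copy the
 * per-bus context tables and hook them into this kernel's root_entry
 * table.  This keeps ongoing DMA from the old kernel working while the
 * kdump kernel boots.  Bails out if the old and new root-entry formats
 * (legacy vs. extended) differ, since the RTT bit cannot be changed
 * without disabling translation.
 */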
3033 static int copy_translation_tables(struct intel_iommu *iommu)
3034 {
3035         struct context_entry **ctxt_tbls;
3036         struct root_entry *old_rt;
3037         phys_addr_t old_rt_phys;
3038         int ctxt_table_entries;
3039         unsigned long flags;
3040         u64 rtaddr_reg;
3041         int bus, ret;
3042         bool new_ext, ext;
3043
3044         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3045         ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3046         new_ext    = !!ecap_ecs(iommu->ecap);
3047
3048         /*
3049          * The RTT bit can only be changed when translation is disabled,
3050          * but disabling translation means to open a window for data
3051          * corruption. So bail out and don't copy anything if we would
3052          * have to change the bit.
3053          */
3054         if (new_ext != ext)
3055                 return -EINVAL;
3056
3057         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3058         if (!old_rt_phys)
3059                 return -EINVAL;
3060
3061         old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3062         if (!old_rt)
3063                 return -ENOMEM;
3064
3065         /* This is too big for the stack - allocate it from slab */
3066         ctxt_table_entries = ext ? 512 : 256;
3067         ret = -ENOMEM;
3068         ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3069         if (!ctxt_tbls)
3070                 goto out_unmap;
3071
3072         for (bus = 0; bus < 256; bus++) {
3073                 ret = copy_context_table(iommu, &old_rt[bus],
3074                                          ctxt_tbls, bus, ext);
3075                 if (ret) {
3076                         pr_err("%s: Failed to copy context table for bus %d\n",
3077                                 iommu->name, bus);
3078                         continue;
3079                 }
3080         }
3081
3082         spin_lock_irqsave(&iommu->lock, flags);
3083
3084         /* Context tables are copied, now write them to the root_entry table */
3085         for (bus = 0; bus < 256; bus++) {
3086                 int idx = ext ? bus * 2 : bus;
3087                 u64 val;
3088
3089                 if (ctxt_tbls[idx]) {
3090                         val = virt_to_phys(ctxt_tbls[idx]) | 1;
3091                         iommu->root_entry[bus].lo = val;
3092                 }
3093
3094                 if (!ext || !ctxt_tbls[idx + 1])
3095                         continue;
3096
3097                 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3098                 iommu->root_entry[bus].hi = val;
3099         }
3100
3101         spin_unlock_irqrestore(&iommu->lock, flags);
3102
3103         kfree(ctxt_tbls);
3104
3105         __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3106
3107         ret = 0;
3108
3109 out_unmap:
3110         memunmap(old_rt);
3111
3112         return ret;
3113 }
3114
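/*
 * Boot-time DMAR initialisation: count and set up each IOMMU (domain ids,
 * root entry, invalidation mode), copy translation tables from a previous
 * kernel when translation is pre-enabled (kdump), build identity/RMRR/ISA
 * mappings unless tables were copied, then enable fault reporting and
 * translation.
 */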
3115 static int __init init_dmars(void)
3116 {
3117         struct dmar_drhd_unit *drhd;
3118         struct dmar_rmrr_unit *rmrr;
3119         bool copied_tables = false;
3120         struct device *dev;
3121         struct intel_iommu *iommu;
3122         int i, ret;
3123
3124         /*
3125          * for each drhd
3126          *    allocate root
3127          *    initialize and program root entry to not present
3128          * endfor
3129          */
3130         for_each_drhd_unit(drhd) {
3131                 /*
3132                  * No lock needed: this is only incremented in the
3133                  * single-threaded kernel __init code path; all other
3134                  * accesses are read-only.
3135                  */
3136                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3137                         g_num_of_iommus++;
3138                         continue;
3139                 }
3140                 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3141         }
3142
3143         /* Preallocate enough resources for IOMMU hot-addition */
3144         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3145                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3146
3147         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3148                         GFP_KERNEL);
3149         if (!g_iommus) {
3150                 pr_err("Allocating global iommu array failed\n");
3151                 ret = -ENOMEM;
3152                 goto error;
3153         }
3154
3155         deferred_flush = kzalloc(g_num_of_iommus *
3156                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3157         if (!deferred_flush) {
3158                 ret = -ENOMEM;
3159                 goto free_g_iommus;
3160         }
3161
3162         for_each_active_iommu(iommu, drhd) {
3163                 g_iommus[iommu->seq_id] = iommu;
3164
3165                 intel_iommu_init_qi(iommu);
3166
3167                 ret = iommu_init_domains(iommu);
3168                 if (ret)
3169                         goto free_iommu;
3170
3171                 init_translation_status(iommu);
3172
3173                 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3174                         iommu_disable_translation(iommu);
3175                         clear_translation_pre_enabled(iommu);
3176                         pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3177                                 iommu->name);
3178                 }
3179
3180                 /*
3181                  * TBD:
3182                  * we could share the same root & context tables
3183                  * among all IOMMUs. Needs to be split out later.
3184                  */
3185                 ret = iommu_alloc_root_entry(iommu);
3186                 if (ret)
3187                         goto free_iommu;
3188
3189                 if (translation_pre_enabled(iommu)) {
3190                         pr_info("Translation already enabled - trying to copy translation structures\n");
3191
3192                         ret = copy_translation_tables(iommu);
3193                         if (ret) {
3194                                 /*
3195                                  * We found the IOMMU with translation
3196                                  * enabled - but failed to copy over the
3197                                  * old root-entry table. Try to proceed
3198                                  * by disabling translation now and
3199                                  * allocating a clean root-entry table.
3200                                  * This might cause DMAR faults, but
3201                                  * probably the dump will still succeed.
3202                                  */
3203                                 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3204                                        iommu->name);
3205                                 iommu_disable_translation(iommu);
3206                                 clear_translation_pre_enabled(iommu);
3207                         } else {
3208                                 pr_info("Copied translation tables from previous kernel for %s\n",
3209                                         iommu->name);
3210                                 copied_tables = true;
3211                         }
3212                 }
3213
3214                 if (!ecap_pass_through(iommu->ecap))
3215                         hw_pass_through = 0;
3216 #ifdef CONFIG_INTEL_IOMMU_SVM
3217                 if (pasid_enabled(iommu))
3218                         intel_svm_alloc_pasid_tables(iommu);
3219 #endif
3220         }
3221
3222         /*
3223          * Now that qi is enabled on all iommus, set the root entry and flush
3224          * caches. This is required on some Intel X58 chipsets, otherwise the
3225          * flush_context function will loop forever and the boot hangs.
3226          */
3227         for_each_active_iommu(iommu, drhd) {
3228                 iommu_flush_write_buffer(iommu);
3229                 iommu_set_root_entry(iommu);
3230                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3231                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3232         }
3233
3234         if (iommu_pass_through)
3235                 iommu_identity_mapping |= IDENTMAP_ALL;
3236
3237 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3238         iommu_identity_mapping |= IDENTMAP_GFX;
3239 #endif
3240
3241         check_tylersburg_isoch();
3242
3243         if (iommu_identity_mapping) {
3244                 ret = si_domain_init(hw_pass_through);
3245                 if (ret)
3246                         goto free_iommu;
3247         }
3248
3249
3250         /*
3251          * If we copied translations from a previous kernel in the kdump
3252          * case, we cannot assign the devices to domains now, as that
3253          * would eliminate the old mappings. So skip this part and defer
3254          * the assignment to device driver initialization time.
3255          */
3256         if (copied_tables)
3257                 goto domains_done;
3258
3259         /*
3260          * If pass-through is not set or not enabled, set up context entries
3261          * for identity mappings for RMRR, GFX and ISA, and possibly fall back
3262          * to static identity mapping if iommu_identity_mapping is set.
3263          */
3264         if (iommu_identity_mapping) {
3265                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3266                 if (ret) {
3267                         pr_crit("Failed to setup IOMMU pass-through\n");
3268                         goto free_iommu;
3269                 }
3270         }
3271         /*
3272          * For each rmrr
3273          *   for each dev attached to rmrr
3274          *   do
3275          *     locate drhd for dev, alloc domain for dev
3276          *     allocate free domain
3277          *     allocate page table entries for rmrr
3278          *     if context not allocated for bus
3279          *           allocate and init context
3280          *           set present in root table for this bus
3281          *     init context with domain, translation etc
3282          *    endfor
3283          * endfor
3284          */
3285         pr_info("Setting RMRR:\n");
3286         for_each_rmrr_units(rmrr) {
3287                 /* Some BIOSes list non-existent devices in the DMAR table. */
3288                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3289                                           i, dev) {
3290                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
3291                         if (ret)
3292                                 pr_err("Mapping reserved region failed\n");
3293                 }
3294         }
3295
3296         iommu_prepare_isa();
3297
3298 domains_done:
3299
3300         /*
3301          * for each drhd
3302          *   enable fault log
3303          *   global invalidate context cache
3304          *   global invalidate iotlb
3305          *   enable translation
3306          */
3307         for_each_iommu(iommu, drhd) {
3308                 if (drhd->ignored) {
3309                         /*
3310                          * we always have to disable PMRs or DMA may fail on
3311                          * this device
3312                          */
3313                         if (force_on)
3314                                 iommu_disable_protect_mem_regions(iommu);
3315                         continue;
3316                 }
3317
3318                 iommu_flush_write_buffer(iommu);
3319
3320 #ifdef CONFIG_INTEL_IOMMU_SVM
3321                 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3322                         ret = intel_svm_enable_prq(iommu);
3323                         if (ret)
3324                                 goto free_iommu;
3325                 }
3326 #endif
3327                 ret = dmar_set_interrupt(iommu);
3328                 if (ret)
3329                         goto free_iommu;
3330
3331                 if (!translation_pre_enabled(iommu))
3332                         iommu_enable_translation(iommu);
3333
3334                 iommu_disable_protect_mem_regions(iommu);
3335         }
3336
3337         return 0;
3338
3339 free_iommu:
3340         for_each_active_iommu(iommu, drhd) {
3341                 disable_dmar_iommu(iommu);
3342                 free_dmar_iommu(iommu);
3343         }
3344         kfree(deferred_flush);
3345 free_g_iommus:
3346         kfree(g_iommus);
3347 error:
3348         return ret;
3349 }
3350
3351 /* This takes a number of _MM_ pages, not VTD pages */
3352 static struct iova *intel_alloc_iova(struct device *dev,
3353                                      struct dmar_domain *domain,
3354                                      unsigned long nrpages, uint64_t dma_mask)
3355 {
3356         struct iova *iova = NULL;
3357
3358         /* Restrict dma_mask to the width that the iommu can handle */
3359         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3360         /* Ensure we reserve the whole size-aligned region */
3361         nrpages = __roundup_pow_of_two(nrpages);
3362
3363         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3364                 /*
3365                  * First try to allocate an io virtual address in
3366                  * DMA_BIT_MASK(32) and if that fails then try allocating
3367                  * from higher range
3368                  */
3369                 iova = alloc_iova(&domain->iovad, nrpages,
3370                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
3371                 if (iova)
3372                         return iova;
3373         }
3374         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3375         if (unlikely(!iova)) {
3376                 pr_err("Allocating %ld-page iova for %s failed\n",
3377                        nrpages, dev_name(dev));
3378                 return NULL;
3379         }
3380
3381         return iova;
3382 }
3383
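/*
 * Slow path of get_valid_domain_for_dev(): allocate (or find) a DMA
 * domain for @dev and pre-map any RMRR regions that reference it, so
 * that devices with reserved memory regions keep working once
 * translation takes effect.
 */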
3384 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3385 {
3386         struct dmar_rmrr_unit *rmrr;
3387         struct dmar_domain *domain;
3388         struct device *i_dev;
3389         int i, ret;
3390
3391         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3392         if (!domain) {
3393                 pr_err("Allocating domain for %s failed\n",
3394                        dev_name(dev));
3395                 return NULL;
3396         }
3397
3398         /* We have a new domain - setup possible RMRRs for the device */
3399         rcu_read_lock();
3400         for_each_rmrr_units(rmrr) {
3401                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3402                                           i, i_dev) {
3403                         if (i_dev != dev)
3404                                 continue;
3405
3406                         ret = domain_prepare_identity_map(dev, domain,
3407                                                           rmrr->base_address,
3408                                                           rmrr->end_address);
3409                         if (ret)
3410                                 dev_err(dev, "Mapping reserved region failed\n");
3411                 }
3412         }
3413         rcu_read_unlock();
3414
3415         return domain;
3416 }
3417
3418 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3419 {
3420         struct device_domain_info *info;
3421
3422         /* No lock here, assumes no domain exit in normal case */
3423         info = dev->archdata.iommu;
3424         if (likely(info))
3425                 return info->domain;
3426
3427         return __get_valid_domain_for_dev(dev);
3428 }
3429
3430 /* Check if the dev needs to go through non-identity map and unmap process. */
3431 static int iommu_no_mapping(struct device *dev)
3432 {
3433         int found;
3434
3435         if (iommu_dummy(dev))
3436                 return 1;
3437
3438         if (!iommu_identity_mapping)
3439                 return 0;
3440
3441         found = identity_mapping(dev);
3442         if (found) {
3443                 if (iommu_should_identity_map(dev, 0))
3444                         return 1;
3445                 else {
3446                         /*
3447                          * The 32 bit DMA device is removed from si_domain;
3448                          * fall back to non-identity mapping.
3449                          */
3450                         dmar_remove_one_dev_info(si_domain, dev);
3451                         pr_info("32bit %s uses non-identity mapping\n",
3452                                 dev_name(dev));
3453                         return 0;
3454                 }
3455         } else {
3456                 /*
3457                  * A 64 bit DMA device detached from a VM is put back into
3458                  * si_domain for identity mapping.
3459                  */
3460                 if (iommu_should_identity_map(dev, 0)) {
3461                         int ret;
3462                         ret = domain_add_dev_info(si_domain, dev);
3463                         if (!ret) {
3464                                 pr_info("64bit %s uses identity mapping\n",
3465                                         dev_name(dev));
3466                                 return 1;
3467                         }
3468                 }
3469         }
3470
3471         return 0;
3472 }
3473
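/*
 * Core single-range map: allocate an IOVA range sized for @size, wire up
 * the page tables with read/write permissions derived from @dir, and
 * flush the IOTLB (caching mode) or the write buffer.  Returns the bus
 * address, @paddr itself for identity-mapped devices, or 0 on failure.
 */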
3474 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3475                                      size_t size, int dir, u64 dma_mask)
3476 {
3477         struct dmar_domain *domain;
3478         phys_addr_t start_paddr;
3479         struct iova *iova;
3480         int prot = 0;
3481         int ret;
3482         struct intel_iommu *iommu;
3483         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3484
3485         BUG_ON(dir == DMA_NONE);
3486
3487         if (iommu_no_mapping(dev))
3488                 return paddr;
3489
3490         domain = get_valid_domain_for_dev(dev);
3491         if (!domain)
3492                 return 0;
3493
3494         iommu = domain_get_iommu(domain);
3495         size = aligned_nrpages(paddr, size);
3496
3497         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3498         if (!iova)
3499                 goto error;
3500
3501         /*
3502          * Check if DMAR supports zero-length reads on write-only
3503          * mappings.
3504          */
3505         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3506                         !cap_zlr(iommu->cap))
3507                 prot |= DMA_PTE_READ;
3508         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3509                 prot |= DMA_PTE_WRITE;
3510         /*
3511          * The range paddr .. paddr + size may cover partial pages, so map
3512          * whole pages.  Note: if two parts of one page are mapped separately,
3513          * we might end up with two guest addresses mapping to the same host
3514          * paddr, but this is not a big problem.
3515          */
3516         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3517                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3518         if (ret)
3519                 goto error;
3520
3521         /* it's a non-present to present mapping. Only flush if caching mode */
3522         if (cap_caching_mode(iommu->cap))
3523                 iommu_flush_iotlb_psi(iommu, domain,
3524                                       mm_to_dma_pfn(iova->pfn_lo),
3525                                       size, 0, 1);
3526         else
3527                 iommu_flush_write_buffer(iommu);
3528
3529         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3530         start_paddr += paddr & ~PAGE_MASK;
3531         return start_paddr;
3532
3533 error:
3534         if (iova)
3535                 __free_iova(&domain->iovad, iova);
3536         pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3537                 dev_name(dev), size, (unsigned long long)paddr, dir);
3538         return 0;
3539 }
3540
3541 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3542                                  unsigned long offset, size_t size,
3543                                  enum dma_data_direction dir,
3544                                  struct dma_attrs *attrs)
3545 {
3546         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3547                                   dir, *dev->dma_mask);
3548 }
3549
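/*
 * Drain the per-IOMMU deferred-unmap queues: on real hardware do one
 * global IOTLB flush plus a device-IOTLB flush per entry, in caching
 * mode flush each IOVA range individually, then free the queued IOVAs
 * and page freelists.  Called with async_umap_flush_lock held.
 */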
3550 static void flush_unmaps(void)
3551 {
3552         int i, j;
3553
3554         timer_on = 0;
3555
3556         /* just flush them all */
3557         for (i = 0; i < g_num_of_iommus; i++) {
3558                 struct intel_iommu *iommu = g_iommus[i];
3559                 if (!iommu)
3560                         continue;
3561
3562                 if (!deferred_flush[i].next)
3563                         continue;
3564
3565                 /* In caching mode, global flushes make emulation expensive */
3566                 if (!cap_caching_mode(iommu->cap))
3567                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3568                                          DMA_TLB_GLOBAL_FLUSH);
3569                 for (j = 0; j < deferred_flush[i].next; j++) {
3570                         unsigned long mask;
3571                         struct iova *iova = deferred_flush[i].iova[j];
3572                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3573
3574                         /* On real hardware multiple invalidations are expensive */
3575                         if (cap_caching_mode(iommu->cap))
3576                                 iommu_flush_iotlb_psi(iommu, domain,
3577                                         iova->pfn_lo, iova_size(iova),
3578                                         !deferred_flush[i].freelist[j], 0);
3579                         else {
3580                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3581                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3582                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3583                         }
3584                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3585                         if (deferred_flush[i].freelist[j])
3586                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3587                 }
3588                 deferred_flush[i].next = 0;
3589         }
3590
3591         list_size = 0;
3592 }
3593
3594 static void flush_unmaps_timeout(unsigned long data)
3595 {
3596         unsigned long flags;
3597
3598         spin_lock_irqsave(&async_umap_flush_lock, flags);
3599         flush_unmaps();
3600         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3601 }
3602
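/*
 * Queue an IOVA range and its page freelist on the owning IOMMU's
 * deferred-flush list.  The list is drained by a 10ms timer, or
 * synchronously once HIGH_WATER_MARK entries have accumulated.
 */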
3603 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3604 {
3605         unsigned long flags;
3606         int next, iommu_id;
3607         struct intel_iommu *iommu;
3608
3609         spin_lock_irqsave(&async_umap_flush_lock, flags);
3610         if (list_size == HIGH_WATER_MARK)
3611                 flush_unmaps();
3612
3613         iommu = domain_get_iommu(dom);
3614         iommu_id = iommu->seq_id;
3615
3616         next = deferred_flush[iommu_id].next;
3617         deferred_flush[iommu_id].domain[next] = dom;
3618         deferred_flush[iommu_id].iova[next] = iova;
3619         deferred_flush[iommu_id].freelist[next] = freelist;
3620         deferred_flush[iommu_id].next++;
3621
3622         if (!timer_on) {
3623                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3624                 timer_on = 1;
3625         }
3626         list_size++;
3627         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3628 }
3629
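/*
 * Common unmap path: look up the IOVA behind @dev_addr, tear down the
 * page-table entries and either flush the IOTLB immediately
 * (intel_iommu_strict) or defer the flush and the IOVA release via
 * add_unmap().
 */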
3630 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3631 {
3632         struct dmar_domain *domain;
3633         unsigned long start_pfn, last_pfn;
3634         struct iova *iova;
3635         struct intel_iommu *iommu;
3636         struct page *freelist;
3637
3638         if (iommu_no_mapping(dev))
3639                 return;
3640
3641         domain = find_domain(dev);
3642         BUG_ON(!domain);
3643
3644         iommu = domain_get_iommu(domain);
3645
3646         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3647         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3648                       (unsigned long long)dev_addr))
3649                 return;
3650
3651         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3652         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3653
3654         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3655                  dev_name(dev), start_pfn, last_pfn);
3656
3657         freelist = domain_unmap(domain, start_pfn, last_pfn);
3658
3659         if (intel_iommu_strict) {
3660                 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3661                                       last_pfn - start_pfn + 1, !freelist, 0);
3662                 /* free iova */
3663                 __free_iova(&domain->iovad, iova);
3664                 dma_free_pagelist(freelist);
3665         } else {
3666                 add_unmap(domain, iova, freelist);
3667                 /*
3668                  * Queue up the release of the unmap to save roughly the 1/6th
3669                  * of the CPU time otherwise spent on the IOTLB flush operation.
3670                  */
3671         }
3672 }
3673
3674 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3675                              size_t size, enum dma_data_direction dir,
3676                              struct dma_attrs *attrs)
3677 {
3678         intel_unmap(dev, dev_addr);
3679 }
3680
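/*
 * dma_alloc_coherent() backend: allocate zeroed pages (preferring CMA
 * when blocking is allowed) and map them bidirectionally through
 * __intel_map_single().  GFP_DMA/GFP_DMA32 are only honoured for
 * identity-mapped devices, where the physical address is handed to the
 * device directly.
 */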
3681 static void *intel_alloc_coherent(struct device *dev, size_t size,
3682                                   dma_addr_t *dma_handle, gfp_t flags,
3683                                   struct dma_attrs *attrs)
3684 {
3685         struct page *page = NULL;
3686         int order;
3687
3688         size = PAGE_ALIGN(size);
3689         order = get_order(size);
3690
3691         if (!iommu_no_mapping(dev))
3692                 flags &= ~(GFP_DMA | GFP_DMA32);
3693         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3694                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3695                         flags |= GFP_DMA;
3696                 else
3697                         flags |= GFP_DMA32;
3698         }
3699
3700         if (gfpflags_allow_blocking(flags)) {
3701                 unsigned int count = size >> PAGE_SHIFT;
3702
3703                 page = dma_alloc_from_contiguous(dev, count, order);
3704                 if (page && iommu_no_mapping(dev) &&
3705                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3706                         dma_release_from_contiguous(dev, page, count);
3707                         page = NULL;
3708                 }
3709         }
3710
3711         if (!page)
3712                 page = alloc_pages(flags, order);
3713         if (!page)
3714                 return NULL;
3715         memset(page_address(page), 0, size);
3716
3717         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3718                                          DMA_BIDIRECTIONAL,
3719                                          dev->coherent_dma_mask);
3720         if (*dma_handle)
3721                 return page_address(page);
3722         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3723                 __free_pages(page, order);
3724
3725         return NULL;
3726 }
3727
3728 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3729                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3730 {
3731         int order;
3732         struct page *page = virt_to_page(vaddr);
3733
3734         size = PAGE_ALIGN(size);
3735         order = get_order(size);
3736
3737         intel_unmap(dev, dma_handle);
3738         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3739                 __free_pages(page, order);
3740 }
3741
3742 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3743                            int nelems, enum dma_data_direction dir,
3744                            struct dma_attrs *attrs)
3745 {
3746         intel_unmap(dev, sglist[0].dma_address);
3747 }
3748
3749 static int intel_nontranslate_map_sg(struct device *hddev,
3750         struct scatterlist *sglist, int nelems, int dir)
3751 {
3752         int i;
3753         struct scatterlist *sg;
3754
3755         for_each_sg(sglist, sg, nelems, i) {
3756                 BUG_ON(!sg_page(sg));
3757                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3758                 sg->dma_length = sg->length;
3759         }
3760         return nelems;
3761 }
3762
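/*
 * dma_map_sg() backend: allocate one IOVA range big enough for the whole
 * scatterlist and map all entries into it in one pass; identity-mapped
 * devices bypass the IOMMU via intel_nontranslate_map_sg().
 */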
3763 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3764                         enum dma_data_direction dir, struct dma_attrs *attrs)
3765 {
3766         int i;
3767         struct dmar_domain *domain;
3768         size_t size = 0;
3769         int prot = 0;
3770         struct iova *iova = NULL;
3771         int ret;
3772         struct scatterlist *sg;
3773         unsigned long start_vpfn;
3774         struct intel_iommu *iommu;
3775
3776         BUG_ON(dir == DMA_NONE);
3777         if (iommu_no_mapping(dev))
3778                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3779
3780         domain = get_valid_domain_for_dev(dev);
3781         if (!domain)
3782                 return 0;
3783
3784         iommu = domain_get_iommu(domain);
3785
3786         for_each_sg(sglist, sg, nelems, i)
3787                 size += aligned_nrpages(sg->offset, sg->length);
3788
3789         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3790                                 *dev->dma_mask);
3791         if (!iova) {
3792                 sglist->dma_length = 0;
3793                 return 0;
3794         }
3795
3796         /*
3797          * Check if DMAR supports zero-length reads on write-only
3798          * mappings.
3799          */
3800         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3801                         !cap_zlr(iommu->cap))
3802                 prot |= DMA_PTE_READ;
3803         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3804                 prot |= DMA_PTE_WRITE;
3805
3806         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3807
3808         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3809         if (unlikely(ret)) {
3810                 dma_pte_free_pagetable(domain, start_vpfn,
3811                                        start_vpfn + size - 1);
3812                 __free_iova(&domain->iovad, iova);
3813                 return 0;
3814         }
3815
3816         /* it's a non-present to present mapping. Only flush if caching mode */
3817         if (cap_caching_mode(iommu->cap))
3818                 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
3819         else
3820                 iommu_flush_write_buffer(iommu);
3821
3822         return nelems;
3823 }
3824
3825 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3826 {
3827         return !dma_addr;
3828 }
3829
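/*
 * DMA API operations used for devices behind a VT-d unit.  Drivers never
 * call these directly; they go through the generic DMA API, e.g.
 * (illustrative only):
 *
 *	dma_addr_t handle = dma_map_page(dev, page, 0, PAGE_SIZE,
 *					 DMA_TO_DEVICE);
 *	...
 *	dma_unmap_page(dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
 *
 * which ends up in intel_map_page()/intel_unmap_page() above once these
 * ops are installed for the device.
 */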
3830 struct dma_map_ops intel_dma_ops = {
3831         .alloc = intel_alloc_coherent,
3832         .free = intel_free_coherent,
3833         .map_sg = intel_map_sg,
3834         .unmap_sg = intel_unmap_sg,
3835         .map_page = intel_map_page,
3836         .unmap_page = intel_unmap_page,
3837         .mapping_error = intel_mapping_error,
3838 };
3839
3840 static inline int iommu_domain_cache_init(void)
3841 {
3842         int ret = 0;
3843
3844         iommu_domain_cache = kmem_cache_create("iommu_domain",
3845                                          sizeof(struct dmar_domain),
3846                                          0,
3847                                          SLAB_HWCACHE_ALIGN,
3849                                          NULL);
3850         if (!iommu_domain_cache) {
3851                 pr_err("Couldn't create iommu_domain cache\n");
3852                 ret = -ENOMEM;
3853         }
3854
3855         return ret;
3856 }
3857
3858 static inline int iommu_devinfo_cache_init(void)
3859 {
3860         int ret = 0;
3861
3862         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3863                                          sizeof(struct device_domain_info),
3864                                          0,
3865                                          SLAB_HWCACHE_ALIGN,
3866                                          NULL);
3867         if (!iommu_devinfo_cache) {
3868                 pr_err("Couldn't create devinfo cache\n");
3869                 ret = -ENOMEM;
3870         }
3871
3872         return ret;
3873 }
3874
3875 static int __init iommu_init_mempool(void)
3876 {
3877         int ret;
3878         ret = iova_cache_get();
3879         if (ret)
3880                 return ret;
3881
3882         ret = iommu_domain_cache_init();
3883         if (ret)
3884                 goto domain_error;
3885
3886         ret = iommu_devinfo_cache_init();
3887         if (!ret)
3888                 return ret;
3889
3890         kmem_cache_destroy(iommu_domain_cache);
3891 domain_error:
3892         iova_cache_put();
3893
3894         return -ENOMEM;
3895 }
3896
3897 static void __init iommu_exit_mempool(void)
3898 {
3899         kmem_cache_destroy(iommu_devinfo_cache);
3900         kmem_cache_destroy(iommu_domain_cache);
3901         iova_cache_put();
3902 }
3903
3904 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3905 {
3906         struct dmar_drhd_unit *drhd;
3907         u32 vtbar;
3908         int rc;
3909
3910         /* We know that this device on this chipset has its own IOMMU.
3911          * If we find it under a different IOMMU, then the BIOS is lying
3912          * to us. Hope that the IOMMU for this device is actually
3913          * disabled, and it needs no translation...
3914          */
3915         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3916         if (rc) {
3917                 /* "can't" happen */
3918                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3919                 return;
3920         }
3921         vtbar &= 0xffff0000;
3922
3923         /* we know that this IOMMU should be at offset 0xa000 from vtbar */
3924         drhd = dmar_find_matched_drhd_unit(pdev);
3925         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3926                             TAINT_FIRMWARE_WORKAROUND,
3927                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3928                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3929 }
3930 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3931
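/*
 * Mark DRHD units that can be skipped: units whose device scope lists no
 * existing devices are ignored outright, and units that cover *only*
 * graphics devices are either ignored or noted via intel_iommu_gfx_mapped,
 * depending on dmar_map_gfx.
 */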
3932 static void __init init_no_remapping_devices(void)
3933 {
3934         struct dmar_drhd_unit *drhd;
3935         struct device *dev;
3936         int i;
3937
3938         for_each_drhd_unit(drhd) {
3939                 if (!drhd->include_all) {
3940                         for_each_active_dev_scope(drhd->devices,
3941                                                   drhd->devices_cnt, i, dev)
3942                                 break;
3943                         /* ignore DMAR unit if no devices exist */
3944                         if (i == drhd->devices_cnt)
3945                                 drhd->ignored = 1;
3946                 }
3947         }
3948
3949         for_each_active_drhd_unit(drhd) {
3950                 if (drhd->include_all)
3951                         continue;
3952
3953                 for_each_active_dev_scope(drhd->devices,
3954                                           drhd->devices_cnt, i, dev)
3955                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3956                                 break;
3957                 if (i < drhd->devices_cnt)
3958                         continue;
3959
3960                 /* This IOMMU has *only* gfx devices. Either bypass it or
3961                    set the gfx_mapped flag, as appropriate */
3962                 if (dmar_map_gfx) {
3963                         intel_iommu_gfx_mapped = 1;
3964                 } else {
3965                         drhd->ignored = 1;
3966                         for_each_active_dev_scope(drhd->devices,
3967                                                   drhd->devices_cnt, i, dev)
3968                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3969                 }
3970         }
3971 }
3972
3973 #ifdef CONFIG_SUSPEND
3974 static int init_iommu_hw(void)
3975 {
3976         struct dmar_drhd_unit *drhd;
3977         struct intel_iommu *iommu = NULL;
3978
3979         for_each_active_iommu(iommu, drhd)
3980                 if (iommu->qi)
3981                         dmar_reenable_qi(iommu);
3982
3983         for_each_iommu(iommu, drhd) {
3984                 if (drhd->ignored) {
3985                         /*
3986                          * we always have to disable PMRs or DMA may fail on
3987                          * this device
3988                          */
3989                         if (force_on)
3990                                 iommu_disable_protect_mem_regions(iommu);
3991                         continue;
3992                 }
3993
3994                 iommu_flush_write_buffer(iommu);
3995
3996                 iommu_set_root_entry(iommu);
3997
3998                 iommu->flush.flush_context(iommu, 0, 0, 0,
3999                                            DMA_CCMD_GLOBAL_INVL);
4000                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4001                 iommu_enable_translation(iommu);
4002                 iommu_disable_protect_mem_regions(iommu);
4003         }
4004
4005         return 0;
4006 }
4007
4008 static void iommu_flush_all(void)
4009 {
4010         struct dmar_drhd_unit *drhd;
4011         struct intel_iommu *iommu;
4012
4013         for_each_active_iommu(iommu, drhd) {
4014                 iommu->flush.flush_context(iommu, 0, 0, 0,
4015                                            DMA_CCMD_GLOBAL_INVL);
4016                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4017                                          DMA_TLB_GLOBAL_FLUSH);
4018         }
4019 }
4020
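/*
 * System suspend: flush all caches, disable translation and save the
 * fault-event registers of every active IOMMU so that iommu_resume()
 * can restore them.
 */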
4021 static int iommu_suspend(void)
4022 {
4023         struct dmar_drhd_unit *drhd;
4024         struct intel_iommu *iommu = NULL;
4025         unsigned long flag;
4026
4027         for_each_active_iommu(iommu, drhd) {
4028                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4029                                                  GFP_ATOMIC);
4030                 if (!iommu->iommu_state)
4031                         goto nomem;
4032         }
4033
4034         iommu_flush_all();
4035
4036         for_each_active_iommu(iommu, drhd) {
4037                 iommu_disable_translation(iommu);
4038
4039                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4040
4041                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4042                         readl(iommu->reg + DMAR_FECTL_REG);
4043                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4044                         readl(iommu->reg + DMAR_FEDATA_REG);
4045                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4046                         readl(iommu->reg + DMAR_FEADDR_REG);
4047                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4048                         readl(iommu->reg + DMAR_FEUADDR_REG);
4049
4050                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4051         }
4052         return 0;
4053
4054 nomem:
4055         for_each_active_iommu(iommu, drhd)
4056                 kfree(iommu->iommu_state);
4057
4058         return -ENOMEM;
4059 }
4060
4061 static void iommu_resume(void)
4062 {
4063         struct dmar_drhd_unit *drhd;
4064         struct intel_iommu *iommu = NULL;
4065         unsigned long flag;
4066
4067         if (init_iommu_hw()) {
4068                 if (force_on)
4069                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4070                 else
4071                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4072                 return;
4073         }
4074
4075         for_each_active_iommu(iommu, drhd) {
4076
4077                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4078
4079                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4080                         iommu->reg + DMAR_FECTL_REG);
4081                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4082                         iommu->reg + DMAR_FEDATA_REG);
4083                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4084                         iommu->reg + DMAR_FEADDR_REG);
4085                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4086                         iommu->reg + DMAR_FEUADDR_REG);
4087
4088                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4089         }
4090
4091         for_each_active_iommu(iommu, drhd)
4092                 kfree(iommu->iommu_state);
4093 }
4094
4095 static struct syscore_ops iommu_syscore_ops = {
4096         .resume         = iommu_resume,
4097         .suspend        = iommu_suspend,
4098 };
4099
4100 static void __init init_iommu_pm_ops(void)
4101 {
4102         register_syscore_ops(&iommu_syscore_ops);
4103 }
4104
4105 #else
4106 static inline void init_iommu_pm_ops(void) {}
4107 #endif  /* CONFIG_SUSPEND */
4108
4109
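/*
 * Parse one ACPI RMRR (Reserved Memory Region Reporting) structure into a
 * dmar_rmrr_unit, including its device scope, and add it to the global
 * dmar_rmrr_units list.
 */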
4110 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4111 {
4112         struct acpi_dmar_reserved_memory *rmrr;
4113         struct dmar_rmrr_unit *rmrru;
4114
4115         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4116         if (!rmrru)
4117                 return -ENOMEM;
4118
4119         rmrru->hdr = header;
4120         rmrr = (struct acpi_dmar_reserved_memory *)header;
4121         rmrru->base_address = rmrr->base_address;
4122         rmrru->end_address = rmrr->end_address;
4123         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4124                                 ((void *)rmrr) + rmrr->header.length,
4125                                 &rmrru->devices_cnt);
4126         if (rmrru->devices_cnt && rmrru->devices == NULL) {
4127                 kfree(rmrru);
4128                 return -ENOMEM;
4129         }
4130
4131         list_add(&rmrru->list, &dmar_rmrr_units);
4132
4133         return 0;
4134 }
4135
4136 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4137 {
4138         struct dmar_atsr_unit *atsru;
4139         struct acpi_dmar_atsr *tmp;
4140
4141         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4142                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4143                 if (atsr->segment != tmp->segment)
4144                         continue;
4145                 if (atsr->header.length != tmp->header.length)
4146                         continue;
4147                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4148                         return atsru;
4149         }
4150
4151         return NULL;
4152 }
4153
4154 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4155 {
4156         struct acpi_dmar_atsr *atsr;
4157         struct dmar_atsr_unit *atsru;
4158
4159         if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4160                 return 0;
4161
4162         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4163         atsru = dmar_find_atsr(atsr);
4164         if (atsru)
4165                 return 0;
4166
4167         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4168         if (!atsru)
4169                 return -ENOMEM;
4170
4171         /*
4172          * If memory is allocated from slab by ACPI _DSM method, we need to
4173          * copy the memory content because the memory buffer will be freed
4174          * on return.
4175          */
4176         atsru->hdr = (void *)(atsru + 1);
4177         memcpy(atsru->hdr, hdr, hdr->length);
4178         atsru->include_all = atsr->flags & 0x1;
4179         if (!atsru->include_all) {
4180                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4181                                 (void *)atsr + atsr->header.length,
4182                                 &atsru->devices_cnt);
4183                 if (atsru->devices_cnt && atsru->devices == NULL) {
4184                         kfree(atsru);
4185                         return -ENOMEM;
4186                 }
4187         }
4188
4189         list_add_rcu(&atsru->list, &dmar_atsr_units);
4190
4191         return 0;
4192 }
4193
4194 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4195 {
4196         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4197         kfree(atsru);
4198 }
4199
4200 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4201 {
4202         struct acpi_dmar_atsr *atsr;
4203         struct dmar_atsr_unit *atsru;
4204
4205         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4206         atsru = dmar_find_atsr(atsr);
4207         if (atsru) {
4208                 list_del_rcu(&atsru->list);
4209                 synchronize_rcu();
4210                 intel_iommu_free_atsr(atsru);
4211         }
4212
4213         return 0;
4214 }
4215
4216 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4217 {
4218         int i;
4219         struct device *dev;
4220         struct acpi_dmar_atsr *atsr;
4221         struct dmar_atsr_unit *atsru;
4222
4223         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4224         atsru = dmar_find_atsr(atsr);
4225         if (!atsru)
4226                 return 0;
4227
4228         if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4229                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4230                                           i, dev)
4231                         return -EBUSY;
4232         }
4233
4234         return 0;
4235 }
4236
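/*
 * Bring a hot-added DMAR unit online: check that its capabilities are
 * compatible with the current global settings (pass-through, snooping,
 * superpages), then mirror the boot-time setup (domain ids, root entry,
 * invalidation, interrupts) and enable translation.
 */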
4237 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4238 {
4239         int sp, ret = 0;
4240         struct intel_iommu *iommu = dmaru->iommu;
4241
4242         if (g_iommus[iommu->seq_id])
4243                 return 0;
4244
4245         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4246                 pr_warn("%s: Doesn't support hardware pass through.\n",
4247                         iommu->name);
4248                 return -ENXIO;
4249         }
4250         if (!ecap_sc_support(iommu->ecap) &&
4251             domain_update_iommu_snooping(iommu)) {
4252                 pr_warn("%s: Doesn't support snooping.\n",
4253                         iommu->name);
4254                 return -ENXIO;
4255         }
4256         sp = domain_update_iommu_superpage(iommu) - 1;
4257         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4258                 pr_warn("%s: Doesn't support large page.\n",
4259                         iommu->name);
4260                 return -ENXIO;
4261         }
4262
4263         /*
4264          * Disable translation if already enabled prior to OS handover.
4265          */
4266         if (iommu->gcmd & DMA_GCMD_TE)
4267                 iommu_disable_translation(iommu);
4268
4269         g_iommus[iommu->seq_id] = iommu;
4270         ret = iommu_init_domains(iommu);
4271         if (ret == 0)
4272                 ret = iommu_alloc_root_entry(iommu);
4273         if (ret)
4274                 goto out;
4275
4276 #ifdef CONFIG_INTEL_IOMMU_SVM
4277         if (pasid_enabled(iommu))
4278                 intel_svm_alloc_pasid_tables(iommu);
4279 #endif
4280
4281         if (dmaru->ignored) {
4282                 /*
4283                  * we always have to disable PMRs or DMA may fail on this device
4284                  */
4285                 if (force_on)
4286                         iommu_disable_protect_mem_regions(iommu);
4287                 return 0;
4288         }
4289
4290         intel_iommu_init_qi(iommu);
4291         iommu_flush_write_buffer(iommu);
4292
4293 #ifdef CONFIG_INTEL_IOMMU_SVM
4294         if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4295                 ret = intel_svm_enable_prq(iommu);
4296                 if (ret)
4297                         goto disable_iommu;
4298         }
4299 #endif
4300         ret = dmar_set_interrupt(iommu);
4301         if (ret)
4302                 goto disable_iommu;
4303
4304         iommu_set_root_entry(iommu);
4305         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4306         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4307         iommu_enable_translation(iommu);
4308
4309         iommu_disable_protect_mem_regions(iommu);
4310         return 0;
4311
4312 disable_iommu:
4313         disable_dmar_iommu(iommu);
4314 out:
4315         free_dmar_iommu(iommu);
4316         return ret;
4317 }
4318
4319 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4320 {
4321         int ret = 0;
4322         struct intel_iommu *iommu = dmaru->iommu;
4323
4324         if (!intel_iommu_enabled)
4325                 return 0;
4326         if (iommu == NULL)
4327                 return -EINVAL;
4328
4329         if (insert) {
4330                 ret = intel_iommu_add(dmaru);
4331         } else {
4332                 disable_dmar_iommu(iommu);
4333                 free_dmar_iommu(iommu);
4334         }
4335
4336         return ret;
4337 }
4338
4339 static void intel_iommu_free_dmars(void)
4340 {
4341         struct dmar_rmrr_unit *rmrru, *rmrr_n;
4342         struct dmar_atsr_unit *atsru, *atsr_n;
4343
4344         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4345                 list_del(&rmrru->list);
4346                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4347                 kfree(rmrru);
4348         }
4349
4350         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4351                 list_del(&atsru->list);
4352                 intel_iommu_free_atsr(atsru);
4353         }
4354 }
4355
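/*
 * Decide whether ATS may be used for @dev: walk up to the PCIe root port
 * and check whether an ATSR unit for this segment lists that port (or is
 * marked include_all).  Integrated devices with no upstream bridge are
 * always allowed.
 */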
4356 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4357 {
4358         int i, ret = 1;
4359         struct pci_bus *bus;
4360         struct pci_dev *bridge = NULL;
4361         struct device *tmp;
4362         struct acpi_dmar_atsr *atsr;
4363         struct dmar_atsr_unit *atsru;
4364
4365         dev = pci_physfn(dev);
4366         for (bus = dev->bus; bus; bus = bus->parent) {
4367                 bridge = bus->self;
4368                 /* If it's an integrated device, allow ATS */
4369                 if (!bridge)
4370                         return 1;
4371                 /* Connected via non-PCIe: no ATS */
4372                 if (!pci_is_pcie(bridge) ||
4373                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4374                         return 0;
4375                 /* If we found the root port, look it up in the ATSR */
4376                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4377                         break;
4378         }
4379
4380         rcu_read_lock();
4381         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4382                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4383                 if (atsr->segment != pci_domain_nr(dev->bus))
4384                         continue;
4385
4386                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4387                         if (tmp == &bridge->dev)
4388                                 goto out;
4389
4390                 if (atsru->include_all)
4391                         goto out;
4392         }
4393         ret = 0;
4394 out:
4395         rcu_read_unlock();
4396
4397         return ret;
4398 }
4399
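/*
 * PCI hotplug notification: keep the device-scope lists of all RMRR and
 * ATSR units in sync when devices are added to or removed from the bus.
 */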
4400 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4401 {
4402         int ret = 0;
4403         struct dmar_rmrr_unit *rmrru;
4404         struct dmar_atsr_unit *atsru;
4405         struct acpi_dmar_atsr *atsr;
4406         struct acpi_dmar_reserved_memory *rmrr;
4407
4408         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4409                 return 0;
4410
4411         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4412                 rmrr = container_of(rmrru->hdr,
4413                                     struct acpi_dmar_reserved_memory, header);
4414                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4415                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4416                                 ((void *)rmrr) + rmrr->header.length,
4417                                 rmrr->segment, rmrru->devices,
4418                                 rmrru->devices_cnt);
4419                         if (ret < 0)
4420                                 return ret;
4421                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4422                         dmar_remove_dev_scope(info, rmrr->segment,
4423                                 rmrru->devices, rmrru->devices_cnt);
4424                 }
4425         }
4426
4427         list_for_each_entry(atsru, &dmar_atsr_units, list) {
4428                 if (atsru->include_all)
4429                         continue;
4430
4431                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4432                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4433                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4434                                         (void *)atsr + atsr->header.length,
4435                                         atsr->segment, atsru->devices,
4436                                         atsru->devices_cnt);
4437                         if (ret > 0)
4438                                 break;
4439                         else if (ret < 0)
4440                                 return ret;
4441                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4442                         if (dmar_remove_dev_scope(info, atsr->segment,
4443                                         atsru->devices, atsru->devices_cnt))
4444                                 break;
4445                 }
4446         }
4447
4448         return 0;
4449 }
4450
4451 /*
4452  * Here we only respond to a device being unbound from its driver.
4453  *
4454  * A newly added device is not attached to its DMAR domain here yet; that
4455  * will happen when the device is mapped to an iova.
4456  */
4457 static int device_notifier(struct notifier_block *nb,
4458                                   unsigned long action, void *data)
4459 {
4460         struct device *dev = data;
4461         struct dmar_domain *domain;
4462
4463         if (iommu_dummy(dev))
4464                 return 0;
4465
4466         if (action != BUS_NOTIFY_REMOVED_DEVICE)
4467                 return 0;
4468
4469         domain = find_domain(dev);
4470         if (!domain)
4471                 return 0;
4472
4473         dmar_remove_one_dev_info(domain, dev);
4474         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4475                 domain_exit(domain);
4476
4477         return 0;
4478 }
4479
4480 static struct notifier_block device_nb = {
4481         .notifier_call = device_notifier,
4482 };
4483
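/*
 * Memory hotplug notifier: keep the static identity domain (si_domain) in
 * sync with RAM.  Newly onlined ranges get a 1:1 mapping; when a range goes
 * offline (or onlining is cancelled) the corresponding IOVAs are unmapped
 * and the IOTLBs of all active IOMMUs are flushed.
 */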
4484 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4485                                        unsigned long val, void *v)
4486 {
4487         struct memory_notify *mhp = v;
4488         unsigned long long start, end;
4489         unsigned long start_vpfn, last_vpfn;
4490
4491         switch (val) {
4492         case MEM_GOING_ONLINE:
4493                 start = mhp->start_pfn << PAGE_SHIFT;
4494                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4495                 if (iommu_domain_identity_map(si_domain, start, end)) {
4496                         pr_warn("Failed to build identity map for [%llx-%llx]\n",
4497                                 start, end);
4498                         return NOTIFY_BAD;
4499                 }
4500                 break;
4501
4502         case MEM_OFFLINE:
4503         case MEM_CANCEL_ONLINE:
4504                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4505                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4506                 while (start_vpfn <= last_vpfn) {
4507                         struct iova *iova;
4508                         struct dmar_drhd_unit *drhd;
4509                         struct intel_iommu *iommu;
4510                         struct page *freelist;
4511
4512                         iova = find_iova(&si_domain->iovad, start_vpfn);
4513                         if (iova == NULL) {
4514                                 pr_debug("Failed to get IOVA for PFN %lx\n",
4515                                          start_vpfn);
4516                                 break;
4517                         }
4518
4519                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4520                                                      start_vpfn, last_vpfn);
4521                         if (iova == NULL) {
4522                                 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4523                                         start_vpfn, last_vpfn);
4524                                 return NOTIFY_BAD;
4525                         }
4526
4527                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4528                                                iova->pfn_hi);
4529
4530                         rcu_read_lock();
4531                         for_each_active_iommu(iommu, drhd)
4532                                 iommu_flush_iotlb_psi(iommu, si_domain,
4533                                         iova->pfn_lo, iova_size(iova),
4534                                         !freelist, 0);
4535                         rcu_read_unlock();
4536                         dma_free_pagelist(freelist);
4537
4538                         start_vpfn = iova->pfn_hi + 1;
4539                         free_iova_mem(iova);
4540                 }
4541                 break;
4542         }
4543
4544         return NOTIFY_OK;
4545 }
4546
4547 static struct notifier_block intel_iommu_memory_nb = {
4548         .notifier_call = intel_iommu_memory_notifier,
4549         .priority = 0
4550 };
4551
4552
4553 static ssize_t intel_iommu_show_version(struct device *dev,
4554                                         struct device_attribute *attr,
4555                                         char *buf)
4556 {
4557         struct intel_iommu *iommu = dev_get_drvdata(dev);
4558         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4559         return sprintf(buf, "%d:%d\n",
4560                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4561 }
4562 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4563
4564 static ssize_t intel_iommu_show_address(struct device *dev,
4565                                         struct device_attribute *attr,
4566                                         char *buf)
4567 {
4568         struct intel_iommu *iommu = dev_get_drvdata(dev);
4569         return sprintf(buf, "%llx\n", iommu->reg_phys);
4570 }
4571 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4572
4573 static ssize_t intel_iommu_show_cap(struct device *dev,
4574                                     struct device_attribute *attr,
4575                                     char *buf)
4576 {
4577         struct intel_iommu *iommu = dev_get_drvdata(dev);
4578         return sprintf(buf, "%llx\n", iommu->cap);
4579 }
4580 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4581
4582 static ssize_t intel_iommu_show_ecap(struct device *dev,
4583                                     struct device_attribute *attr,
4584                                     char *buf)
4585 {
4586         struct intel_iommu *iommu = dev_get_drvdata(dev);
4587         return sprintf(buf, "%llx\n", iommu->ecap);
4588 }
4589 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4590
4591 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4592                                       struct device_attribute *attr,
4593                                       char *buf)
4594 {
4595         struct intel_iommu *iommu = dev_get_drvdata(dev);
4596         return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4597 }
4598 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4599
4600 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4601                                            struct device_attribute *attr,
4602                                            char *buf)
4603 {
4604         struct intel_iommu *iommu = dev_get_drvdata(dev);
4605         return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4606                                                   cap_ndoms(iommu->cap)));
4607 }
4608 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4609
4610 static struct attribute *intel_iommu_attrs[] = {
4611         &dev_attr_version.attr,
4612         &dev_attr_address.attr,
4613         &dev_attr_cap.attr,
4614         &dev_attr_ecap.attr,
4615         &dev_attr_domains_supported.attr,
4616         &dev_attr_domains_used.attr,
4617         NULL,
4618 };
4619
4620 static struct attribute_group intel_iommu_group = {
4621         .name = "intel-iommu",
4622         .attrs = intel_iommu_attrs,
4623 };
4624
4625 const struct attribute_group *intel_iommu_groups[] = {
4626         &intel_iommu_group,
4627         NULL,
4628 };
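/*
 * These groups are handed to iommu_device_create() in intel_iommu_init()
 * below, so each remapping unit typically shows up as
 * /sys/class/iommu/<name>/intel-iommu/{version,address,cap,ecap,
 * domains_supported,domains_used} (e.g. <name> = dmar0).  The exact path
 * depends on the sysfs class layout; treat this as an informal note.
 */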
4629
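/*
 * Main VT-d initialisation entry point: parse the DMAR table and device
 * scopes, bring up the remapping hardware via init_dmars(), install
 * intel_dma_ops, and register the IOMMU driver and notifiers with the PCI
 * bus.  With a TXT/tboot launch in force, failures panic rather than
 * silently continuing without DMA protection.
 */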
4630 int __init intel_iommu_init(void)
4631 {
4632         int ret = -ENODEV;
4633         struct dmar_drhd_unit *drhd;
4634         struct intel_iommu *iommu;
4635
4636         /* VT-d is required for a TXT/tboot launch, so enforce that */
4637         force_on = tboot_force_iommu();
4638
4639         if (iommu_init_mempool()) {
4640                 if (force_on)
4641                         panic("tboot: Failed to initialize iommu memory\n");
4642                 return -ENOMEM;
4643         }
4644
4645         down_write(&dmar_global_lock);
4646         if (dmar_table_init()) {
4647                 if (force_on)
4648                         panic("tboot: Failed to initialize DMAR table\n");
4649                 goto out_free_dmar;
4650         }
4651
4652         if (dmar_dev_scope_init() < 0) {
4653                 if (force_on)
4654                         panic("tboot: Failed to initialize DMAR device scope\n");
4655                 goto out_free_dmar;
4656         }
4657
4658         if (no_iommu || dmar_disabled)
4659                 goto out_free_dmar;
4660
4661         if (list_empty(&dmar_rmrr_units))
4662                 pr_info("No RMRR found\n");
4663
4664         if (list_empty(&dmar_atsr_units))
4665                 pr_info("No ATSR found\n");
4666
4667         if (dmar_init_reserved_ranges()) {
4668                 if (force_on)
4669                         panic("tboot: Failed to reserve iommu ranges\n");
4670                 goto out_free_reserved_range;
4671         }
4672
4673         init_no_remapping_devices();
4674
4675         ret = init_dmars();
4676         if (ret) {
4677                 if (force_on)
4678                         panic("tboot: Failed to initialize DMARs\n");
4679                 pr_err("Initialization failed\n");
4680                 goto out_free_reserved_range;
4681         }
4682         up_write(&dmar_global_lock);
4683         pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4684
4685         init_timer(&unmap_timer);
4686 #ifdef CONFIG_SWIOTLB
4687         swiotlb = 0;
4688 #endif
4689         dma_ops = &intel_dma_ops;
4690
4691         init_iommu_pm_ops();
4692
4693         for_each_active_iommu(iommu, drhd)
4694                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4695                                                        intel_iommu_groups,
4696                                                        "%s", iommu->name);
4697
4698         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4699         bus_register_notifier(&pci_bus_type, &device_nb);
4700         if (si_domain && !hw_pass_through)
4701                 register_memory_notifier(&intel_iommu_memory_nb);
4702
4703         intel_iommu_enabled = 1;
4704
4705         return 0;
4706
4707 out_free_reserved_range:
4708         put_iova_domain(&reserved_iova_list);
4709 out_free_dmar:
4710         intel_iommu_free_dmars();
4711         up_write(&dmar_global_lock);
4712         iommu_exit_mempool();
4713         return ret;
4714 }
4715
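/*
 * pci_for_each_dma_alias() callback: clear the context entry for one
 * bus/devfn alias of the device being detached.
 */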
4716 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4717 {
4718         struct intel_iommu *iommu = opaque;
4719
4720         domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4721         return 0;
4722 }
4723
4724 /*
4725  * NB - intel-iommu lacks any sort of reference counting for the users of
4726  * dependent devices.  If multiple endpoints have intersecting dependent
4727  * devices, unbinding the driver from any one of them will possibly leave
4728  * the others unable to operate.
4729  */
4730 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4731 {
4732         if (!iommu || !dev || !dev_is_pci(dev))
4733                 return;
4734
4735         pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4736 }
4737
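/*
 * Tear down one device's attachment to its domain: disable the device
 * IOTLB, clear the context entries for all of its DMA aliases, unlink the
 * device_domain_info and drop the domain's reference on the IOMMU.
 * Caller must hold device_domain_lock.
 */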
4738 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4739 {
4740         struct intel_iommu *iommu;
4741         unsigned long flags;
4742
4743         assert_spin_locked(&device_domain_lock);
4744
4745         if (WARN_ON(!info))
4746                 return;
4747
4748         iommu = info->iommu;
4749
4750         if (info->dev) {
4751                 iommu_disable_dev_iotlb(info);
4752                 domain_context_clear(iommu, info->dev);
4753         }
4754
4755         unlink_domain_info(info);
4756
4757         spin_lock_irqsave(&iommu->lock, flags);
4758         domain_detach_iommu(info->domain, iommu);
4759         spin_unlock_irqrestore(&iommu->lock, flags);
4760
4761         free_devinfo_mem(info);
4762 }
4763
4764 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4765                                      struct device *dev)
4766 {
4767         struct device_domain_info *info;
4768         unsigned long flags;
4769
4770         spin_lock_irqsave(&device_domain_lock, flags);
4771         info = dev->archdata.iommu;
4772         __dmar_remove_one_dev_info(info);
4773         spin_unlock_irqrestore(&device_domain_lock, flags);
4774 }
4775
4776 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4777 {
4778         int adjust_width;
4779
4780         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4781                         DMA_32BIT_PFN);
4782         domain_reserve_special_ranges(domain);
4783
4784         /* calculate AGAW */
4785         domain->gaw = guest_width;
4786         adjust_width = guestwidth_to_adjustwidth(guest_width);
4787         domain->agaw = width_to_agaw(adjust_width);
4788
4789         domain->iommu_coherency = 0;
4790         domain->iommu_snooping = 0;
4791         domain->iommu_superpage = 0;
4792         domain->max_addr = 0;
4793
4794         /* always allocate the top pgd */
4795         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4796         if (!domain->pgd)
4797                 return -ENOMEM;
4798         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4799         return 0;
4800 }
4801
4802 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4803 {
4804         struct dmar_domain *dmar_domain;
4805         struct iommu_domain *domain;
4806
4807         if (type != IOMMU_DOMAIN_UNMANAGED)
4808                 return NULL;
4809
4810         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4811         if (!dmar_domain) {
4812                 pr_err("Can't allocate dmar_domain\n");
4813                 return NULL;
4814         }
4815         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4816                 pr_err("Domain initialization failed\n");
4817                 domain_exit(dmar_domain);
4818                 return NULL;
4819         }
4820         domain_update_iommu_cap(dmar_domain);
4821
4822         domain = &dmar_domain->domain;
4823         domain->geometry.aperture_start = 0;
4824         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4825         domain->geometry.force_aperture = true;
4826
4827         return domain;
4828 }
4829
4830 static void intel_iommu_domain_free(struct iommu_domain *domain)
4831 {
4832         domain_exit(to_dmar_domain(domain));
4833 }
4834
4835 static int intel_iommu_attach_device(struct iommu_domain *domain,
4836                                      struct device *dev)
4837 {
4838         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4839         struct intel_iommu *iommu;
4840         int addr_width;
4841         u8 bus, devfn;
4842
4843         if (device_is_rmrr_locked(dev)) {
4844                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4845                 return -EPERM;
4846         }
4847
4848         /* normally dev is not mapped */
4849         if (unlikely(domain_context_mapped(dev))) {
4850                 struct dmar_domain *old_domain;
4851
4852                 old_domain = find_domain(dev);
4853                 if (old_domain) {
4854                         rcu_read_lock();
4855                         dmar_remove_one_dev_info(old_domain, dev);
4856                         rcu_read_unlock();
4857
4858                         if (!domain_type_is_vm_or_si(old_domain) &&
4859                              list_empty(&old_domain->devices))
4860                                 domain_exit(old_domain);
4861                 }
4862         }
4863
4864         iommu = device_to_iommu(dev, &bus, &devfn);
4865         if (!iommu)
4866                 return -ENODEV;
4867
4868         /* check if this iommu agaw is sufficient for max mapped address */
4869         addr_width = agaw_to_width(iommu->agaw);
4870         if (addr_width > cap_mgaw(iommu->cap))
4871                 addr_width = cap_mgaw(iommu->cap);
4872
4873         if (dmar_domain->max_addr > (1LL << addr_width)) {
4874                 pr_err("%s: iommu width (%d) is not "
4875                        "sufficient for the mapped address (%llx)\n",
4876                        __func__, addr_width, dmar_domain->max_addr);
4877                 return -EFAULT;
4878         }
4879         dmar_domain->gaw = addr_width;
4880
4881         /*
4882          * Knock out extra levels of page tables if necessary
4883          */
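        /*
         * For example: if the domain was sized for a 48-bit GAW (4 page-table
         * levels) but this IOMMU only supports a 39-bit AGAW (3 levels), drop
         * the top-level table and continue with its first (and only used)
         * entry as the new pgd.
         */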
4884         while (iommu->agaw < dmar_domain->agaw) {
4885                 struct dma_pte *pte;
4886
4887                 pte = dmar_domain->pgd;
4888                 if (dma_pte_present(pte)) {
4889                         dmar_domain->pgd = (struct dma_pte *)
4890                                 phys_to_virt(dma_pte_addr(pte));
4891                         free_pgtable_page(pte);
4892                 }
4893                 dmar_domain->agaw--;
4894         }
4895
4896         return domain_add_dev_info(dmar_domain, dev);
4897 }
4898
4899 static void intel_iommu_detach_device(struct iommu_domain *domain,
4900                                       struct device *dev)
4901 {
4902         dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
4903 }
4904
4905 static int intel_iommu_map(struct iommu_domain *domain,
4906                            unsigned long iova, phys_addr_t hpa,
4907                            size_t size, int iommu_prot)
4908 {
4909         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4910         u64 max_addr;
4911         int prot = 0;
4912         int ret;
4913
4914         if (iommu_prot & IOMMU_READ)
4915                 prot |= DMA_PTE_READ;
4916         if (iommu_prot & IOMMU_WRITE)
4917                 prot |= DMA_PTE_WRITE;
4918         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4919                 prot |= DMA_PTE_SNP;
4920
4921         max_addr = iova + size;
4922         if (dmar_domain->max_addr < max_addr) {
4923                 u64 end;
4924
4925                 /* check if minimum agaw is sufficient for mapped address */
4926                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4927                 if (end < max_addr) {
4928                         pr_err("%s: iommu width (%d) is not "
4929                                "sufficient for the mapped address (%llx)\n",
4930                                __func__, dmar_domain->gaw, max_addr);
4931                         return -EFAULT;
4932                 }
4933                 dmar_domain->max_addr = max_addr;
4934         }
4935         /* Round up size to next multiple of PAGE_SIZE, if it and
4936            the low bits of hpa would take us onto the next page */
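        /* For instance (hypothetical values): hpa = 0x1ff0 with size = 0x20
           straddles a page boundary, so aligned_nrpages() yields 2 pages. */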
4937         size = aligned_nrpages(hpa, size);
4938         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4939                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4940         return ret;
4941 }
4942
4943 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4944                                 unsigned long iova, size_t size)
4945 {
4946         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4947         struct page *freelist = NULL;
4948         struct intel_iommu *iommu;
4949         unsigned long start_pfn, last_pfn;
4950         unsigned int npages;
4951         int iommu_id, level = 0;
4952
4953         /* Cope with horrid API which requires us to unmap more than the
4954            size argument if it happens to be a large-page mapping. */
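        /* For example: if a 2MiB superpage backs 'iova' and the caller asks to
           unmap only 4KiB of it, 'size' is rounded up to 2MiB below so the
           whole large-page PTE is torn down and reported back. */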
4955         BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4956
4957         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4958                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4959
4960         start_pfn = iova >> VTD_PAGE_SHIFT;
4961         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4962
4963         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4964
4965         npages = last_pfn - start_pfn + 1;
4966
4967         for_each_domain_iommu(iommu_id, dmar_domain) {
4968                 iommu = g_iommus[iommu_id];
4969
4970                 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
4971                                       start_pfn, npages, !freelist, 0);
4972         }
4973
4974         dma_free_pagelist(freelist);
4975
4976         if (dmar_domain->max_addr == iova + size)
4977                 dmar_domain->max_addr = iova;
4978
4979         return size;
4980 }
4981
4982 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4983                                             dma_addr_t iova)
4984 {
4985         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4986         struct dma_pte *pte;
4987         int level = 0;
4988         u64 phys = 0;
4989
4990         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4991         if (pte)
4992                 phys = dma_pte_addr(pte);
4993
4994         return phys;
4995 }
4996
4997 static bool intel_iommu_capable(enum iommu_cap cap)
4998 {
4999         if (cap == IOMMU_CAP_CACHE_COHERENCY)
5000                 return domain_update_iommu_snooping(NULL) == 1;
5001         if (cap == IOMMU_CAP_INTR_REMAP)
5002                 return irq_remapping_enabled == 1;
5003
5004         return false;
5005 }
5006
5007 static int intel_iommu_add_device(struct device *dev)
5008 {
5009         struct intel_iommu *iommu;
5010         struct iommu_group *group;
5011         u8 bus, devfn;
5012
5013         iommu = device_to_iommu(dev, &bus, &devfn);
5014         if (!iommu)
5015                 return -ENODEV;
5016
5017         iommu_device_link(iommu->iommu_dev, dev);
5018
5019         group = iommu_group_get_for_dev(dev);
5020
5021         if (IS_ERR(group))
5022                 return PTR_ERR(group);
5023
5024         iommu_group_put(group);
5025         return 0;
5026 }
5027
5028 static void intel_iommu_remove_device(struct device *dev)
5029 {
5030         struct intel_iommu *iommu;
5031         u8 bus, devfn;
5032
5033         iommu = device_to_iommu(dev, &bus, &devfn);
5034         if (!iommu)
5035                 return;
5036
5037         iommu_group_remove_device(dev);
5038
5039         iommu_device_unlink(iommu->iommu_dev, dev);
5040 }
5041
5042 #ifdef CONFIG_INTEL_IOMMU_SVM
5043 #define MAX_NR_PASID_BITS (20)
5044 static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5045 {
5046         /*
5047          * Convert ecap_pss to the extended context entry pts encoding, also
5048          * respect the soft pasid_max value set by the iommu.
5049          * - number of PASID bits = ecap_pss + 1
5050          * - number of PASID table entries = 2^(pts + 5)
5051          * Therefore, pts = ecap_pss - 4
5052          * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5053          */
5054         if (ecap_pss(iommu->ecap) < 5)
5055                 return 0;
5056
5057         /* pasid_max is encoded as the actual number of entries, not the number of bits */
5058         return find_first_bit((unsigned long *)&iommu->pasid_max,
5059                         MAX_NR_PASID_BITS) - 5;
5060 }
5061
5062 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5063 {
5064         struct device_domain_info *info;
5065         struct context_entry *context;
5066         struct dmar_domain *domain;
5067         unsigned long flags;
5068         u64 ctx_lo;
5069         int ret;
5070
5071         domain = get_valid_domain_for_dev(sdev->dev);
5072         if (!domain)
5073                 return -EINVAL;
5074
5075         spin_lock_irqsave(&device_domain_lock, flags);
5076         spin_lock(&iommu->lock);
5077
5078         ret = -EINVAL;
5079         info = sdev->dev->archdata.iommu;
5080         if (!info || !info->pasid_supported)
5081                 goto out;
5082
5083         context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5084         if (WARN_ON(!context))
5085                 goto out;
5086
5087         ctx_lo = context[0].lo;
5088
5089         sdev->did = domain->iommu_did[iommu->seq_id];
5090         sdev->sid = PCI_DEVID(info->bus, info->devfn);
5091
5092         if (!(ctx_lo & CONTEXT_PASIDE)) {
5093                 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5094                 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5095                         intel_iommu_get_pts(iommu);
5096
5097                 wmb();
5098                 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5099                  * extended to permit requests-with-PASID if the PASIDE bit
5100                  * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5101                  * however, the PASIDE bit is ignored and requests-with-PASID
5102                  * are unconditionally blocked, which makes less sense.
5103                  * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5104                  * "guest mode" translation types depending on whether ATS
5105                  * is available or not. Annoyingly, we can't use the new
5106                  * modes *unless* PASIDE is set. */
5107                 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5108                         ctx_lo &= ~CONTEXT_TT_MASK;
5109                         if (info->ats_supported)
5110                                 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5111                         else
5112                                 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5113                 }
5114                 ctx_lo |= CONTEXT_PASIDE;
5115                 if (iommu->pasid_state_table)
5116                         ctx_lo |= CONTEXT_DINVE;
5117                 if (info->pri_supported)
5118                         ctx_lo |= CONTEXT_PRS;
5119                 context[0].lo = ctx_lo;
5120                 wmb();
5121                 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5122                                            DMA_CCMD_MASK_NOBIT,
5123                                            DMA_CCMD_DEVICE_INVL);
5124         }
5125
5126         /* Enable PASID support in the device, if it wasn't already */
5127         if (!info->pasid_enabled)
5128                 iommu_enable_dev_iotlb(info);
5129
5130         if (info->ats_enabled) {
5131                 sdev->dev_iotlb = 1;
5132                 sdev->qdep = info->ats_qdep;
5133                 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5134                         sdev->qdep = 0;
5135         }
5136         ret = 0;
5137
5138  out:
5139         spin_unlock(&iommu->lock);
5140         spin_unlock_irqrestore(&device_domain_lock, flags);
5141
5142         return ret;
5143 }
5144
5145 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5146 {
5147         struct intel_iommu *iommu;
5148         u8 bus, devfn;
5149
5150         if (iommu_dummy(dev)) {
5151                 dev_warn(dev,
5152                          "No IOMMU translation for device; cannot enable SVM\n");
5153                 return NULL;
5154         }
5155
5156         iommu = device_to_iommu(dev, &bus, &devfn);
5157         if (!iommu) {
5158                 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5159                 return NULL;
5160         }
5161
5162         if (!iommu->pasid_table) {
5163                 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5164                 return NULL;
5165         }
5166
5167         return iommu;
5168 }
5169 #endif /* CONFIG_INTEL_IOMMU_SVM */
5170
5171 static const struct iommu_ops intel_iommu_ops = {
5172         .capable        = intel_iommu_capable,
5173         .domain_alloc   = intel_iommu_domain_alloc,
5174         .domain_free    = intel_iommu_domain_free,
5175         .attach_dev     = intel_iommu_attach_device,
5176         .detach_dev     = intel_iommu_detach_device,
5177         .map            = intel_iommu_map,
5178         .unmap          = intel_iommu_unmap,
5179         .map_sg         = default_iommu_map_sg,
5180         .iova_to_phys   = intel_iommu_iova_to_phys,
5181         .add_device     = intel_iommu_add_device,
5182         .remove_device  = intel_iommu_remove_device,
5183         .device_group   = pci_device_group,
5184         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
5185 };
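/*
 * Illustrative sketch only, not part of this driver: how a kernel consumer
 * (e.g. something VFIO-like) would reach the callbacks above through the
 * generic IOMMU API.  The device pointer and the addresses used here are
 * hypothetical, and the block is compiled out.
 */
#if 0
static int example_map_one_page(struct device *dev)
{
        struct iommu_domain *domain;
        int ret;

        /* .domain_alloc -> intel_iommu_domain_alloc() */
        domain = iommu_domain_alloc(&pci_bus_type);
        if (!domain)
                return -ENOMEM;

        /* .attach_dev -> intel_iommu_attach_device() */
        ret = iommu_attach_device(domain, dev);
        if (ret)
                goto out_free;

        /* .map -> intel_iommu_map(): IOVA 0x0 -> PA 0x1000000, 4KiB, RW */
        ret = iommu_map(domain, 0, 0x1000000, 4096,
                        IOMMU_READ | IOMMU_WRITE);
        if (!ret)
                /* .unmap -> intel_iommu_unmap() */
                iommu_unmap(domain, 0, 4096);

        iommu_detach_device(domain, dev);
out_free:
        iommu_domain_free(domain);
        return ret;
}
#endif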
5186
5187 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5188 {
5189         /* G4x/GM45 integrated gfx dmar support is totally busted. */
5190         pr_info("Disabling IOMMU for graphics on this chipset\n");
5191         dmar_map_gfx = 0;
5192 }
5193
5194 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5195 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5196 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5197 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5198 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5199 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5200 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5201
5202 static void quirk_iommu_rwbf(struct pci_dev *dev)
5203 {
5204         /*
5205          * Mobile 4 Series Chipset neglects to set RWBF capability,
5206          * but needs it. Same seems to hold for the desktop versions.
5207          */
5208         pr_info("Forcing write-buffer flush capability\n");
5209         rwbf_quirk = 1;
5210 }
5211
5212 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5213 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5214 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5215 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5216 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5217 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5218 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5219
5220 #define GGC 0x52
5221 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
5222 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
5223 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
5224 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
5225 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
5226 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
5227 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
5228 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
5229
5230 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5231 {
5232         unsigned short ggc;
5233
5234         if (pci_read_config_word(dev, GGC, &ggc))
5235                 return;
5236
5237         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5238                 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5239                 dmar_map_gfx = 0;
5240         } else if (dmar_map_gfx) {
5241                 /* we have to ensure the gfx device is idle before we flush */
5242                 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5243                 intel_iommu_strict = 1;
5244         }
5245 }
5246 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5247 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5248 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5249 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5250
5251 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5252    ISOCH DMAR unit for the Azalia sound device, but not give it any
5253    TLB entries, which causes it to deadlock. Check for that.  We do
5254    this in a function called from init_dmars(), instead of in a PCI
5255    quirk, because we don't want to print the obnoxious "BIOS broken"
5256    message if VT-d is actually disabled.
5257 */
5258 static void __init check_tylersburg_isoch(void)
5259 {
5260         struct pci_dev *pdev;
5261         uint32_t vtisochctrl;
5262
5263         /* If there's no Azalia in the system anyway, forget it. */
5264         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5265         if (!pdev)
5266                 return;
5267         pci_dev_put(pdev);
5268
5269         /* System Management Registers. Might be hidden, in which case
5270            we can't do the sanity check. But that's OK, because the
5271            known-broken BIOSes _don't_ actually hide it, so far. */
5272         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5273         if (!pdev)
5274                 return;
5275
5276         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5277                 pci_dev_put(pdev);
5278                 return;
5279         }
5280
5281         pci_dev_put(pdev);
5282
5283         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5284         if (vtisochctrl & 1)
5285                 return;
5286
5287         /* Drop all bits other than the number of TLB entries */
5288         vtisochctrl &= 0x1c;
5289
5290         /* If we have the recommended number of TLB entries (16), fine. */
5291         if (vtisochctrl == 0x10)
5292                 return;
5293
5294         /* Zero TLB entries? You get to ride the short bus to school. */
5295         if (!vtisochctrl) {
5296                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5297                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5298                      dmi_get_system_info(DMI_BIOS_VENDOR),
5299                      dmi_get_system_info(DMI_BIOS_VERSION),
5300                      dmi_get_system_info(DMI_PRODUCT_VERSION));
5301                 iommu_identity_mapping |= IDENTMAP_AZALIA;
5302                 return;
5303         }
5304
5305         pr_warn("Recommended TLB entries for ISOCH unit are 16; your BIOS set %d\n",
5306                vtisochctrl);
5307 }