/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *      http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, preventing two drivers (e.g.
 * apei/ghes and i7core_edac) from being used at the same time.
 */
static void const *edac_mc_owner;

unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                 unsigned len)
{
        struct mem_ctl_info *mci = dimm->mci;
        int i, n, count = 0;
        char *p = buf;

        for (i = 0; i < mci->n_layers; i++) {
                n = snprintf(p, len, "%s %d ",
                             edac_layer_name[mci->layers[i].type],
                             dimm->location[i]);
                p += n;
                len -= n;
                count += n;
                if (!len)
                        break;
        }

        return count;
}

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
        edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
        edac_dbg(4, "    channel = %p\n", chan);
        edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
        edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
        char location[80];

        edac_dimm_info_location(dimm, location, sizeof(location));

        edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
                 dimm->mci->csbased ? "rank" : "dimm",
                 number, location, dimm->csrow, dimm->cschannel);
        edac_dbg(4, "  dimm = %p\n", dimm);
        edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
        edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
        edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
        edac_dbg(4, "  csrow = %p\n", csrow);
        edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
        edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
        edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
        edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
        edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
        edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        edac_dbg(3, "\tmci = %p\n", mci);
        edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
        edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
        edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
                 mci->nr_csrows, mci->csrows);
        edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
                 mci->tot_dimms, mci->dimms);
        edac_dbg(3, "\tdev = %p\n", mci->pdev);
        edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
                 mci->mod_name, mci->ctl_name);
        edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif                          /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:          pointer to a pointer with the memory offset to be used. At
 *              return, this will be incremented to point to the next offset
 * @size:       Size of the data structure to be reserved
 * @n_elems:    Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed in order to keep advancing to the proper
 * offsets when allocating a struct along with its embedded structs, as
 * edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
        unsigned align, r;
        void *ptr = *p;

        *p += size * n_elems;

        /*
         * 'ptr' can possibly be an unaligned item X such that sizeof(X) is
         * 'size'.  Adjust 'ptr' so that its alignment is at least as
         * stringent as what the compiler would provide for X and return
         * the aligned result.
         * Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return (char *)ptr;

        /* Align on the offset value being returned, not on the address of
         * the local that holds it. */
        r = (unsigned long)ptr % align;

        if (r == 0)
                return (char *)ptr;

        *p += align - r;

        return (void *)(((unsigned long)ptr) + align - r);
}
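
/*
 * Usage sketch (illustrative only, not compiled): one pass of
 * edac_align_ptr() against a NULL base computes aligned offsets and the
 * total size; after the real allocation, the same offsets are rebased
 * onto the returned chunk, which is exactly what edac_mc_alloc() does
 * below. "struct foo_ctl" and foo_alloc_example() are made-up names,
 * not part of the EDAC API.
 */
#if 0
struct foo_ctl {
        u32 *counters;
};

static struct foo_ctl *foo_alloc_example(unsigned int n_counters)
{
        void *ptr = NULL;
        struct foo_ctl *ctl;
        u32 *counters;
        size_t size;
        void *base;

        /* Pass 1: walk an imaginary chunk at address 0 to get offsets */
        ctl = edac_align_ptr(&ptr, sizeof(*ctl), 1);
        counters = edac_align_ptr(&ptr, sizeof(u32), n_counters);
        size = (unsigned long)ptr;

        base = kzalloc(size, GFP_KERNEL);
        if (!base)
                return NULL;

        /* Pass 2: turn the offsets into pointers inside the allocation */
        ctl = (struct foo_ctl *)((char *)base + (unsigned long)ctl);
        counters = (u32 *)((char *)base + (unsigned long)counters);
        ctl->counters = counters;

        return ctl;
}
#endif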

static void _edac_mc_free(struct mem_ctl_info *mci)
{
        int i, chn, row;
        struct csrow_info *csr;
        const unsigned int tot_dimms = mci->tot_dimms;
        const unsigned int tot_channels = mci->num_cschannel;
        const unsigned int tot_csrows = mci->nr_csrows;

        if (mci->dimms) {
                for (i = 0; i < tot_dimms; i++)
                        kfree(mci->dimms[i]);
                kfree(mci->dimms);
        }
        if (mci->csrows) {
                for (row = 0; row < tot_csrows; row++) {
                        csr = mci->csrows[row];
                        if (csr) {
                                if (csr->channels) {
                                        for (chn = 0; chn < tot_channels; chn++)
                                                kfree(csr->channels[chn]);
                                        kfree(csr->channels);
                                }
                                kfree(csr);
                        }
                }
                kfree(mci->csrows);
        }
        kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:             Memory controller number
 * @n_layers:           Number of MC hierarchy layers
 * @layers:             Describes each layer as seen by the Memory Controller
 * @sz_pvt:             size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks requires driver changes.
 *
 * Returns:
 *      On failure: NULL
 *      On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
                                   unsigned n_layers,
                                   struct edac_mc_layer *layers,
                                   unsigned sz_pvt)
{
        struct mem_ctl_info *mci;
        struct edac_mc_layer *layer;
        struct csrow_info *csr;
        struct rank_info *chan;
        struct dimm_info *dimm;
        u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
        unsigned pos[EDAC_MAX_LAYERS];
        unsigned size, tot_dimms = 1, count = 1;
        unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
        void *pvt, *p, *ptr = NULL;
        int i, j, row, chn, n, len, off;
        bool per_rank = false;

        BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
        /*
         * Calculate the total amount of dimms and csrows/cschannels while
         * in the old API emulation mode
         */
        for (i = 0; i < n_layers; i++) {
                tot_dimms *= layers[i].size;
                if (layers[i].is_virt_csrow)
                        tot_csrows *= layers[i].size;
                else
                        tot_channels *= layers[i].size;

                if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
                        per_rank = true;
        }

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
        layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
        for (i = 0; i < n_layers; i++) {
                count *= layers[i].size;
                edac_dbg(4, "errcount layer %d size %d\n", i, count);
                ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                tot_errcount += 2 * count;
        }

        edac_dbg(4, "allocating %d error counters\n", tot_errcount);
        pvt = edac_align_ptr(&ptr, sz_pvt, 1);
        size = ((unsigned long)pvt) + sz_pvt;

        edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
                 size,
                 tot_dimms,
                 per_rank ? "ranks" : "dimms",
                 tot_csrows * tot_channels);

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
        for (i = 0; i < n_layers; i++) {
                mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
                mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
        }
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = mc_num;
        mci->tot_dimms = tot_dimms;
        mci->pvt_info = pvt;
        mci->n_layers = n_layers;
        mci->layers = layer;
        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
        mci->nr_csrows = tot_csrows;
        mci->num_cschannel = tot_channels;
        mci->csbased = per_rank;

        /*
         * Allocate and fill the csrow/channels structs
         */
        mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
        if (!mci->csrows)
                goto error;
        for (row = 0; row < tot_csrows; row++) {
                csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
                if (!csr)
                        goto error;
                mci->csrows[row] = csr;
                csr->csrow_idx = row;
                csr->mci = mci;
                csr->nr_channels = tot_channels;
                csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
                                        GFP_KERNEL);
                if (!csr->channels)
                        goto error;

                for (chn = 0; chn < tot_channels; chn++) {
                        chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
                        if (!chan)
                                goto error;
                        csr->channels[chn] = chan;
                        chan->chan_idx = chn;
                        chan->csrow = csr;
                }
        }

        /*
         * Allocate and fill the dimm structs
         */
        mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
        if (!mci->dimms)
                goto error;

        memset(&pos, 0, sizeof(pos));
        row = 0;
        chn = 0;
        for (i = 0; i < tot_dimms; i++) {
                chan = mci->csrows[row]->channels[chn];
                off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
                if (off < 0 || off >= tot_dimms) {
                        edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
                        goto error;
                }

                dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
                if (!dimm)
                        goto error;
                mci->dimms[off] = dimm;
                dimm->mci = mci;

                /*
                 * Copy DIMM location and initialize it.
                 */
                len = sizeof(dimm->label);
                p = dimm->label;
                n = snprintf(p, len, "mc#%u", mc_num);
                p += n;
                len -= n;
                for (j = 0; j < n_layers; j++) {
                        n = snprintf(p, len, "%s#%u",
                                     edac_layer_name[layers[j].type],
                                     pos[j]);
                        p += n;
                        len -= n;
                        dimm->location[j] = pos[j];

                        if (len <= 0)
                                break;
                }

                /* Link it to the csrows old API data */
                chan->dimm = dimm;
                dimm->csrow = row;
                dimm->cschannel = chn;

                /* Increment csrow location */
                if (layers[0].is_virt_csrow) {
                        chn++;
                        if (chn == tot_channels) {
                                chn = 0;
                                row++;
                        }
                } else {
                        row++;
                        if (row == tot_csrows) {
                                row = 0;
                                chn++;
                        }
                }

                /* Increment dimm location */
                for (j = n_layers - 1; j >= 0; j--) {
                        pos[j]++;
                        if (pos[j] < layers[j].size)
                                break;
                        pos[j] = 0;
                }
        }

        mci->op_state = OP_ALLOC;

        return mci;

error:
        _edac_mc_free(mci);

        return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
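
/*
 * Usage sketch (illustrative only, not compiled): a hypothetical driver
 * describing a controller with 4 chip-select rows and 2 channels, plus a
 * private struct. "struct foo_pvt" and foo_mci_create() are made-up
 * names; real drivers follow this same shape.
 */
#if 0
struct foo_pvt {
        void __iomem *regs;
};

static struct mem_ctl_info *foo_mci_create(void)
{
        struct edac_mc_layer layers[2];
        struct mem_ctl_info *mci;

        layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
        layers[0].size = 4;
        layers[0].is_virt_csrow = true;
        layers[1].type = EDAC_MC_LAYER_CHANNEL;
        layers[1].size = 2;
        layers[1].is_virt_csrow = false;

        mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
                            sizeof(struct foo_pvt));
        if (!mci)
                return NULL;

        /* 4 csrows x 2 channels: 8 struct dimm_info were allocated */
        return mci;
}
#endif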

/**
 * edac_mc_free
 *      'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        edac_dbg(1, "\n");

        /* If we're not yet registered with sysfs free only what was allocated
         * in edac_mc_alloc().
         */
        if (!device_is_registered(&mci->dev)) {
                _edac_mc_free(mci);
                return;
        }

        /* the mci instance is freed here, when the sysfs object is dropped */
        edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *      scan list of controllers looking for the one that manages
 *      the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        edac_dbg(3, "\n");

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->pdev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *      performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *      initialize a workq item for this mci
 *      passing in the new delay period in msec
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
        edac_dbg(0, "\n");

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
        mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *      stop the workq processing on this mci
 *
 *      locking model:
 *
 *              called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                edac_dbg(0, "not canceled, flush the queue\n");

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *      user space has updated our poll period value, need to
 *      reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, (unsigned long) value);
        }

        mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->pdev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
        int handlers = atomic_dec_return(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);

        return handlers;
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *      0       Success
 *      !0      Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        int ret = -EINVAL;

        edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        struct csrow_info *csrow = mci->csrows[i];
                        u32 nr_pages = 0;
                        int j;

                        for (j = 0; j < csrow->nr_channels; j++)
                                nr_pages += csrow->channels[j]->dimm->nr_pages;
                        if (!nr_pages)
                                continue;
                        edac_mc_dump_csrow(csrow);
                        for (j = 0; j < csrow->nr_channels; j++)
                                if (csrow->channels[j]->dimm->nr_pages)
                                        edac_mc_dump_channel(csrow->channels[j]);
                }
                for (i = 0; i < mci->tot_dimms; i++)
                        if (mci->dimms[i]->nr_pages)
                                edac_mc_dump_dimm(mci->dimms[i], i);
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
                ret = -EPERM;
                goto fail0;
        }

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        edac_mc_owner = mci->mod_name;

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
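
/*
 * Registration sketch (illustrative only, not compiled): the usual
 * probe-time sequence in a hypothetical driver. foo_check() and
 * foo_probe_example() are made-up names; setting mci->edac_check before
 * edac_mc_add_mc() is what selects the polled operating mode above.
 */
#if 0
static void foo_check(struct mem_ctl_info *mci)
{
        /* poll controller status registers here and, on error, decode
         * the address and call edac_mc_handle_error() */
}

static int foo_probe_example(struct mem_ctl_info *mci,
                             struct device *foo_dev)
{
        mci->pdev = foo_dev;
        mci->mod_name = "foo_edac";
        mci->ctl_name = "FOO memory controller";
        mci->edac_check = foo_check;    /* omit to run in interrupt mode */

        if (edac_mc_add_mc(mci)) {
                edac_mc_free(mci);
                return -ENODEV;
        }

        return 0;
}
#endif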

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        edac_dbg(0, "\n");

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        if (!del_mc_from_global_list(mci))
                edac_mc_owner = NULL;
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
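
/*
 * Teardown sketch (illustrative only, not compiled): the matching
 * remove-time sequence for the probe sketch above. foo_remove_example()
 * is a made-up name; 'dev' is the same device stored in mci->pdev.
 */
#if 0
static void foo_remove_example(struct device *dev)
{
        struct mem_ctl_info *mci = edac_mc_del_mc(dev);

        if (mci)
                edac_mc_free(mci);
}
#endif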

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        edac_dbg(3, "\n");

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info **csrows = mci->csrows;
        int row, i, j, n;

        edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = csrows[i];
                n = 0;
                for (j = 0; j < csrow->nr_channels; j++) {
                        struct dimm_info *dimm = csrow->channels[j]->dimm;
                        n += dimm->nr_pages;
                }
                if (n == 0)
                        continue;

                edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
                         mci->mc_idx,
                         csrow->first_page, page, csrow->last_page,
                         csrow->page_mask);

                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_BRANCH] = "branch",
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
        [EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
                              bool enable_per_layer_report,
                              const int pos[EDAC_MAX_LAYERS],
                              const u16 count)
{
        int i, index = 0;

        mci->ce_mc += count;

        if (!enable_per_layer_report) {
                mci->ce_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ce_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
                              bool enable_per_layer_report,
                              const int pos[EDAC_MAX_LAYERS],
                              const u16 count)
{
        int i, index = 0;

        mci->ue_mc += count;

        if (!enable_per_layer_report) {
                mci->ue_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ue_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_ce_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          long grain)
{
        unsigned long remapped_page;
        char *msg_aux = "";

        if (*msg)
                msg_aux = " ";

        if (edac_mc_get_log_ce()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s%son %s (%s %s - %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s%son %s (%s %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail);
        }
        edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some memory controllers (called MCs below) can remap
                 * memory so that it is still available at a different
                 * address when PCI devices map into memory.
                 * MC's that can't do this, lose the memory where PCI
                 * devices are mapped. This mapping is MC-dependent
                 * and so we call back into the MC driver for it to
                 * map the MC page to a physical (CPU) page which can
                 * then be mapped to a virtual page - which can then
                 * be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page,
                                        offset_in_page, grain);
        }
}

static void edac_ue_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report)
{
        char *msg_aux = "";

        if (*msg)
                msg_aux = " ";

        if (edac_mc_get_log_ue()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s%son %s (%s %s - %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s%son %s (%s %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail);
        }

        if (edac_mc_get_panic_on_ue()) {
                if (other_detail && *other_detail)
                        panic("UE %s%son %s (%s%s - %s)\n",
                              msg, msg_aux, label, location, detail, other_detail);
                else
                        panic("UE %s%son %s (%s%s)\n",
                              msg, msg_aux, label, location, detail);
        }

        edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

/**
 * edac_raw_mc_handle_error - reports a memory event to userspace without doing
 *                            anything to discover the error location
 *
 * @type:               severity of the error (CE/UE/Fatal)
 * @mci:                a struct mem_ctl_info pointer
 * @e:                  error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from BIOS,
 * as in the case of the APEI GHES driver.
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
                              struct mem_ctl_info *mci,
                              struct edac_raw_error_desc *e)
{
        char detail[80];
        int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };

        /* Memory type dependent details about the error */
        if (type == HW_EVENT_ERR_CORRECTED) {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
                        e->page_frame_number, e->offset_in_page,
                        e->grain, e->syndrome);
                edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
                              detail, e->other_detail, e->enable_per_layer_report,
                              e->page_frame_number, e->offset_in_page, e->grain);
        } else {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld",
                        e->page_frame_number, e->offset_in_page, e->grain);

                edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
                              detail, e->other_detail, e->enable_per_layer_report);
        }
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:               severity of the error (CE/UE/Fatal)
 * @mci:                a struct mem_ctl_info pointer
 * @error_count:        Number of errors of the same type
 * @page_frame_number:  mem page where the error occurred
 * @offset_in_page:     offset of the error inside the page
 * @syndrome:           ECC syndrome
 * @top_layer:          Memory layer[0] position
 * @mid_layer:          Memory layer[1] position
 * @low_layer:          Memory layer[2] position
 * @msg:                Message meaningful to the end users that
 *                      explains the event
 * @other_detail:       Technical details about the event that
 *                      may help hardware manufacturers and
 *                      EDAC developers to analyse the event
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          struct mem_ctl_info *mci,
                          const u16 error_count,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          const unsigned long syndrome,
                          const int top_layer,
                          const int mid_layer,
                          const int low_layer,
                          const char *msg,
                          const char *other_detail)
{
        char *p;
        int row = -1, chan = -1;
        int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
        int i, n_labels = 0;
        u8 grain_bits;
        struct edac_raw_error_desc *e = &mci->error_desc;

        edac_dbg(3, "MC%d\n", mci->mc_idx);

        /* Fills the error report buffer */
        memset(e, 0, sizeof(*e));
        e->error_count = error_count;
        e->top_layer = top_layer;
        e->mid_layer = mid_layer;
        e->low_layer = low_layer;
        e->page_frame_number = page_frame_number;
        e->offset_in_page = offset_in_page;
        e->syndrome = syndrome;
        e->msg = msg;
        e->other_detail = other_detail;

        /*
         * Check if the event report is consistent and if the memory
         * location is known. If it is known, enable_per_layer_report will be
         * true, the DIMM(s) label info will be filled and the per-layer
         * error counters will be incremented.
         */
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] >= (int)mci->layers[i].size) {
                        edac_mc_printk(mci, KERN_ERR,
                                       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
                                       edac_layer_name[mci->layers[i].type],
                                       pos[i], mci->layers[i].size);
                        /*
                         * Instead of just returning it, let's use what's
                         * known about the error. The increment routines and
                         * the DIMM filter logic will do the right thing by
                         * pointing the likely damaged DIMMs.
                         */
                        pos[i] = -1;
                }
                if (pos[i] >= 0)
                        e->enable_per_layer_report = true;
        }

        /*
         * Get the dimm label/grain that applies to the match criteria.
         * As the error algorithm may not be able to point to just one memory
         * stick, the logic here will get all possible labels that could
         * potentially be affected by the error.
         * On FB-DIMM memory controllers, for uncorrected errors, it is common
         * to have only the MC channel and the MC dimm (also called "branch")
         * but the channel is not known, as the memory is arranged in pairs,
         * where each memory belongs to a separate channel within the same
         * branch.
         */
        p = e->label;
        *p = '\0';

        for (i = 0; i < mci->tot_dimms; i++) {
                struct dimm_info *dimm = mci->dimms[i];

                if (top_layer >= 0 && top_layer != dimm->location[0])
                        continue;
                if (mid_layer >= 0 && mid_layer != dimm->location[1])
                        continue;
                if (low_layer >= 0 && low_layer != dimm->location[2])
                        continue;

                /* get the max grain, over the error match range */
                if (dimm->grain > e->grain)
                        e->grain = dimm->grain;

                /*
                 * If the error is memory-controller wide, there's no need to
                 * seek for the affected DIMMs because the whole
                 * channel/memory controller/...  may be affected.
                 * Also, don't show errors for empty DIMM slots.
                 */
                if (e->enable_per_layer_report && dimm->nr_pages) {
                        if (n_labels >= EDAC_MAX_LABELS) {
                                e->enable_per_layer_report = false;
                                break;
                        }
                        n_labels++;
                        if (p != e->label) {
                                strcpy(p, OTHER_LABEL);
                                p += strlen(OTHER_LABEL);
                        }
                        strcpy(p, dimm->label);
                        p += strlen(p);
                        *p = '\0';

                        /*
                         * get csrow/channel of the DIMM, in order to allow
                         * incrementing the compat API counters
                         */
                        edac_dbg(4, "%s csrows map: (%d,%d)\n",
                                 mci->csbased ? "rank" : "dimm",
                                 dimm->csrow, dimm->cschannel);
                        if (row == -1)
                                row = dimm->csrow;
                        else if (row >= 0 && row != dimm->csrow)
                                row = -2;

                        if (chan == -1)
                                chan = dimm->cschannel;
                        else if (chan >= 0 && chan != dimm->cschannel)
                                chan = -2;
                }
        }

        if (!e->enable_per_layer_report) {
                strcpy(e->label, "any memory");
        } else {
                edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
                if (p == e->label)
                        strcpy(e->label, "unknown memory");
                if (type == HW_EVENT_ERR_CORRECTED) {
                        if (row >= 0) {
                                mci->csrows[row]->ce_count += error_count;
                                if (chan >= 0)
                                        mci->csrows[row]->channels[chan]->ce_count += error_count;
                        }
                } else
                        if (row >= 0)
                                mci->csrows[row]->ue_count += error_count;
        }

        /* Fill the RAM location data */
        p = e->location;

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        continue;

                p += sprintf(p, "%s:%d ",
                             edac_layer_name[mci->layers[i].type],
                             pos[i]);
        }
        if (p > e->location)
                *(p - 1) = '\0';

        /* Report the error via the trace interface */
        grain_bits = fls_long(e->grain) + 1;
        trace_mc_event(type, e->msg, e->label, e->error_count,
                       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
                       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
                       grain_bits, e->syndrome, e->other_detail);

        edac_raw_mc_handle_error(type, mci, e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
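
/*
 * Reporting sketch (illustrative only, not compiled): how a hypothetical
 * polled check routine might report one corrected error once it has
 * decoded the faulting page/offset/syndrome from its registers.
 * foo_report_ce_example() is a made-up name; the layer positions assume
 * a two-layer csrow/channel layout (csrow 1, channel 0), so the unused
 * third layer is passed as -1.
 */
#if 0
static void foo_report_ce_example(struct mem_ctl_info *mci,
                                  unsigned long pfn, unsigned long offset,
                                  unsigned long syndrome)
{
        edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
                             1,                 /* error_count */
                             pfn, offset, syndrome,
                             1, 0, -1,          /* top/mid/low layer */
                             "foo: single-bit error",
                             "");
}
#endif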