drivers/edac/edac_mc.c

   1 /*
   2  * edac_mc kernel module
   3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
   4  * This file may be distributed under the terms of the
   5  * GNU General Public License.
   6  *
   7  * Written by Thayne Harbaugh
   8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
   9  *      http://www.anime.net/~goemon/linux-ecc/
  10  *
  11  * Modified by Dave Peterson and Doug Thompson
  12  *
  13  */
  14
  15 #include <linux/module.h>
  16 #include <linux/proc_fs.h>
  17 #include <linux/kernel.h>
  18 #include <linux/types.h>
  19 #include <linux/smp.h>
  20 #include <linux/init.h>
  21 #include <linux/sysctl.h>
  22 #include <linux/highmem.h>
  23 #include <linux/timer.h>
  24 #include <linux/slab.h>
  25 #include <linux/jiffies.h>
  26 #include <linux/spinlock.h>
  27 #include <linux/list.h>
  28 #include <linux/ctype.h>
  29 #include <linux/edac.h>
  30 #include <asm/uaccess.h>
  31 #include <asm/page.h>
  32 #include <asm/edac.h>
  33 #include "edac_core.h"
  34 #include "edac_module.h"
  35
  36 /* lock to memory controller's control array */
  37 static DEFINE_MUTEX(mem_ctls_mutex);
  /*
   * Global list of registered memory controllers. add_mc_to_global_list()
   * inserts each entry in front of the first one with a larger mc_idx, so
   * the list stays ordered by ascending mc_idx.
   */
  38 static LIST_HEAD(mc_devices);
  39
  40 #ifdef CONFIG_EDAC_DEBUG
  41
  /*
   * Debug helper: log, through debugf4(), the address of one channel
   * (struct rank_info) entry together with its index and its back pointers
   * to the owning csrow and to the dimm it is linked to.
   */
  42 static void edac_mc_dump_channel(struct rank_info *chan)
  43 {
  44         debugf4("\tchannel = %p\n", chan);
  45         debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
  46         debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
  47         debugf4("\tchannel->dimm = %p\n", chan->dimm);
  48 }
  49
  50 static void edac_mc_dump_dimm(struct dimm_info *dimm)
  51 {
  52         int i;
  53
  54         debugf4("\tdimm = %p\n", dimm);
  55         debugf4("\tdimm->label = '%s'\n", dimm->label);
  56         debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
  57         debugf4("\tdimm location ");
  58         for (i = 0; i < dimm->mci->n_layers; i++) {
  59                 printk(KERN_CONT "%d", dimm->location[i]);
  60                 if (i < dimm->mci->n_layers - 1)
  61                         printk(KERN_CONT ".");
  62         }
  63         printk(KERN_CONT "\n");
  64         debugf4("\tdimm->grain = %d\n", dimm->grain);
  65         debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
  66 }
  67
  /*
   * Debug helper: log, through debugf4(), the address of one csrow_info
   * entry followed by its index, page range (first/last page and page mask),
   * channel count, channel array pointer and owning controller pointer.
   */
  68 static void edac_mc_dump_csrow(struct csrow_info *csrow)
  69 {
  70         debugf4("\tcsrow = %p\n", csrow);
  71         debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
  72         debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
  73         debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
  74         debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
  75         debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
  76         debugf4("\tcsrow->channels = %p\n", csrow->channels);
  77         debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
  78 }
  79
  /*
   * Debug helper: log the top-level fields of a memory controller
   * descriptor (capability masks, csrow/dimm counts and array pointers,
   * device pointer, module/controller names and private data pointer).
   *
   * Everything is emitted with debugf3() except the edac_check pointer,
   * which uses debugf4().
   * NOTE(review): the "nr_dimms" label actually prints mci->tot_dimms.
   */
  80 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
  81 {
  82         debugf3("\tmci = %p\n", mci);
  83         debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
  84         debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
  85         debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
  86         debugf4("\tmci->edac_check = %p\n", mci->edac_check);
  87         debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
  88                 mci->nr_csrows, mci->csrows);
  89         debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
  90                 mci->tot_dimms, mci->dimms);
  91         debugf3("\tdev = %p\n", mci->dev);
  92         debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
  93         debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
  94 }
  95
  96 #endif                          /* CONFIG_EDAC_DEBUG */
  97
  98 /*
  99  * keep those in sync with the enum mem_type
      *
      * Human-readable description strings, one per memory type. The order of
      * the entries is significant: presumably the table is indexed by
      * enum mem_type values (the enum is not visible in this file), so a new
      * memory type needs its string added at the matching position - verify
      * against the enum when editing.
 100  */
 101 const char *edac_mem_types[] = {
 102         "Empty csrow",
 103         "Reserved csrow type",
 104         "Unknown csrow type",
 105         "Fast page mode RAM",
 106         "Extended data out RAM",
 107         "Burst Extended data out RAM",
 108         "Single data rate SDRAM",
 109         "Registered single data rate SDRAM",
 110         "Double data rate SDRAM",
 111         "Registered Double data rate SDRAM",
 112         "Rambus DRAM",
 113         "Unbuffered DDR2 RAM",
 114         "Fully buffered DDR2",
 115         "Registered DDR2 RAM",
 116         "Rambus XDR",
 117         "Unbuffered DDR3 RAM",
 118         "Registered DDR3 RAM",
 119 };
 120 EXPORT_SYMBOL_GPL(edac_mem_types);
 121
 /**
  * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
  * @p:          pointer to a pointer with the memory offset to be used. At
  *              return, this will be incremented to point to the next offset
  * @size:       Size of one element of the data structure to be reserved
  * @n_elems:    Number of elements that should be reserved
  *
  * If 'size' is a constant, the compiler can reduce the alignment selection
  * below to a constant.
  *
  * The 'p' pointer is needed to keep advancing through memory to the proper
  * offsets when a struct is laid out together with its embedded structs in
  * one allocation, as edac_mc_alloc() does.
  *
  * The current offset '*p' can be unaligned for an item X such that
  * sizeof(X) is 'size'.  The offset is rounded up so that its alignment is
  * at least as stringent as what the compiler would provide for X.
  * Here we assume that the alignment of a "long long" is the most
  * stringent alignment that the compiler will ever provide by default.
  *
  * Returns: the aligned offset at which the 'n_elems' elements start.  '*p'
  * is left just past the reserved area, ready for the next call.
  */
 void *edac_align_ptr(void **p, unsigned size, int n_elems)
 {
         unsigned long start = (unsigned long)*p;
         unsigned align, r;

         /* pick the alignment from the element size */
         if (size > sizeof(long))
                 align = sizeof(long long);
         else if (size > sizeof(int))
                 align = sizeof(long);
         else if (size > sizeof(short))
                 align = sizeof(int);
         else if (size > sizeof(char))
                 align = sizeof(short);
         else
                 align = 1;      /* single bytes never need padding */

         /*
          * The padding depends on where the running offset currently is,
          * not on the element size: round the offset up to 'align'.
          */
         r = start % align;
         if (r != 0)
                 start += align - r;

         /* reserve the elements right after the (possibly padded) start */
         *p = (void *)(start + size * n_elems);

         return (void *)start;
 }
 176
 177 /**
 178  * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 179  * @mc_num:             Memory controller number
 180  * @n_layers:           Number of MC hierarchy layers
 181  * layers:              Describes each layer as seen by the Memory Controller
 182  * @size_pvt:           size of private storage needed
 183  *
 184  *
 185  * Everything is kmalloc'ed as one big chunk - more efficient.
 186  * Only can be used if all structures have the same lifetime - otherwise
 187  * you have to allocate and initialize your own structures.
 188  *
 189  * Use edac_mc_free() to free mc structures allocated by this function.
 190  *
 191  * NOTE: drivers handle multi-rank memories in different ways: in some
 192  * drivers, one multi-rank memory stick is mapped as one entry, while, in
 193  * others, a single multi-rank memory stick would be mapped into several
 194  * entries. Currently, this function will allocate multiple struct dimm_info
 195  * on such scenarios, as grouping the multiple ranks require drivers change.
 196  *
 197  * Returns:
 198  *      On failure: NULL
 199  *      On success: struct mem_ctl_info pointer
 200  */
 201 struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 202                                    unsigned n_layers,
 203                                    struct edac_mc_layer *layers,
 204                                    unsigned sz_pvt)
 205 {
 206         struct mem_ctl_info *mci;
 207         struct edac_mc_layer *layer;
 208         struct csrow_info *csi, *csr;
 209         struct rank_info *chi, *chp, *chan;
 210         struct dimm_info *dimm;
 211         u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 212         unsigned pos[EDAC_MAX_LAYERS];
 213         void *pvt, *ptr = NULL;
 214         unsigned size, tot_dimms = 1, count = 1;
 215         unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
 216         int i, j, err, row, chn;
 217         bool per_rank = false;
 218
 219         BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
 220         /*
 221          * Calculate the total amount of dimms and csrows/cschannels while
 222          * in the old API emulation mode
 223          */
 224         for (i = 0; i < n_layers; i++) {
 225                 tot_dimms *= layers[i].size;
 226                 if (layers[i].is_virt_csrow)
 227                         tot_csrows *= layers[i].size;
 228                 else
 229                         tot_channels *= layers[i].size;
 230
 231                 if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
 232                         per_rank = true;
 233         }
 234
 235         /* Figure out the offsets of the various items from the start of an mc
 236          * structure.  We want the alignment of each item to be at least as
 237          * stringent as what the compiler would provide if we could simply
 238          * hardcode everything into a single struct.
 239          */
 240         mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
 241         layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
 242         csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
 243         chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
 244         dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
 245         for (i = 0; i < n_layers; i++) {
 246                 count *= layers[i].size;
 247                 debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
 248                 ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 249                 ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 250                 tot_errcount += 2 * count;
 251         }
 252
 253         debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
 254         pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 255         size = ((unsigned long)pvt) + sz_pvt;
 256
 257         debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
 258                 __func__, size,
 259                 tot_dimms,
 260                 per_rank ? "ranks" : "dimms",
 261                 tot_csrows * tot_channels);
 262         mci = kzalloc(size, GFP_KERNEL);
 263         if (mci == NULL)
 264                 return NULL;
 265
 266         /* Adjust pointers so they point within the memory we just allocated
 267          * rather than an imaginary chunk of memory located at address 0.
 268          */
 269         layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
 270         csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
 271         chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
 272         dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
 273         for (i = 0; i < n_layers; i++) {
 274                 mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
 275                 mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
 276         }
 277         pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 278
 279         /* setup index and various internal pointers */
 280         mci->mc_idx = mc_num;
 281         mci->csrows = csi;
 282         mci->dimms  = dimm;
 283         mci->tot_dimms = tot_dimms;
 284         mci->pvt_info = pvt;
 285         mci->n_layers = n_layers;
 286         mci->layers = layer;
 287         memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
 288         mci->nr_csrows = tot_csrows;
 289         mci->num_cschannel = tot_channels;
 290         mci->mem_is_per_rank = per_rank;
 291
 292         /*
 293          * Fill the csrow struct
 294          */
 295         for (row = 0; row < tot_csrows; row++) {
 296                 csr = &csi[row];
 297                 csr->csrow_idx = row;
 298                 csr->mci = mci;
 299                 csr->nr_channels = tot_channels;
 300                 chp = &chi[row * tot_channels];
 301                 csr->channels = chp;
 302
 303                 for (chn = 0; chn < tot_channels; chn++) {
 304                         chan = &chp[chn];
 305                         chan->chan_idx = chn;
 306                         chan->csrow = csr;
 307                 }
 308         }
 309
 310         /*
 311          * Fill the dimm struct
 312          */
 313         memset(&pos, 0, sizeof(pos));
 314         row = 0;
 315         chn = 0;
 316         debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
 317                 per_rank ? "ranks" : "dimms");
 318         for (i = 0; i < tot_dimms; i++) {
 319                 chan = &csi[row].channels[chn];
 320                 dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
 321                                pos[0], pos[1], pos[2]);
 322                 dimm->mci = mci;
 323
 324                 debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
 325                         i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
 326                         pos[0], pos[1], pos[2], row, chn);
 327
 328                 /* Copy DIMM location */
 329                 for (j = 0; j < n_layers; j++)
 330                         dimm->location[j] = pos[j];
 331
 332                 /* Link it to the csrows old API data */
 333                 chan->dimm = dimm;
 334                 dimm->csrow = row;
 335                 dimm->cschannel = chn;
 336
 337                 /* Increment csrow location */
 338                 row++;
 339                 if (row == tot_csrows) {
 340                         row = 0;
 341                         chn++;
 342                 }
 343
 344                 /* Increment dimm location */
 345                 for (j = n_layers - 1; j >= 0; j--) {
 346                         pos[j]++;
 347                         if (pos[j] < layers[j].size)
 348                                 break;
 349                         pos[j] = 0;
 350                 }
 351         }
 352
 353         mci->op_state = OP_ALLOC;
 354         INIT_LIST_HEAD(&mci->grp_kobj_list);
 355
 356         /*
 357          * Initialize the 'root' kobj for the edac_mc controller
 358          */
 359         err = edac_mc_register_sysfs_main_kobj(mci);
 360         if (err) {
 361                 kfree(mci);
 362                 return NULL;
 363         }
 364
 365         /* at this point, the root kobj is valid, and in order to
 366          * 'free' the object, then the function:
 367          *      edac_mc_unregister_sysfs_main_kobj() must be called
 368          * which will perform kobj unregistration and the actual free
 369          * will occur during the kobject callback operation
 370          */
 371         return mci;
 372 }
 373 EXPORT_SYMBOL_GPL(edac_mc_alloc);
 374
 375 /**
 376  * edac_mc_free
 377  *      'Free' a previously allocated 'mci' structure
 378  * @mci: pointer to a struct mem_ctl_info structure
      *
      * Unregisters the controller's main sysfs kobject and then releases the
      * memory of the structure with kfree().
      * NOTE(review): the comment at the end of edac_mc_alloc() says the
      * actual free happens in the kobject release callback; confirm that the
      * kfree() here cannot free the same memory a second time.
 379  */
 380 void edac_mc_free(struct mem_ctl_info *mci)
 381 {
 382         debugf1("%s()\n", __func__);
 383
 384         edac_mc_unregister_sysfs_main_kobj(mci);
 385
 386         /* free the mci instance memory here */
 387         kfree(mci);
 388 }
 389 EXPORT_SYMBOL_GPL(edac_mc_free);
 390
 391
 392 /**
 393  * find_mci_by_dev
 394  *
 395  *      scan list of controllers looking for the one that manages
 396  *      the 'dev' device
 397  * @dev: pointer to a struct device related with the MCI
 398  */
 399 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 400 {
 401         struct mem_ctl_info *mci;
 402         struct list_head *item;
 403
 404         debugf3("%s()\n", __func__);
 405
 406         list_for_each(item, &mc_devices) {
 407                 mci = list_entry(item, struct mem_ctl_info, link);
 408
 409                 if (mci->dev == dev)
 410                         return mci;
 411         }
 412
 413         return NULL;
 414 }
 415 EXPORT_SYMBOL_GPL(find_mci_by_dev);
 416
 417 /*
 418  * handler for EDAC to check if NMI type handler has asserted interrupt
 419  */
 420 static int edac_mc_assert_error_check_and_clear(void)
 421 {
 422         int old_state;
 423
 424         if (edac_op_state == EDAC_OPSTATE_POLL)
 425                 return 1;
 426
 427         old_state = edac_err_assert;
 428         edac_err_assert = 0;
 429
 430         return old_state;
 431 }
 432
 433 /*
 434  * edac_mc_workq_function
 435  *      performs the operation scheduled by a workq request
 436  */
 437 static void edac_mc_workq_function(struct work_struct *work_req)
 438 {
 439         struct delayed_work *d_work = to_delayed_work(work_req);
 440         struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
 441
 442         mutex_lock(&mem_ctls_mutex);
 443
 444         /* if this control struct has movd to offline state, we are done */
 445         if (mci->op_state == OP_OFFLINE) {
 446                 mutex_unlock(&mem_ctls_mutex);
 447                 return;
 448         }
 449
 450         /* Only poll controllers that are running polled and have a check */
 451         if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 452                 mci->edac_check(mci);
 453
 454         mutex_unlock(&mem_ctls_mutex);
 455
 456         /* Reschedule */
 457         queue_delayed_work(edac_workqueue, &mci->work,
 458                         msecs_to_jiffies(edac_mc_get_poll_msec()));
 459 }
 460
 461 /*
 462  * edac_mc_workq_setup
 463  *      initialize a workq item for this mci
 464  *      passing in the new delay period in msec
 465  *
 466  *      locking model:
 467  *
 468  *              called with the mem_ctls_mutex held
 469  */
 470 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 471 {
 472         debugf0("%s()\n", __func__);
 473
 474         /* if this instance is not in the POLL state, then simply return */
 475         if (mci->op_state != OP_RUNNING_POLL)
 476                 return;
 477
 478         INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
 479         queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 480 }
 481
 482 /*
 483  * edac_mc_workq_teardown
 484  *      stop the workq processing on this mci
 485  *
 486  *      locking model:
 487  *
 488  *              called WITHOUT lock held
 489  */
 490 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 491 {
 492         int status;
 493
 494         if (mci->op_state != OP_RUNNING_POLL)
 495                 return;
 496
 497         status = cancel_delayed_work(&mci->work);
 498         if (status == 0) {
 499                 debugf0("%s() not canceled, flush the queue\n",
 500                         __func__);
 501
 502                 /* workq instance might be running, wait for it */
 503                 flush_workqueue(edac_workqueue);
 504         }
 505 }
 506
 507 /*
 508  * edac_mc_reset_delay_period(unsigned long value)
 509  *
 510  *      user space has updated our poll period value, need to
 511  *      reset our workq delays
 512  */
 513 void edac_mc_reset_delay_period(int value)
 514 {
 515         struct mem_ctl_info *mci;
 516         struct list_head *item;
 517
 518         mutex_lock(&mem_ctls_mutex);
 519
 520         /* scan the list and turn off all workq timers, doing so under lock
 521          */
 522         list_for_each(item, &mc_devices) {
 523                 mci = list_entry(item, struct mem_ctl_info, link);
 524
 525                 if (mci->op_state == OP_RUNNING_POLL)
 526                         cancel_delayed_work(&mci->work);
 527         }
 528
 529         mutex_unlock(&mem_ctls_mutex);
 530
 531
 532         /* re-walk the list, and reset the poll delay */
 533         mutex_lock(&mem_ctls_mutex);
 534
 535         list_for_each(item, &mc_devices) {
 536                 mci = list_entry(item, struct mem_ctl_info, link);
 537
 538                 edac_mc_workq_setup(mci, (unsigned long) value);
 539         }
 540
 541         mutex_unlock(&mem_ctls_mutex);
 542 }
 543
 544
 545
 546 /* Return 0 on success, 1 on failure.
 547  * Before calling this function, caller must
 548  * assign a unique value to mci->mc_idx.
 549  *
 550  *      locking model:
 551  *
 552  *              called with the mem_ctls_mutex lock held
 553  */
 554 static int add_mc_to_global_list(struct mem_ctl_info *mci)
 555 {
 556         struct list_head *item, *insert_before;
 557         struct mem_ctl_info *p;
 558
 559         insert_before = &mc_devices;
 560
 561         p = find_mci_by_dev(mci->dev);
 562         if (unlikely(p != NULL))
 563                 goto fail0;
 564
 565         list_for_each(item, &mc_devices) {
 566                 p = list_entry(item, struct mem_ctl_info, link);
 567
 568                 if (p->mc_idx >= mci->mc_idx) {
 569                         if (unlikely(p->mc_idx == mci->mc_idx))
 570                                 goto fail1;
 571
 572                         insert_before = item;
 573                         break;
 574                 }
 575         }
 576
 577         list_add_tail_rcu(&mci->link, insert_before);
 578         atomic_inc(&edac_handlers);
 579         return 0;
 580
 581 fail0:
 582         edac_printk(KERN_WARNING, EDAC_MC,
 583                 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
 584                 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 585         return 1;
 586
 587 fail1:
 588         edac_printk(KERN_WARNING, EDAC_MC,
 589                 "bug in low-level driver: attempt to assign\n"
 590                 "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
 591         return 1;
 592 }
 593
 /*
  * Unlink 'mci' from the global controller list and drop the handler count
  * taken by add_mc_to_global_list().
  *
  * The entry is removed with the RCU list primitive and the function then
  * waits in synchronize_rcu() before re-initializing mci->link, so it may
  * sleep.
  */
 594 static void del_mc_from_global_list(struct mem_ctl_info *mci)
 595 {
 596         atomic_dec(&edac_handlers);
 597         list_del_rcu(&mci->link);
 598
 599         /* these are for safe removal of devices from global list while
 600          * NMI handlers may be traversing list
 601          */
 602         synchronize_rcu();
         /* no reader can still see the entry: reset the link to an empty list */
 603         INIT_LIST_HEAD(&mci->link);
 604 }
 605
 606 /**
 607  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 608  *
 609  * If found, return a pointer to the structure.
 610  * Else return NULL.
 611  *
 612  * Caller must hold mem_ctls_mutex.
 613  */
 614 struct mem_ctl_info *edac_mc_find(int idx)
 615 {
 616         struct list_head *item;
 617         struct mem_ctl_info *mci;
 618
 619         list_for_each(item, &mc_devices) {
 620                 mci = list_entry(item, struct mem_ctl_info, link);
 621
 622                 if (mci->mc_idx >= idx) {
 623                         if (mci->mc_idx == idx)
 624                                 return mci;
 625
 626                         break;
 627                 }
 628         }
 629
 630         return NULL;
 631 }
 632 EXPORT_SYMBOL(edac_mc_find);
 633
 634 /**
 635  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 636  *                 create sysfs entries associated with mci structure
 637  * @mci: pointer to the mci structure to be added to the list
 638  *
      * The whole registration runs with mem_ctls_mutex held. When the
      * controller provides an edac_check routine it is put in the
      * OP_RUNNING_POLL state and its polling work is queued with the current
      * poll period; otherwise it is put in the OP_RUNNING_INTERRUPT state.
      * If the sysfs entries cannot be created the controller is taken off
      * the global list again.
      *
 639  * Return:
 640  *      0       Success
 641  *      !0      Failure
      *              (this implementation returns 1)
 642  */
 643
 644 /* FIXME - should a warning be printed if no error detection? correction? */
 645 int edac_mc_add_mc(struct mem_ctl_info *mci)
 646 {
 647         debugf0("%s()\n", __func__);
 648
         /* debug builds only: dump the controller and, at level 4, its contents */
 649 #ifdef CONFIG_EDAC_DEBUG
 650         if (edac_debug_level >= 3)
 651                 edac_mc_dump_mci(mci);
 652
 653         if (edac_debug_level >= 4) {
 654                 int i;
 655
 656                 for (i = 0; i < mci->nr_csrows; i++) {
 657                         int j;
 658
 659                         edac_mc_dump_csrow(&mci->csrows[i]);
 660                         for (j = 0; j < mci->csrows[i].nr_channels; j++)
 661                                 edac_mc_dump_channel(&mci->csrows[i].
 662                                                 channels[j]);
 663                 }
 664                 for (i = 0; i < mci->tot_dimms; i++)
 665                         edac_mc_dump_dimm(&mci->dimms[i]);
 666         }
 667 #endif
 668         mutex_lock(&mem_ctls_mutex);
 669
         /* fails when the device or the mc_idx is already registered */
 670         if (add_mc_to_global_list(mci))
 671                 goto fail0;
 672
 673         /* set load time so that error rate can be tracked */
 674         mci->start_time = jiffies;
 675
 676         if (edac_create_sysfs_mci_device(mci)) {
 677                 edac_mc_printk(mci, KERN_WARNING,
 678                         "failed to create sysfs device\n");
 679                 goto fail1;
 680         }
 681
 682         /* If there IS a check routine, then we are running POLLED */
 683         if (mci->edac_check != NULL) {
 684                 /* This instance is NOW RUNNING */
 685                 mci->op_state = OP_RUNNING_POLL;
 686
                 /* op_state must be set first: the setup skips non-polled mcis */
 687                 edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
 688         } else {
 689                 mci->op_state = OP_RUNNING_INTERRUPT;
 690         }
 691
 692         /* Report action taken */
 693         edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
 694                 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 695
 696         mutex_unlock(&mem_ctls_mutex);
 697         return 0;
 698
         /* undo the list insertion done by add_mc_to_global_list() */
 699 fail1:
 700         del_mc_from_global_list(mci);
 701
 702 fail0:
 703         mutex_unlock(&mem_ctls_mutex);
 704         return 1;
 705 }
 706 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 707
 708 /**
 709  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 710  *                 remove mci structure from global list
 711  * @dev: Pointer to 'struct device' representing mci structure to remove.
 712  *
      * The controller is looked up and unlinked with mem_ctls_mutex held; the
      * polling work is stopped, the controller marked OP_OFFLINE and its
      * sysfs entries removed after the mutex has been dropped. The structure
      * itself is not freed here.
      *
 713  * Return pointer to removed mci structure, or NULL if device not found.
 714  */
 715 struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 716 {
 717         struct mem_ctl_info *mci;
 718
 719         debugf0("%s()\n", __func__);
 720
 721         mutex_lock(&mem_ctls_mutex);
 722
 723         /* find the requested mci struct in the global list */
 724         mci = find_mci_by_dev(dev);
 725         if (mci == NULL) {
 726                 mutex_unlock(&mem_ctls_mutex);
 727                 return NULL;
 728         }
 729
 730         del_mc_from_global_list(mci);
 731         mutex_unlock(&mem_ctls_mutex);
 732
         /*
          * The teardown has to run before op_state is changed below: it
          * returns early unless the controller is still OP_RUNNING_POLL.
          */
 733         /* flush workq processes */
 734         edac_mc_workq_teardown(mci);
 735
 736         /* marking MCI offline */
 737         mci->op_state = OP_OFFLINE;
 738
 739         /* remove from sysfs */
 740         edac_remove_sysfs_mci_device(mci);
 741
 742         edac_printk(KERN_INFO, EDAC_MC,
 743                 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
 744                 mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 745
 746         return mci;
 747 }
 748 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
 749
 750 static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 751                                 u32 size)
 752 {
 753         struct page *pg;
 754         void *virt_addr;
 755         unsigned long flags = 0;
 756
 757         debugf3("%s()\n", __func__);
 758
 759         /* ECC error page was not in our memory. Ignore it. */
 760         if (!pfn_valid(page))
 761                 return;
 762
 763         /* Find the actual page structure then map it and fix */
 764         pg = pfn_to_page(page);
 765
 766         if (PageHighMem(pg))
 767                 local_irq_save(flags);
 768
 769         virt_addr = kmap_atomic(pg);
 770
 771         /* Perform architecture specific atomic scrub operation */
 772         atomic_scrub(virt_addr + offset, size);
 773
 774         /* Unmap and complete */
 775         kunmap_atomic(virt_addr);
 776
 777         if (PageHighMem(pg))
 778                 local_irq_restore(flags);
 779 }
 780
 781 /* FIXME - should return -1 */
 782 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 783 {
 784         struct csrow_info *csrows = mci->csrows;
 785         int row, i, j, n;
 786
 787         debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
 788         row = -1;
 789
 790         for (i = 0; i < mci->nr_csrows; i++) {
 791                 struct csrow_info *csrow = &csrows[i];
 792                 n = 0;
 793                 for (j = 0; j < csrow->nr_channels; j++) {
 794                         struct dimm_info *dimm = csrow->channels[j].dimm;
 795                         n += dimm->nr_pages;
 796                 }
 797                 if (n == 0)
 798                         continue;
 799
 800                 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
 801                         "mask(0x%lx)\n", mci->mc_idx, __func__,
 802                         csrow->first_page, page, csrow->last_page,
 803                         csrow->page_mask);
 804
 805                 if ((page >= csrow->first_page) &&
 806                     (page <= csrow->last_page) &&
 807                     ((page & csrow->page_mask) ==
 808                      (csrow->first_page & csrow->page_mask))) {
 809                         row = i;
 810                         break;
 811                 }
 812         }
 813
 814         if (row == -1)
 815                 edac_mc_printk(mci, KERN_ERR,
 816                         "could not look up page error address %lx\n",
 817                         (unsigned long)page);
 818
 819         return row;
 820 }
 821 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
 822
 /*
  * Short names for the memory controller hierarchy layers, indexed by the
  * EDAC_MC_LAYER_* layer type constants.
  */
 823 const char *edac_layer_name[] = {
 824         [EDAC_MC_LAYER_BRANCH] = "branch",
 825         [EDAC_MC_LAYER_CHANNEL] = "channel",
 826         [EDAC_MC_LAYER_SLOT] = "slot",
 827         [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
 828 };
 829 EXPORT_SYMBOL_GPL(edac_layer_name);
 830
 831 static void edac_inc_ce_error(struct mem_ctl_info *mci,
 832                                     bool enable_per_layer_report,
 833                                     const int pos[EDAC_MAX_LAYERS])
 834 {
 835         int i, index = 0;
 836
 837         mci->ce_count++;
 838
 839         if (!enable_per_layer_report) {
 840                 mci->ce_noinfo_count++;
 841                 return;
 842         }
 843
 844         for (i = 0; i < mci->n_layers; i++) {
 845                 if (pos[i] < 0)
 846                         break;
 847                 index += pos[i];
 848                 mci->ce_per_layer[i][index]++;
 849
 850                 if (i < mci->n_layers - 1)
 851                         index *= mci->layers[i + 1].size;
 852         }
 853 }
 854
 855 static void edac_inc_ue_error(struct mem_ctl_info *mci,
 856                                     bool enable_per_layer_report,
 857                                     const int pos[EDAC_MAX_LAYERS])
 858 {
 859         int i, index = 0;
 860
 861         mci->ue_count++;
 862
 863         if (!enable_per_layer_report) {
 864                 mci->ce_noinfo_count++;
 865                 return;
 866         }
 867
 868         for (i = 0; i < mci->n_layers; i++) {
 869                 if (pos[i] < 0)
 870                         break;
 871                 index += pos[i];
 872                 mci->ue_per_layer[i][index]++;
 873
 874                 if (i < mci->n_layers - 1)
 875                         index *= mci->layers[i + 1].size;
 876         }
 877 }
 878
 879 static void edac_ce_error(struct mem_ctl_info *mci,
 880                           const int pos[EDAC_MAX_LAYERS],
 881                           const char *msg,
 882                           const char *location,
 883                           const char *label,
 884                           const char *detail,
 885                           const char *other_detail,
 886                           const bool enable_per_layer_report,
 887                           const unsigned long page_frame_number,
 888                           const unsigned long offset_in_page,
 889                           u32 grain)
 890 {
 891         unsigned long remapped_page;
 892
 893         if (edac_mc_get_log_ce()) {
 894                 if (other_detail && *other_detail)
 895                         edac_mc_printk(mci, KERN_WARNING,
 896                                        "CE %s on %s (%s%s - %s)\n",
 897                                        msg, label, location,
 898                                        detail, other_detail);
 899                 else
 900                         edac_mc_printk(mci, KERN_WARNING,
 901                                        "CE %s on %s (%s%s)\n",
 902                                        msg, label, location,
 903                                        detail);
 904         }
 905         edac_inc_ce_error(mci, enable_per_layer_report, pos);
 906
 907         if (mci->scrub_mode & SCRUB_SW_SRC) {
 908                 /*
 909                         * Some memory controllers (called MCs below) can remap
 910                         * memory so that it is still available at a different
 911                         * address when PCI devices map into memory.
 912                         * MC's that can't do this, lose the memory where PCI
 913                         * devices are mapped. This mapping is MC-dependent
 914                         * and so we call back into the MC driver for it to
 915                         * map the MC page to a physical (CPU) page which can
 916                         * then be mapped to a virtual page - which can then
 917                         * be scrubbed.
 918                         */
 919                 remapped_page = mci->ctl_page_to_phys ?
 920                         mci->ctl_page_to_phys(mci, page_frame_number) :
 921                         page_frame_number;
 922
 923                 edac_mc_scrub_block(remapped_page,
 924                                         offset_in_page, grain);
 925         }
 926 }
 927
 928 static void edac_ue_error(struct mem_ctl_info *mci,
 929                           const int pos[EDAC_MAX_LAYERS],
 930                           const char *msg,
 931                           const char *location,
 932                           const char *label,
 933                           const char *detail,
 934                           const char *other_detail,
 935                           const bool enable_per_layer_report)
 936 {
 937         if (edac_mc_get_log_ue()) {
 938                 if (other_detail && *other_detail)
 939                         edac_mc_printk(mci, KERN_WARNING,
 940                                        "UE %s on %s (%s%s - %s)\n",
 941                                        msg, label, location, detail,
 942                                        other_detail);
 943                 else
 944                         edac_mc_printk(mci, KERN_WARNING,
 945                                        "UE %s on %s (%s%s)\n",
 946                                        msg, label, location, detail);
 947         }
 948
 949         if (edac_mc_get_panic_on_ue()) {
 950                 if (other_detail && *other_detail)
 951                         panic("UE %s on %s (%s%s - %s)\n",
 952                               msg, label, location, detail, other_detail);
 953                 else
 954                         panic("UE %s on %s (%s%s)\n",
 955                               msg, label, location, detail);
 956         }
 957
 958         edac_inc_ue_error(mci, enable_per_layer_report, pos);
 959 }
 960
 961 #define OTHER_LABEL " or "
 962 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 963                           struct mem_ctl_info *mci,
 964                           const unsigned long page_frame_number,
 965                           const unsigned long offset_in_page,
 966                           const unsigned long syndrome,
 967                           const int layer0,
 968                           const int layer1,
 969                           const int layer2,
 970                           const char *msg,
 971                           const char *other_detail,
 972                           const void *mcelog)
 973 {
 974         /* FIXME: too much for stack: move it to some pre-alocated area */
 975         char detail[80], location[80];
 976         char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
 977         char *p;
 978         int row = -1, chan = -1;
 979         int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
 980         int i;
 981         u32 grain;
 982         bool enable_per_layer_report = false;
 983
 984         debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 985
 986         /*
 987          * Check if the event report is consistent and if the memory
 988          * location is known. If it is known, enable_per_layer_report will be
 989          * true, the DIMM(s) label info will be filled and the per-layer
 990          * error counters will be incremented.
 991          */
 992         for (i = 0; i < mci->n_layers; i++) {
 993                 if (pos[i] >= (int)mci->layers[i].size) {
 994                         if (type == HW_EVENT_ERR_CORRECTED)
 995                                 p = "CE";
 996                         else
 997                                 p = "UE";
 998
 999                         edac_mc_printk(mci, KERN_ERR,
1000                                        "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1001                                        edac_layer_name[mci->layers[i].type],
1002                                        pos[i], mci->layers[i].size);
1003                         /*
1004                          * Instead of just returning it, let's use what's
1005                          * known about the error. The increment routines and
1006                          * the DIMM filter logic will do the right thing by
1007                          * pointing the likely damaged DIMMs.
1008                          */
1009                         pos[i] = -1;
1010                 }
1011                 if (pos[i] >= 0)
1012                         enable_per_layer_report = true;
1013         }
1014
1015         /*
1016          * Get the dimm label/grain that applies to the match criteria.
1017          * As the error algorithm may not be able to point to just one memory
1018          * stick, the logic here will get all possible labels that could
1019          * pottentially be affected by the error.
1020          * On FB-DIMM memory controllers, for uncorrected errors, it is common
1021          * to have only the MC channel and the MC dimm (also called "branch")
1022          * but the channel is not known, as the memory is arranged in pairs,
1023          * where each memory belongs to a separate channel within the same
1024          * branch.
1025          */
1026         grain = 0;
1027         p = label;
1028         *p = '\0';
1029         for (i = 0; i < mci->tot_dimms; i++) {
1030                 struct dimm_info *dimm = &mci->dimms[i];
1031
1032                 if (layer0 >= 0 && layer0 != dimm->location[0])
1033                         continue;
1034                 if (layer1 >= 0 && layer1 != dimm->location[1])
1035                         continue;
1036                 if (layer2 >= 0 && layer2 != dimm->location[2])
1037                         continue;
1038
1039                 /* get the max grain, over the error match range */
1040                 if (dimm->grain > grain)
1041                         grain = dimm->grain;
1042
1043                 /*
1044                  * If the error is memory-controller wide, there's no need to
1045                  * seek for the affected DIMMs because the whole
1046                  * channel/memory controller/...  may be affected.
1047                  * Also, don't show errors for empty DIMM slots.
1048                  */
1049                 if (enable_per_layer_report && dimm->nr_pages) {
1050                         if (p != label) {
1051                                 strcpy(p, OTHER_LABEL);
1052                                 p += strlen(OTHER_LABEL);
1053                         }
1054                         strcpy(p, dimm->label);
1055                         p += strlen(p);
1056                         *p = '\0';
1057
1058                         /*
1059                          * get csrow/channel of the DIMM, in order to allow
1060                          * incrementing the compat API counters
1061                          */
1062                         debugf4("%s: %s csrows map: (%d,%d)\n",
1063                                 __func__,
1064                                 mci->mem_is_per_rank ? "rank" : "dimm",
1065                                 dimm->csrow, dimm->cschannel);
1066
1067                         if (row == -1)
1068                                 row = dimm->csrow;
1069                         else if (row >= 0 && row != dimm->csrow)
1070                                 row = -2;
1071
1072                         if (chan == -1)
1073                                 chan = dimm->cschannel;
1074                         else if (chan >= 0 && chan != dimm->cschannel)
1075                                 chan = -2;
1076                 }
1077         }
1078
1079         if (!enable_per_layer_report) {
1080                 strcpy(label, "any memory");
1081         } else {
1082                 debugf4("%s: csrow/channel to increment: (%d,%d)\n",
1083                         __func__, row, chan);
1084                 if (p == label)
1085                         strcpy(label, "unknown memory");
1086                 if (type == HW_EVENT_ERR_CORRECTED) {
1087                         if (row >= 0) {
1088                                 mci->csrows[row].ce_count++;
1089                                 if (chan >= 0)
1090                                         mci->csrows[row].channels[chan].ce_count++;
1091                         }
1092                 } else
1093                         if (row >= 0)
1094                                 mci->csrows[row].ue_count++;
1095         }
1096
1097         /* Fill the RAM location data */
1098         p = location;
1099         for (i = 0; i < mci->n_layers; i++) {
1100                 if (pos[i] < 0)
1101                         continue;
1102
1103                 p += sprintf(p, "%s:%d ",
1104                              edac_layer_name[mci->layers[i].type],
1105                              pos[i]);
1106         }
1107
1108         /* Memory type dependent details about the error */
1109         if (type == HW_EVENT_ERR_CORRECTED) {
1110                 snprintf(detail, sizeof(detail),
1111                         "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
1112                         page_frame_number, offset_in_page,
1113                         grain, syndrome);
1114                 edac_ce_error(mci, pos, msg, location, label, detail,
1115                               other_detail, enable_per_layer_report,
1116                               page_frame_number, offset_in_page, grain);
1117         } else {
1118                 snprintf(detail, sizeof(detail),
1119                         "page:0x%lx offset:0x%lx grain:%d",
1120                         page_frame_number, offset_in_page, grain);
1121
1122                 edac_ue_error(mci, pos, msg, location, label, detail,
1123                               other_detail, enable_per_layer_report);
1124         }
1125 }
1126 EXPORT_SYMBOL_GPL(edac_mc_handle_error);