edac: Allow panic on correctable errors (CE)

Add an EDAC device flag and associated sysfs entries to
allow an EDAC driver to be configured to panic the kernel
if a correctable error is detected. Though correctable
errors (by definition) have no adverse system effects,
a panic may still be useful, since a correctable error may
be indicative of a marginal system state.

Change-Id: I98921469254aa7b999979c1c7d9186286f982a0c
Signed-off-by: Stepan Moskovchenko <stepanm@codeaurora.org>
This commit is contained in:
Stepan Moskovchenko 2014-06-10 19:24:50 -07:00 committed by David Keitel
parent 4a7d6158d6
commit 0e0c931df1
3 changed files with 38 additions and 0 deletions

View file

@ -223,6 +223,7 @@ struct edac_device_ctl_info {
/* Per instance controls for this edac_device */
int log_ue; /* boolean for logging UEs */
int log_ce; /* boolean for logging CEs */
int panic_on_ce; /* boolean for panic'ing on an CE */
int panic_on_ue; /* boolean for panic'ing on an UE */
unsigned poll_msec; /* number of milliseconds to poll interval */
unsigned long delay; /* number of jiffies for poll_msec */

View file

@ -611,6 +611,12 @@ static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev)
return edac_dev->log_ue;
}
/*
 * edac_device_get_panic_on_ce - read the panic-on-CE control flag
 * @edac_dev: EDAC device control structure to query
 *
 * Returns the panic_on_ce field of @edac_dev unchanged.
 */
static inline int
edac_device_get_panic_on_ce(struct edac_device_ctl_info *edac_dev)
{
	return edac_dev->panic_on_ce;
}
static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
*edac_dev)
{
@ -660,6 +666,11 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
"CE: %s instance: %s block: %s '%s'\n",
edac_dev->ctl_name, instance->name,
block ? block->name : "N/A", msg);
if (edac_device_get_panic_on_ce(edac_dev))
panic("EDAC %s: CE instance: %s block %s '%s'\n",
edac_dev->ctl_name, instance->name,
block ? block->name : "N/A", msg);
}
EXPORT_SYMBOL_GPL(edac_device_handle_ce);

View file

@ -62,6 +62,13 @@ static ssize_t edac_device_ctl_log_ce_store(struct edac_device_ctl_info
return count;
}
/*
 * 'panic_on_ce' show handler.
 *
 * Formats the panic_on_ce flag of @ctl_info into @data as an unsigned
 * decimal followed by a newline, bounded by PAGE_SIZE, and returns the
 * value reported by snprintf().
 */
static ssize_t
edac_device_ctl_panic_on_ce_show(struct edac_device_ctl_info *ctl_info,
				 char *data)
{
	/* The flag is stored as an int; convert once so it matches "%u". */
	const unsigned int flag = ctl_info->panic_on_ce;

	return snprintf(data, PAGE_SIZE, "%u\n", flag);
}
/* 'panic_on_ue' */
static ssize_t edac_device_ctl_panic_on_ue_show(struct edac_device_ctl_info
*ctl_info, char *data)
@ -69,6 +76,21 @@ static ssize_t edac_device_ctl_panic_on_ue_show(struct edac_device_ctl_info
return sprintf(data, "%u\n", ctl_info->panic_on_ue);
}
/*
 * 'panic_on_ce' store handler.
 *
 * Parses @data as an unsigned long (base auto-detected by kstrtoul()) and
 * sets the panic_on_ce flag of @ctl_info to 1 for any non-zero value or to
 * 0 for zero.
 *
 * Returns @count on success, or -EINVAL if the input cannot be parsed.
 */
static ssize_t
edac_device_ctl_panic_on_ce_store(struct edac_device_ctl_info *ctl_info,
				  const char *data, size_t count)
{
	unsigned long requested;
	int rc;

	rc = kstrtoul(data, 0, &requested);
	if (rc < 0)
		return -EINVAL;

	/* Normalise to a strict boolean: non-zero enables, zero disables. */
	ctl_info->panic_on_ce = (requested != 0);

	return count;
}
static ssize_t edac_device_ctl_panic_on_ue_store(struct edac_device_ctl_info
*ctl_info, const char *data,
size_t count)
@ -156,6 +178,9 @@ CTL_INFO_ATTR(log_ue, S_IRUGO | S_IWUSR,
edac_device_ctl_log_ue_show, edac_device_ctl_log_ue_store);
/*
 * Per-device control attributes. Each entry names the attribute, gives its
 * permission bits (S_IRUGO | S_IWUSR: readable by everyone, writable by the
 * owner) and supplies the show/store handlers that back it.
 * NOTE(review): CTL_INFO_ATTR is defined outside this view; presumably it
 * emits an attr_ctl_info_<name> object -- confirm against the macro.
 */
CTL_INFO_ATTR(log_ce, S_IRUGO | S_IWUSR,
edac_device_ctl_log_ce_show, edac_device_ctl_log_ce_store);
/* 'panic_on_ce': controls whether a correctable error triggers a panic */
CTL_INFO_ATTR(panic_on_ce, S_IRUGO | S_IWUSR,
edac_device_ctl_panic_on_ce_show,
edac_device_ctl_panic_on_ce_store);
/* 'panic_on_ue': the uncorrectable-error counterpart of 'panic_on_ce' */
CTL_INFO_ATTR(panic_on_ue, S_IRUGO | S_IWUSR,
edac_device_ctl_panic_on_ue_show,
edac_device_ctl_panic_on_ue_store);
@ -164,6 +189,7 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR,
/* Base Attributes of the EDAC_DEVICE ECC object */
static struct ctl_info_attribute *device_ctrl_attr[] = {
&attr_ctl_info_panic_on_ce,
&attr_ctl_info_panic_on_ue,
&attr_ctl_info_log_ue,
&attr_ctl_info_log_ce,