Skip to content

Commit

Permalink
edac: edac_mc_handle_error(): add an error_count parameter
Browse files Browse the repository at this point in the history
In order to avoid loosing error events, it is desirable to group
error events together and generate a single trace for several identical
errors.

The trace API already allows reporting multiple errors. Change the
handle_error function to also allow that.

The changes at the drivers were made by this small script:

	$file .=$_ while (<>);
	$file =~ s/(edac_mc_handle_error)\s*\(([^\,]+)\,([^\,]+)\,/$1($2,$3, 1,/g;
	print $file;

Signed-off-by: Mauro Carvalho Chehab <[email protected]>
  • Loading branch information
Mauro Carvalho Chehab committed Jun 12, 2012
1 parent 03f7eae commit 9eb07a7
Show file tree
Hide file tree
Showing 28 changed files with 104 additions and 95 deletions.
22 changes: 11 additions & 11 deletions drivers/edac/amd64_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
if (!src_mci) {
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsigned long)sys_addr);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offset, syndrome,
-1, -1, -1,
"failed to map error addr to a node",
Expand All @@ -1057,7 +1057,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
/* Now map the sys_addr to a CSROW */
csrow = sys_addr_to_csrow(src_mci, sys_addr);
if (csrow < 0) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offset, syndrome,
-1, -1, -1,
"failed to map error addr to a csrow",
Expand All @@ -1077,7 +1077,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
"possible error reporting race\n",
syndrome);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offset, syndrome,
csrow, -1, -1,
"unknown syndrome - possible error reporting race",
Expand All @@ -1096,7 +1096,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
channel = ((sys_addr & BIT(3)) != 0);
}

edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
page, offset, syndrome,
csrow, channel, -1,
"", "");
Expand Down Expand Up @@ -1608,7 +1608,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);

if (csrow < 0) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offset, syndrome,
-1, -1, -1,
"failed to map error addr to a csrow",
Expand All @@ -1624,7 +1624,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
if (dct_ganging_enabled(pvt))
chan = get_channel_from_ecc_syndrome(mci, syndrome);

edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offset, syndrome,
csrow, chan, -1,
"", "");
Expand Down Expand Up @@ -1909,7 +1909,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
/* Ensure that the Error Address is VALID */
if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0, 0,
-1, -1, -1,
"HW has no ERROR_ADDRESS available",
Expand Down Expand Up @@ -1937,7 +1937,7 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)

if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
0, 0, 0,
-1, -1, -1,
"HW has no ERROR_ADDRESS available",
Expand All @@ -1956,7 +1956,7 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (!src_mci) {
amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
(unsigned long)sys_addr);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
page, offset, 0,
-1, -1, -1,
"ERROR ADDRESS NOT mapped to a MC",
Expand All @@ -1970,13 +1970,13 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (csrow < 0) {
amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
(unsigned long)sys_addr);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
page, offset, 0,
-1, -1, -1,
"ERROR ADDRESS NOT mapped to CS",
"");
} else {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
page, offset, 0,
csrow, -1, -1,
"", "");
Expand Down
4 changes: 2 additions & 2 deletions drivers/edac/amd76x_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,

if (handle_errors) {
row = (info->ecc_mode_status >> 4) & 0xf;
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
mci->csrows[row]->first_page, 0, 0,
row, 0, -1,
mci->ctl_name, "");
Expand All @@ -160,7 +160,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,

if (handle_errors) {
row = info->ecc_mode_status & 0xf;
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
mci->csrows[row]->first_page, 0, 0,
row, 0, -1,
mci->ctl_name, "");
Expand Down
4 changes: 2 additions & 2 deletions drivers/edac/cell_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
syndrome = (ar & 0x000000001fe00000ul) >> 21;

/* TODO: Decoding of the error address */
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
csrow->first_page + pfn, offset, syndrome,
0, chan, -1, "", "");
}
Expand All @@ -70,7 +70,7 @@ static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
offset = address & ~PAGE_MASK;

/* TODO: Decoding of the error address */
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
csrow->first_page + pfn, offset, 0,
0, chan, -1, "", "");
}
Expand Down
4 changes: 2 additions & 2 deletions drivers/edac/cpc925_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,15 +554,15 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
if (apiexcp & CECC_EXCP_DETECTED) {
cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n");
channel = cpc925_mc_find_channel(mci, syndrome);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
pfn, offset, syndrome,
csrow, channel, -1,
mci->ctl_name, "");
}

if (apiexcp & UECC_EXCP_DETECTED) {
cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
pfn, offset, 0,
csrow, -1, -1,
mci->ctl_name, "");
Expand Down
8 changes: 4 additions & 4 deletions drivers/edac/e752x_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one,
channel = !(error_one & 1);

/* e752x mc reads 34:6 of the DRAM linear address */
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offset_in_page(sec1_add << 4), sec1_syndrome,
row, channel, -1,
"e752x CE", "");
Expand Down Expand Up @@ -408,7 +408,7 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
edac_mc_find_csrow_by_page(mci, block_page);

/* e752x mc reads 34:6 of the DRAM linear address */
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
block_page,
offset_in_page(error_2b << 4), 0,
row, -1, -1,
Expand All @@ -427,7 +427,7 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
edac_mc_find_csrow_by_page(mci, block_page);

/* e752x mc reads 34:6 of the DRAM linear address */
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
block_page,
offset_in_page(error_2b << 4), 0,
row, -1, -1,
Expand All @@ -454,7 +454,7 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci,
return;

edac_dbg(3, "\n");
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1,
"e752x UE log memory write", "");
}
Expand Down
8 changes: 4 additions & 4 deletions drivers/edac/e7xxx_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,14 +219,14 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
row = edac_mc_find_csrow_by_page(mci, page);
/* convert syndrome to channel */
channel = e7xxx_find_channel(syndrome);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, page, 0, syndrome,
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, page, 0, syndrome,
row, channel, -1, "e7xxx CE", "");
}

static void process_ce_no_info(struct mem_ctl_info *mci)
{
edac_dbg(3, "\n");
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0, -1, -1, -1,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
"e7xxx CE log register overflow", "");
}

Expand All @@ -242,15 +242,15 @@ static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
block_page = error_2b >> 6; /* convert to 4k address */
row = edac_mc_find_csrow_by_page(mci, block_page);

edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, block_page, 0, 0,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, block_page, 0, 0,
row, -1, -1, "e7xxx UE", "");
}

static void process_ue_no_info(struct mem_ctl_info *mci)
{
edac_dbg(3, "\n");

edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0, -1, -1, -1,
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
"e7xxx UE log register overflow", "");
}

Expand Down
1 change: 1 addition & 0 deletions drivers/edac/edac_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
unsigned long page);
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
struct mem_ctl_info *mci,
const u16 error_count,
const unsigned long page_frame_number,
const unsigned long offset_in_page,
const unsigned long syndrome,
Expand Down
Loading

0 comments on commit 9eb07a7

Please sign in to comment.