forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 8
/
i82860_edac.c
363 lines (294 loc) · 9.06 KB
/
i82860_edac.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
/*
* Intel 82860 Memory Controller kernel module
* (C) 2005 Red Hat (http://www.redhat.com)
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written by Ben Woodard <[email protected]>
* shamelessly copied from and based upon the edac_i82875 driver
* by Thayne Harbaugh of Linux Networx. (http://lnxi.com)
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_module.h"
#define EDAC_MOD_STR "i82860_edac"
#define i82860_printk(level, fmt, arg...) \
edac_printk(level, "i82860", fmt, ##arg)
#define i82860_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "i82860", fmt, ##arg)
#ifndef PCI_DEVICE_ID_INTEL_82860_0
#define PCI_DEVICE_ID_INTEL_82860_0 0x2531
#endif /* PCI_DEVICE_ID_INTEL_82860_0 */
#define I82860_MCHCFG 0x50
#define I82860_GBA 0x60
#define I82860_GBA_MASK 0x7FF
#define I82860_GBA_SHIFT 24
#define I82860_ERRSTS 0xC8
#define I82860_EAP 0xE4
#define I82860_DERRCTL_STS 0xE2
enum i82860_chips {
I82860 = 0,
};
struct i82860_dev_info {
const char *ctl_name;
};
struct i82860_error_info {
u16 errsts;
u32 eap;
u16 derrsyn;
u16 errsts2;
};
static const struct i82860_dev_info i82860_devs[] = {
[I82860] = {
.ctl_name = "i82860"},
};
static struct pci_dev *mci_pdev; /* init dev: in case that AGP code
* has already registered driver
*/
static struct edac_pci_ctl_info *i82860_pci;
static void i82860_get_error_info(struct mem_ctl_info *mci,
struct i82860_error_info *info)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
* registers at once and the registers can transition from CE being
* overwritten by UE.
*/
pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts);
pci_read_config_dword(pdev, I82860_EAP, &info->eap);
pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts2);
pci_write_bits16(pdev, I82860_ERRSTS, 0x0003, 0x0003);
/*
* If the error is the same for both reads then the first set of reads
* is valid. If there is a change then there is a CE no info and the
* second set of reads is valid and should be UE info.
*/
if (!(info->errsts2 & 0x0003))
return;
if ((info->errsts ^ info->errsts2) & 0x0003) {
pci_read_config_dword(pdev, I82860_EAP, &info->eap);
pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
}
}
static int i82860_process_error_info(struct mem_ctl_info *mci,
struct i82860_error_info *info,
int handle_errors)
{
struct dimm_info *dimm;
int row;
if (!(info->errsts2 & 0x0003))
return 0;
if (!handle_errors)
return 1;
if ((info->errsts ^ info->errsts2) & 0x0003) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
info->eap >>= PAGE_SHIFT;
row = edac_mc_find_csrow_by_page(mci, info->eap);
dimm = mci->csrows[row]->channels[0]->dimm;
if (info->errsts & 0x0002)
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
info->eap, 0, 0,
dimm->location[0], dimm->location[1], -1,
"i82860 UE", "");
else
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
info->eap, 0, info->derrsyn,
dimm->location[0], dimm->location[1], -1,
"i82860 CE", "");
return 1;
}
static void i82860_check(struct mem_ctl_info *mci)
{
struct i82860_error_info info;
i82860_get_error_info(mci, &info);
i82860_process_error_info(mci, &info, 1);
}
static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
{
unsigned long last_cumul_size;
u16 mchcfg_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */
u16 value;
u32 cumul_size;
struct csrow_info *csrow;
struct dimm_info *dimm;
int index;
pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim);
mchcfg_ddim = mchcfg_ddim & 0x180;
last_cumul_size = 0;
/* The group row boundary (GRA) reg values are boundary address
* for each DRAM row with a granularity of 16MB. GRA regs are
* cumulative; therefore GRA15 will contain the total memory contained
* in all eight rows.
*/
for (index = 0; index < mci->nr_csrows; index++) {
csrow = mci->csrows[index];
dimm = csrow->channels[0]->dimm;
pci_read_config_word(pdev, I82860_GBA + index * 2, &value);
cumul_size = (value & I82860_GBA_MASK) <<
(I82860_GBA_SHIFT - PAGE_SHIFT);
edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
if (cumul_size == last_cumul_size)
continue; /* not populated */
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
dimm->nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
dimm->grain = 1 << 12; /* I82860_EAP has 4KiB reolution */
dimm->mtype = MEM_RMBS;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
}
}
static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct i82860_error_info discard;
/*
* RDRAM has channels but these don't map onto the csrow abstraction.
* According with the datasheet, there are 2 Rambus channels, supporting
* up to 16 direct RDRAM devices.
* The device groups from the GRA registers seem to map reasonably
* well onto the notion of a chip select row.
* There are 16 GRA registers and since the name is associated with
* the channel and the GRA registers map to physical devices so we are
* going to make 1 channel for group.
*/
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = 2;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_SLOT;
layers[1].size = 8;
layers[1].is_virt_csrow = true;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
edac_dbg(3, "init mci\n");
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
/* I"m not sure about this but I think that all RDRAM is SECDED */
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->ctl_name = i82860_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = i82860_check;
mci->ctl_page_to_phys = NULL;
i82860_init_csrows(mci, pdev);
i82860_get_error_info(mci, &discard); /* clear counters */
/* Here we assume that we will never see multiple instances of this
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
/* allocating generic PCI control info */
i82860_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!i82860_pci) {
printk(KERN_WARNING
"%s(): Unable to create PCI control\n",
__func__);
printk(KERN_WARNING
"%s(): PCI error report via EDAC not setup\n",
__func__);
}
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
fail:
edac_mc_free(mci);
return -ENODEV;
}
/* returns count (>= 0), or negative on error */
static int i82860_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
int rc;
edac_dbg(0, "\n");
i82860_printk(KERN_INFO, "i82860 init one\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
rc = i82860_probe1(pdev, ent->driver_data);
if (rc == 0)
mci_pdev = pci_dev_get(pdev);
return rc;
}
static void i82860_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
edac_dbg(0, "\n");
if (i82860_pci)
edac_pci_release_generic_ctl(i82860_pci);
if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
return;
edac_mc_free(mci);
}
static const struct pci_device_id i82860_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I82860},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, i82860_pci_tbl);
static struct pci_driver i82860_driver = {
.name = EDAC_MOD_STR,
.probe = i82860_init_one,
.remove = i82860_remove_one,
.id_table = i82860_pci_tbl,
};
static int __init i82860_init(void)
{
int pci_rc;
edac_dbg(3, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
if ((pci_rc = pci_register_driver(&i82860_driver)) < 0)
goto fail0;
if (!mci_pdev) {
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82860_0, NULL);
if (mci_pdev == NULL) {
edac_dbg(0, "860 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i82860_init_one(mci_pdev, i82860_pci_tbl);
if (pci_rc < 0) {
edac_dbg(0, "860 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
}
return 0;
fail1:
pci_unregister_driver(&i82860_driver);
fail0:
pci_dev_put(mci_pdev);
return pci_rc;
}
static void __exit i82860_exit(void)
{
edac_dbg(3, "\n");
pci_unregister_driver(&i82860_driver);
pci_dev_put(mci_pdev);
}
module_init(i82860_init);
module_exit(i82860_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) "
"Ben Woodard <[email protected]>");
MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");