forked from HewlettPackard/cacti
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathddr3_cache.cfg
259 lines (193 loc) · 8.58 KB
/
ddr3_cache.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# Cache size
//-size (bytes) 2048
//-size (bytes) 4096
//-size (bytes) 32768
//-size (bytes) 131072
//-size (bytes) 262144
//-size (bytes) 1048576
//-size (bytes) 2097152
//-size (bytes) 4194304
-size (bytes) 8388608
//-size (bytes) 16777216
//-size (bytes) 33554432
//-size (bytes) 134217728
//-size (bytes) 67108864
//-size (bytes) 1073741824
# power gating
-Array Power Gating - "false"
-WL Power Gating - "false"
-CL Power Gating - "false"
-Bitline floating - "false"
-Interconnect Power Gating - "false"
-Power Gating Performance Loss 0.01
# Line size
//-block size (bytes) 8
-block size (bytes) 64
# To model Fully Associative cache, set associativity to zero
//-associativity 0
//-associativity 2
//-associativity 4
//-associativity 8
-associativity 8
-read-write port 1
-exclusive read port 0
-exclusive write port 0
-single ended read ports 0
# Multiple banks connected using a bus
-UCA bank count 1
-technology (u) 0.022
//-technology (u) 0.040
//-technology (u) 0.032
//-technology (u) 0.090
# following three parameters are meaningful only for main memories
-page size (bits) 8192
-burst length 8
-internal prefetch width 8
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
-Data array cell type - "itrs-hp"
//-Data array cell type - "itrs-lstp"
//-Data array cell type - "itrs-lop"
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
-Data array peripheral type - "itrs-hp"
//-Data array peripheral type - "itrs-lstp"
//-Data array peripheral type - "itrs-lop"
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
-Tag array cell type - "itrs-hp"
//-Tag array cell type - "itrs-lstp"
//-Tag array cell type - "itrs-lop"
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
-Tag array peripheral type - "itrs-hp"
//-Tag array peripheral type - "itrs-lstp"
//-Tag array peripheral type - "itrs-lop
# Bus width include data bits and address bits required by the decoder
//-output/input bus width 16
-output/input bus width 512
// 300-400 in steps of 10
-operating temperature (K) 360
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
-cache type "cache"
//-cache type "ram"
//-cache type "main memory"
# to model special structure like branch target buffers, directory, etc.
# change the tag size parameter
# if you want cacti to calculate the tagbits, set the tag size to "default"
-tag size (b) "default"
//-tag size (b) 22
# fast - data and tag access happen in parallel
# sequential - data array is accessed after accessing the tag array
# normal - data array lookup and tag access happen in parallel
# final data block is broadcasted in data array h-tree
# after getting the signal from the tag array
//-access mode (normal, sequential, fast) - "fast"
-access mode (normal, sequential, fast) - "normal"
//-access mode (normal, sequential, fast) - "sequential"
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
# Percentage deviation from the minimum value
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
# that compromises at most 10% delay.
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
# percentage values will not produce any valid organizations. For example,
# 0:0:100:100:100 will try to identify an organization that has both
# least delay and dynamic power. Since such an organization is not possible, CACTI will
# throw an error. Refer CACTI-6 Technical report for more details
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
# Objective for NUCA
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
# energy-delay or energy-delay sq. product
# Note: Optimize tag will disable weight or deviate values mentioned above
# Set it to NONE to let weight and deviate values determine the
# appropriate cache configuration
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
-Cache model (NUCA, UCA) - "UCA"
//-Cache model (NUCA, UCA) - "NUCA"
# In order for CACTI to find the optimal NUCA bank value the following
# variable should be assigned 0.
-NUCA bank count 0
# NOTE: for nuca network frequency is set to a default value of
# 5GHz in time.c. CACTI automatically
# calculates the maximum possible frequency and downgrades this value if necessary
# By default CACTI considers both full-swing and low-swing
# wires to find an optimal configuration. However, it is possible to
# restrict the search space by changing the signaling from "default" to
# "fullswing" or "lowswing" type.
-Wire signaling (fullswing, lowswing, default) - "Global_30"
//-Wire signaling (fullswing, lowswing, default) - "default"
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
//-Wire inside mat - "global"
-Wire inside mat - "semi-global"
//-Wire outside mat - "global"
-Wire outside mat - "semi-global"
-Interconnect projection - "conservative"
//-Interconnect projection - "aggressive"
# Contention in network (which is a function of core count and cache level) is one of
# the critical factor used for deciding the optimal bank count value
# core count can be 4, 8, or 16
//-Core count 4
-Core count 8
//-Core count 16
-Cache level (L2/L3) - "L3"
-Add ECC - "true"
//-Print level (DETAILED, CONCISE) - "CONCISE"
-Print level (DETAILED, CONCISE) - "DETAILED"
# for debugging
//-Print input parameters - "true"
-Print input parameters - "false"
# force CACTI to model the cache with the
# following Ndbl, Ndwl, Nspd, Ndsam,
# and Ndcm values
//-Force cache config - "true"
-Force cache config - "false"
-Ndwl 1
-Ndbl 1
-Nspd 0
-Ndcm 1
-Ndsam1 0
-Ndsam2 0
#### Default CONFIGURATION values for baseline external IO parameters to DRAM.
# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential)
-dram_type "D"
//-dram_type "L"
//-dram_type "W"
//-dram_type "S"
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
//-iostate "R"
-iostate "W"
//-iostate "I"
//-iostate "S"
# Is ECC Enabled (Y=Yes, N=No)
-dram_ecc "Y"
#Address bus timing
//-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3
-addr_timing 1.0 //SDR for DDR3, Wide-IO
//-addr_timing 2.0 //2T timing
//addr_timing 3.0 // 3T timing
# Bandwidth (Gbytes per second, this is the effective bandwidth)
-bus_bw 12.8 GBps //Valid range 0 to 2*bus_freq*num_dq
# Memory Density (Gbit per memory/DRAM die)
-mem_density 4 Gb //Valid values 2^n Gb
# IO frequency (MHz) (frequency of the external memory interface).
-bus_freq 800 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential
# Duty Cycle (fraction of time in the Memory State defined above)
-duty_cycle 1.0 //Valid range 0 to 1.0
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
# Number of DQ pins
-num_dq 72 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72.
# Number of DQS pins
-num_dqs 18 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
# Number of CA pins
-num_ca 25 //Valid range 0 to 35 pins.
# Number of CLK pins
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
# Number of Physical Ranks
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip
# Width of the Memory Data Bus
-mem_data_width 8 //x4 or x8 or x16 or x32 or x128 memories