1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
#include "cacheinfo.hh"
#include <cctk_Parameters.h>
#include <vectors.h>
// Determine the padded allocation shape for the given bounding box.
// The box's extent is first converted into a per-dimension grid-point
// count (shape divided by stride) and then handed to the vect
// overload, which performs the actual cache-aware padding.
template<int D>
vect<int,D>
pad_shape (bbox<int,D> const& extent)
{
  // A negative extent would indicate a corrupted bbox
  assert (all (extent.shape() >= 0));
  vect<int,D> const npoints = extent.shape() / extent.stride();
  return pad_shape(npoints);
}
// Compute a padded (allocated) array shape from an interior shape,
// both measured in grid points of type CCTK_REAL.
//
// The padding serves two purposes:
//   1. Alignment: the lowest dimension is rounded up to the SIMD
//      vector size and each dimension to cache-line multiples.
//   2. Conflict avoidance: if a row/plane stride is an exact multiple
//      of a cache's "stride" (size / associativity), successive rows
//      map onto the same cache sets and evict one another; one extra
//      cache line is added to break that resonance.
//
// The cache hierarchy is described by Cactus parameters (D1*, L2*,
// L3*, TLB_*) and captured once in a function-local static table.
// NOTE(review): the static initialisation below is not guarded
// against concurrent first calls — presumably the first call happens
// while single-threaded; confirm.
template<int D>
vect<int,D>
pad_shape (vect<int,D> const& shape)
{
  DECLARE_CCTK_PARAMETERS;
  assert (all(shape>=0));

  // One-time setup: collect the cache levels to consider, ordered
  // from smallest/fastest to largest/slowest
  static bool have_cacheinfo = false;
  static vector<cacheinfo_t> cacheinfo;
  if (not have_cacheinfo) {
    // Ignore L1 caches that are probably too small to be useful (e.g.
    // on Intel or AMD processors)
    // TODO: make this a parameter
    if (D1size >= 128*1024) {
      cacheinfo.push_back(cacheinfo_t(D1size, D1linesize, D1assoc));
    }
#if 0
    // TODO: this is too simplistic:
    // Add page size as a cache
    if (pagesize>0) {
      cacheinfo.push_back(cacheinfo_t(pagesize));
    }
#endif
    if (L2size>0) {
      cacheinfo.push_back(cacheinfo_t(L2size, L2linesize, L2assoc));
    }
    if (L3size>0) {
      cacheinfo.push_back(cacheinfo_t(L3size, L3linesize, L3assoc));
    }
    // Model TLBs as caches: "line size" is the page size, and the
    // modelled total size is entries * pagesize * associativity, so
    // that stride() (= size/assoc) comes out as entries * pagesize.
    // NOTE(review): verify that this size model matches cacheinfo_t's
    // expectations.
    if (TLB_D1entries>0) {
      ptrdiff_t const TLB_D1size = TLB_D1entries * TLB_D1pagesize * TLB_D1assoc;
      cacheinfo.push_back(cacheinfo_t(TLB_D1size, TLB_D1pagesize, TLB_D1assoc));
    }
    if (TLB_L2entries>0) {
      ptrdiff_t const TLB_L2size = TLB_L2entries * TLB_L2pagesize * TLB_L2assoc;
      cacheinfo.push_back(cacheinfo_t(TLB_L2size, TLB_L2pagesize, TLB_L2assoc));
    }
    // TODO: sort caches by their sizes
    // Sanity-check that the recorded levels form a strict hierarchy
    for (size_t n=0; n<cacheinfo.size(); ++n) {
      cacheinfo_t const& ci = cacheinfo.at(n);
      if (n>0) {
        // Ensure that the cache size is larger than the next lower
        // cache size
        assert (ci.size() > cacheinfo.at(n-1).size());
        // Ensure that the cache line size is evenly divided by the
        // next lower cache line size
        assert (ci.linesize() % cacheinfo.at(n-1).linesize() == 0);
        assert (ci.stride() > cacheinfo.at(n-1).stride());
      }
    } // for cacheinfo
    have_cacheinfo = true;
  } // if not have_cacheinfo

  // Pad one dimension at a time. accumulated_npoints is the product
  // of the already-padded lower dimensions, i.e. the array stride (in
  // points) of the current dimension d.
  vect<int,D> padded_shape;
  int accumulated_npoints = 1;
  for (int d=0; d<D; ++d) {
    int npoints = shape[d];
    if (d == 0) {
#if VECTORISE && VECTORISE_ALIGNED_ARRAYS
      // Pad array to a multiple of the vector size. Note that this is
      // a hard requirement, so that we can emit aligned load/store
      // operations.
      npoints = align_up (npoints, CCTK_REAL_VEC_SIZE);
#endif
      // Additional, parameter-controlled alignment of the lowest
      // dimension
      if (vector_size > 0) {
        npoints = align_up (npoints, vector_size);
      }
    }
    for (size_t n=0; n<cacheinfo.size(); ++n) {
      cacheinfo_t const& ci = cacheinfo.at(n);
      // Pad array in this direction to a multiple of this cache line
      // size
      assert (ci.linesize() % sizeof(CCTK_REAL) == 0);
      // Convert line size from bytes to a count of CCTK_REALs
      int const linesize = ci.linesize() / sizeof(CCTK_REAL);
      assert (is_power_of_2(linesize));
      if (npoints * accumulated_npoints >= linesize) {
        // The extent is at least one cache line long: round up to the
        // next full cache line
        npoints = align_up (npoints, linesize);
      } else {
#if 0
        // The extent is less than one cache line long: Ensure that
        // the array size divides the cache line size evenly by
        // rounding to the next power of 2
        // NOTE: This is disabled, since this would align everything
        // to powers of 2.
        npoints = next_power_of_2(npoints);
#endif
      }
      // Avoid multiples of the cache stride: if the row stride (in
      // points) is an exact multiple of stride(), rows collide in the
      // same associativity sets, so add one line of padding
      assert (ci.stride() % sizeof(CCTK_REAL) == 0);
      int const stride = ci.stride() / sizeof(CCTK_REAL);
      if (npoints * accumulated_npoints % stride == 0) {
        assert (linesize < stride);
        npoints += linesize;
      }
    } // for cacheinfo
    padded_shape[d] = npoints;
    accumulated_npoints *= npoints;
  }
  // accumulated_npoints is now the total number of points allocated
  assert (prod (padded_shape) == accumulated_npoints);

  // self-check
  for (int d=0; d<D; ++d) {
    assert (padded_shape[d] >= shape[d]);
#if VECTORISE && VECTORISE_ALIGNED_ARRAYS
    if (d == 0) {
      assert (padded_shape[d] % CCTK_REAL_VEC_SIZE == 0);
    }
#endif
    if (vector_size > 0) {
      if (d == 0) {
        assert (padded_shape[d] % vector_size == 0);
      }
    }
    // TODO: add self-checks for the other requirements as well
  }

  // Report the padding decision when requested via the verbose
  // parameter
  if (verbose) {
    ostringstream buf;
    buf << "padding " << shape << " to " << padded_shape;
    CCTK_INFO (buf.str().c_str());
  }

  return padded_shape;
}
// Explicit instantiations for the 3- and 4-dimensional cases used
// elsewhere in the project
template vect<int,3> pad_shape (bbox<int,3> const& extent);
template vect<int,3> pad_shape (vect<int,3> const& shape);
template vect<int,4> pad_shape (bbox<int,4> const& extent);
template vect<int,4> pad_shape (vect<int,4> const& shape);
|