Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_shard.h"
8 : #include "../offload/offload_mempool.h"
9 : #include "dbm_hyperparams.h"
10 :
11 : #include <assert.h>
12 : #include <omp.h>
13 : #include <stdbool.h>
14 : #include <stddef.h>
15 : #include <stdlib.h>
16 : #include <string.h>
17 :
18 : /*******************************************************************************
19 : * \brief Internal routine for finding a power of two greater than given number.
20 : * \author Ole Schuett
21 : ******************************************************************************/
22 8067040 : static int next_power2(const int start) {
23 8067040 : int candidate = 2;
24 29487808 : while (candidate < start) {
25 21420768 : candidate *= 2;
26 : }
27 8067040 : return candidate;
28 : }
29 :
30 : /*******************************************************************************
31 : * \brief Internal routine for finding a prime greater equal than given number.
32 : * \author Ole Schuett
33 : ******************************************************************************/
34 8067040 : static int next_prime(const int start) {
35 8067040 : int candidate = start, divisor = 0;
36 31921326 : while (divisor < candidate) {
37 509139267 : for (divisor = 2; divisor < candidate; divisor++) {
38 501072227 : if (candidate % divisor == 0) {
39 15787246 : candidate++;
40 15787246 : break;
41 : }
42 : }
43 : }
44 8067040 : return candidate;
45 : }
46 :
47 : /*******************************************************************************
48 : * \brief Internal routine for initializing a shard's hashtable.
49 : * \author Ole Schuett
50 : ******************************************************************************/
51 8067040 : static void hashtable_init(dbm_shard_t *shard) {
52 : // Choosing size as power of two allows to replace modulo with bitwise AND.
53 16134080 : shard->hashtable_size =
54 8067040 : next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated);
55 8067040 : shard->hashtable_prime = next_prime(shard->hashtable_size);
56 8067040 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
57 8067040 : assert(shard->hashtable != NULL);
58 8067040 : }
59 :
60 : /*******************************************************************************
61 : * \brief Internal routine for initializing a shard.
62 : * \author Ole Schuett
63 : ******************************************************************************/
64 1719592 : void dbm_shard_init(dbm_shard_t *shard) {
65 1719592 : shard->nblocks = 0;
66 1719592 : shard->nblocks_allocated = 0;
67 1719592 : shard->blocks = NULL;
68 1719592 : hashtable_init(shard);
69 1719592 : shard->data_size = 0;
70 1719592 : shard->data_promised = 0;
71 1719592 : shard->data_allocated = 0;
72 1719592 : shard->data = NULL;
73 1719592 : omp_init_lock(&shard->lock);
74 1719592 : }
75 :
76 : /*******************************************************************************
77 : * \brief Internal routine for copying content of shard_b into shard_a.
78 : * \author Ole Schuett
79 : ******************************************************************************/
80 431090 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
81 431090 : assert(shard_a != NULL && shard_b != NULL);
82 :
83 431090 : if (shard_a->nblocks_allocated < shard_b->nblocks) {
84 403323 : free(shard_a->blocks);
85 403323 : shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
86 403323 : shard_a->nblocks_allocated = shard_b->nblocks;
87 403323 : assert(shard_a->blocks != NULL);
88 : }
89 431090 : shard_a->nblocks = shard_b->nblocks;
90 :
91 431090 : if (shard_a->hashtable_size < shard_b->hashtable_size) {
92 406142 : free(shard_a->hashtable);
93 406142 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
94 406142 : assert(shard_a->hashtable != NULL);
95 : }
96 431090 : shard_a->hashtable_size = shard_b->hashtable_size;
97 431090 : shard_a->hashtable_prime = shard_b->hashtable_prime;
98 :
99 431090 : if (shard_a->data_allocated < shard_b->data_size) {
100 403323 : offload_mempool_host_free(shard_a->data);
101 806646 : shard_a->data =
102 403323 : offload_mempool_host_malloc(shard_b->data_size * sizeof(double));
103 403323 : shard_a->data_allocated = shard_b->data_size;
104 403323 : assert(shard_a->data != NULL);
105 : }
106 431090 : shard_a->data_size = shard_b->data_size;
107 :
108 431090 : if (shard_b->nblocks != 0) {
109 403353 : assert(shard_a->blocks != NULL && shard_b->blocks != NULL);
110 403353 : memcpy(shard_a->blocks, shard_b->blocks,
111 403353 : shard_b->nblocks * sizeof(dbm_block_t));
112 : }
113 431090 : if (shard_b->hashtable_size != 0) {
114 431090 : assert(shard_a->hashtable != NULL && shard_b->hashtable != NULL);
115 431090 : memcpy(shard_a->hashtable, shard_b->hashtable,
116 431090 : shard_b->hashtable_size * sizeof(int));
117 : }
118 431090 : if (shard_b->data_size != 0) {
119 403353 : assert(shard_a->data != NULL && shard_b->data != NULL);
120 403353 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
121 : }
122 431090 : }
123 :
124 : /*******************************************************************************
125 : * \brief Internal routine for releasing a shard.
126 : * \author Ole Schuett
127 : ******************************************************************************/
128 1719592 : void dbm_shard_release(dbm_shard_t *shard) {
129 1719592 : free(shard->blocks);
130 1719592 : free(shard->hashtable);
131 1719592 : offload_mempool_host_free(shard->data);
132 1719592 : omp_destroy_lock(&shard->lock);
133 1719592 : }
134 :
135 : /*******************************************************************************
136 : * \brief Private hash function based on Cantor pairing function.
137 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
138 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
139 : * Using unsigned int to return a positive number even after overflow.
140 : * \author Ole Schuett
141 : ******************************************************************************/
142 251828082 : static inline unsigned int hash(const unsigned int row,
143 : const unsigned int col) {
144 251828082 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
145 : }
146 :
147 : /*******************************************************************************
148 : * \brief Internal routine for masking a slot in the hash-table.
149 : * \author Hans Pabst
150 : ******************************************************************************/
151 251828082 : static inline int hashtable_mask(const dbm_shard_t *shard) {
152 251828082 : return shard->hashtable_size - 1;
153 : }
154 :
155 : /*******************************************************************************
156 : * \brief Private routine for inserting a block into a shard's hashtable.
157 : * \author Ole Schuett
158 : ******************************************************************************/
159 134873367 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
160 134873367 : assert(0 <= block_idx && block_idx < shard->nblocks);
161 134873367 : const dbm_block_t *blk = &shard->blocks[block_idx];
162 134873367 : const unsigned int h = hash(blk->row, blk->col);
163 134873367 : int slot = (shard->hashtable_prime * h) & hashtable_mask(shard);
164 134963663 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
165 143860618 : for (int i = slot; i < shard->hashtable_size; ++i) {
166 143815470 : if (shard->hashtable[i] == 0) { // 0 means empty
167 134873367 : shard->hashtable[i] = block_idx + 1; // 1-based
168 134873367 : return;
169 : }
170 : }
171 : }
172 : }
173 :
174 : /*******************************************************************************
175 : * \brief Internal routine for looking up a block from a shard.
176 : * \author Ole Schuett
177 : ******************************************************************************/
178 116954715 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
179 : const int col) {
180 116954715 : int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard);
181 96257 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
182 125359612 : for (int i = slot; i < shard->hashtable_size; ++i) {
183 125263355 : const int block_idx = shard->hashtable[i];
184 125263355 : if (block_idx == 0) { // 1-based, 0 means empty
185 : return NULL; // block not found
186 : }
187 81792589 : assert(0 < block_idx && block_idx <= shard->nblocks);
188 81792589 : dbm_block_t *blk = &shard->blocks[block_idx - 1];
189 81792589 : if (blk->row == row && blk->col == col) {
190 : return blk;
191 : }
192 : }
193 : }
194 : }
195 :
196 : /*******************************************************************************
197 : * \brief Internal routine for allocating the metadata of a new block.
198 : * \author Ole Schuett
199 : ******************************************************************************/
200 53191007 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
201 : const int col, const int block_size) {
202 : // Grow blocks array if necessary.
203 53191007 : if (shard->nblocks_allocated < shard->nblocks + 1) {
204 6347448 : shard->nblocks_allocated = DBM_ALLOCATION_FACTOR * (shard->nblocks + 1);
205 6347448 : assert((shard->nblocks + 1) <= shard->nblocks_allocated);
206 6347448 : shard->blocks =
207 6347448 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
208 6347448 : assert(shard->blocks != NULL);
209 :
210 : // rebuild hashtable
211 6347448 : free(shard->hashtable);
212 6347448 : hashtable_init(shard);
213 88029808 : for (int i = 0; i < shard->nblocks; i++) {
214 81682360 : hashtable_insert(shard, i);
215 : }
216 : }
217 :
218 53191007 : const int new_block_idx = shard->nblocks;
219 53191007 : shard->nblocks++;
220 53191007 : dbm_block_t *new_block = &shard->blocks[new_block_idx];
221 53191007 : new_block->row = row;
222 53191007 : new_block->col = col;
223 53191007 : new_block->offset = shard->data_promised;
224 53191007 : shard->data_promised += block_size;
225 : // The data_size will be increased after the memory is allocated and zeroed.
226 53191007 : hashtable_insert(shard, new_block_idx);
227 53191007 : return new_block;
228 : }
229 :
230 : /*******************************************************************************
231 : * \brief Internal routine for allocating and zeroing any promised block's data.
232 : * \author Ole Schuett
233 : ******************************************************************************/
234 11570537 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
235 :
236 : // Reallocate data array if necessary.
237 11570537 : if (shard->data_promised > shard->data_allocated) {
238 1622783 : const double *data = shard->data;
239 1622783 : shard->data_allocated = DBM_ALLOCATION_FACTOR * shard->data_promised;
240 1622783 : assert(shard->data_promised <= shard->data_allocated);
241 3245566 : shard->data =
242 1622783 : offload_mempool_host_malloc(shard->data_allocated * sizeof(double));
243 1622783 : assert(shard->data != NULL);
244 1622783 : if (data != NULL) {
245 520326 : memcpy(shard->data, data, shard->data_size * sizeof(double));
246 520326 : offload_mempool_host_free(data);
247 : }
248 : }
249 :
250 : // Zero new blocks.
251 : // The following memset is usually the first touch of the memory, which leads
252 : // to frequent page faults. The executing thread determines the NUMA location
253 11570537 : if (shard->data_promised > shard->data_size) {
254 11242965 : const int tail = shard->data_promised - shard->data_size;
255 11242965 : memset(shard->data + shard->data_size, 0, tail * sizeof(double));
256 11242965 : shard->data_size = shard->data_promised;
257 : }
258 11570537 : }
259 :
260 : /*******************************************************************************
261 : * \brief Internal routine for getting block or promising a new one.
262 : * \author Ole Schuett
263 : ******************************************************************************/
264 28942994 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
265 : const int col,
266 : const int block_size) {
267 28942994 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
268 28942994 : if (existing_blk != NULL) {
269 : return existing_blk;
270 : } else {
271 26714108 : return dbm_shard_promise_new_block(shard, row, col, block_size);
272 : }
273 : }
274 :
275 : /*******************************************************************************
276 : * \brief Internal routine for getting block or allocating a new one.
277 : * \author Ole Schuett
278 : ******************************************************************************/
279 38554349 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
280 : const int col,
281 : const int block_size) {
282 38554349 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
283 38554349 : if (existing_blk != NULL) {
284 : return existing_blk;
285 : }
286 :
287 : // Create a new block.
288 9926519 : dbm_block_t *new_blk =
289 9926519 : dbm_shard_promise_new_block(shard, row, col, block_size);
290 9926519 : dbm_shard_allocate_promised_blocks(shard);
291 :
292 9926519 : return new_blk;
293 : }
294 :
295 : // EOF
|