LCOV - code coverage report
Current view: top level - src/grid/dgemm - grid_dgemm_context.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:cccd2f3) Lines: 72.6 % 277 201
Test Date: 2026-05-06 07:07:47 Functions: 61.9 % 21 13

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include <math.h>
       9              : #include <omp.h>
      10              : #include <stdio.h>
      11              : #include <stdlib.h>
      12              : #include <string.h>
      13              : 
      14              : #include "../common/grid_library.h"
      15              : #include "grid_dgemm_collocate.h"
      16              : #include "grid_dgemm_collocation_integration.h"
      17              : #include "grid_dgemm_context.h"
      18              : #include "grid_dgemm_private_header.h"
      19              : #include "grid_dgemm_task_list.h"
      20              : #include "grid_dgemm_tensor_local.h"
      21              : #include "grid_dgemm_utils.h"
      22              : 
      23            0 : void return_dh(void *const ptr, const int level, double *const dh) {
      24            0 :   grid_context *const ctx = (grid_context *)ptr;
      25              : 
      26            0 :   assert(ctx->checksum == ctx_checksum);
      27            0 :   dh[0] = ctx->grid[level].dh[0][0];
      28            0 :   dh[1] = ctx->grid[level].dh[0][1];
      29            0 :   dh[2] = ctx->grid[level].dh[0][2];
      30            0 :   dh[3] = ctx->grid[level].dh[1][0];
      31            0 :   dh[4] = ctx->grid[level].dh[1][1];
      32            0 :   dh[5] = ctx->grid[level].dh[1][2];
      33            0 :   dh[6] = ctx->grid[level].dh[2][0];
      34            0 :   dh[7] = ctx->grid[level].dh[2][1];
      35            0 :   dh[8] = ctx->grid[level].dh[2][2];
      36            0 : }
      37              : 
      38            0 : void return_dh_inv(void *const ptr, const int level, double *const dh_inv) {
      39            0 :   grid_context *const ctx = (grid_context *)ptr;
      40              : 
      41            0 :   assert(ctx->checksum == ctx_checksum);
      42            0 :   dh_inv[0] = ctx->grid[level].dh_inv[0][0];
      43            0 :   dh_inv[1] = ctx->grid[level].dh_inv[0][1];
      44            0 :   dh_inv[2] = ctx->grid[level].dh_inv[0][2];
      45            0 :   dh_inv[3] = ctx->grid[level].dh_inv[1][0];
      46            0 :   dh_inv[4] = ctx->grid[level].dh_inv[1][1];
      47            0 :   dh_inv[5] = ctx->grid[level].dh_inv[1][2];
      48            0 :   dh_inv[6] = ctx->grid[level].dh_inv[2][0];
      49            0 :   dh_inv[7] = ctx->grid[level].dh_inv[2][1];
      50            0 :   dh_inv[8] = ctx->grid[level].dh_inv[2][2];
      51            0 : }
      52              : 
      53            0 : int return_num_devs(void *const ptr) {
      54            0 :   grid_context *const ctx = (grid_context *)ptr;
      55            0 :   assert(ctx->checksum == ctx_checksum);
      56              : 
      57            0 :   return ctx->number_of_devices;
      58              : }
      59              : 
      60            0 : int return_device_id(void *const ptr, const int device) {
      61            0 :   grid_context *const ctx = (grid_context *)ptr;
      62            0 :   assert(ctx->checksum == ctx_checksum);
      63              : 
      64            0 :   return ctx->device_id[device];
      65              : }
      66              : 
      67            0 : int is_grid_orthorhombic(void *const ptr) {
      68            0 :   grid_context *const ctx = (grid_context *)ptr;
      69            0 :   assert(ctx->checksum == ctx_checksum);
      70            0 :   return ctx->orthorhombic;
      71              : }
      72              : 
      73            0 : void update_queue_length(void *const ptr, const int queue_length) {
      74            0 :   grid_context *const ctx = (grid_context *)ptr;
      75            0 :   assert(ctx->checksum == ctx_checksum);
      76            0 :   ctx->queue_length = queue_length;
      77            0 : }
      78              : 
      79           20 : void update_atoms_position(const int natoms,
      80              :                            const double atoms_positions[natoms][3],
      81              :                            grid_context *data) {
      82           20 :   assert(data != NULL);
      83              : 
      84           20 :   if (natoms == 0)
      85              :     return;
      86              : 
      87           20 :   if (data->atom_positions == NULL) {
      88            8 :     data->atom_positions = malloc(3 * natoms * sizeof(double));
      89              :   } else {
      90           12 :     if (natoms > data->natoms) {
      91            0 :       data->atom_positions =
      92            0 :           realloc(data->atom_positions, 3 * natoms * sizeof(double));
      93              :     }
      94              :   }
      95           20 :   assert(data->atom_positions != NULL);
      96              : 
      97           20 :   data->natoms = natoms;
      98              : 
      99           20 :   if (data->atom_positions) {
     100           78 :     for (int i = 0; i < natoms; i++) {
     101           58 :       data->atom_positions[3 * i] = atoms_positions[i][0];
     102           58 :       data->atom_positions[3 * i + 1] = atoms_positions[i][1];
     103           58 :       data->atom_positions[3 * i + 2] = atoms_positions[i][2];
     104              :     }
     105              :   }
     106              : }
     107              : 
     108           20 : void update_atoms_kinds(const int natoms, const int *atoms_kinds,
     109              :                         grid_context *data) {
     110           20 :   assert(data != NULL);
     111              : 
     112              :   // data->atom_kinds is a table that give the type of a given atom.
     113           20 :   if (natoms == 0)
     114              :     return;
     115              : 
     116           20 :   if (data->atom_kinds == NULL) {
     117            8 :     data->atom_kinds = malloc(natoms * sizeof(int));
     118              :   } else {
     119           12 :     if ((natoms > data->natoms) && (data->natoms > 0)) {
     120            0 :       data->atom_kinds = realloc(data->atom_kinds, natoms * sizeof(int));
     121              :     }
     122              :   }
     123           20 :   assert(data->atom_kinds != NULL);
     124              :   // data->natoms is initialized before calling this function
     125           20 :   if (data->natoms)
     126           20 :     memcpy(data->atom_kinds, atoms_kinds, sizeof(int) * natoms);
     127              : 
     128           78 :   for (int i = 0; i < natoms; i++) {
     129           58 :     data->atom_kinds[i] -= 1;
     130              :   }
     131              : }
     132              : 
     133           20 : void update_block_offsets(const int nblocks, const int *const block_offsets,
     134              :                           grid_context *data) {
     135           20 :   assert(data != NULL);
     136              : 
     137           20 :   if (nblocks == 0)
     138              :     return;
     139              : 
     140           19 :   if (data->block_offsets == NULL) {
     141            7 :     data->block_offsets = malloc(nblocks * sizeof(int));
     142              :   } else {
     143           12 :     if ((nblocks > data->nblocks_total) && (data->nblocks_total > 0)) {
     144            0 :       data->block_offsets = realloc(data->block_offsets, sizeof(int) * nblocks);
     145              :     }
     146              :   }
     147           19 :   assert(data->block_offsets != NULL);
     148              : 
     149           19 :   data->nblocks = nblocks;
     150           19 :   data->nblocks_total = imax(data->nblocks_total, nblocks);
     151           19 :   if (nblocks)
     152           19 :     memcpy(data->block_offsets, block_offsets, nblocks * sizeof(int));
     153              : }
     154              : 
     155           20 : void update_basis_set(const int nkinds, const grid_basis_set **const basis_sets,
     156              :                       grid_context *data) {
     157           20 :   if (nkinds > data->nkinds_total) {
     158            8 :     if (data->basis_sets == NULL) {
     159            8 :       data->basis_sets = malloc(nkinds * sizeof(grid_basis_set *));
     160              :     } else {
     161            0 :       data->basis_sets =
     162            0 :           realloc(data->basis_sets, nkinds * sizeof(grid_basis_set *));
     163              :     }
     164              :   }
     165           20 :   assert(data->basis_sets != NULL);
     166           20 :   data->nkinds = nkinds;
     167           20 :   data->nkinds_total = imax(data->nkinds_total, nkinds);
     168           20 :   memcpy(data->basis_sets, basis_sets, nkinds * sizeof(grid_basis_set *));
     169           20 : }
     170              : 
     171           20 : void update_task_lists(const int nlevels, const int ntasks,
     172              :                        const int *const level_list, const int *const iatom_list,
     173              :                        const int *const jatom_list, const int *const iset_list,
     174              :                        const int *const jset_list, const int *const ipgf_list,
     175              :                        const int *const jpgf_list,
     176              :                        const int *const border_mask_list,
     177              :                        const int *block_num_list,
     178              :                        const double *const radius_list,
     179              :                        const double rab_list[ntasks][3], grid_context *ctx) {
     180              : 
     181           20 :   assert(ctx->checksum == ctx_checksum);
     182              : 
     183           20 :   if (nlevels == 0)
     184              :     return;
     185              : 
     186           20 :   if (ctx->ntasks == 0) {
     187              :     // Count tasks per level.
     188            8 :     size_t size = nlevels * sizeof(int);
     189            8 :     ctx->tasks_per_level = malloc(size);
     190            8 :     ctx->tasks = malloc(nlevels * sizeof(_task *));
     191              :     /* memset(ctx->tasks, 0, nlevels * sizeof(_task *)); */
     192            8 :     if (ntasks)
     193            7 :       ctx->tasks[0] = malloc(ntasks * sizeof(_task));
     194              :     else
     195            1 :       ctx->tasks[0] = NULL;
     196              :   } else {
     197           12 :     if (ctx->nlevels_total < nlevels) {
     198              :       /* save the address of the full task list. NULL when completly empty */
     199            0 :       ctx->tasks = realloc(ctx->tasks, nlevels * sizeof(_task *));
     200            0 :       assert(ctx->tasks != NULL);
     201              :     }
     202           12 :     if (ctx->ntasks_total < ntasks) {
     203            0 :       ctx->tasks[0] = realloc(ctx->tasks[0], ntasks * sizeof(_task));
     204            0 :       assert(ctx->tasks[0] != NULL);
     205              :     }
     206              :   }
     207              : 
     208           20 :   memset(ctx->tasks_per_level, 0, nlevels * sizeof(int));
     209           20 :   ctx->nlevels = nlevels;
     210           20 :   ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
     211           20 :   ctx->ntasks_total = imax(ctx->ntasks_total, ntasks);
     212           20 :   ctx->ntasks = ntasks;
     213              : 
     214         5793 :   for (int i = 0; i < ntasks; i++) {
     215         5773 :     ctx->tasks_per_level[level_list[i] - 1]++;
     216         5773 :     assert(i == 0 || level_list[i] >= level_list[i - 1]); // expect ordered list
     217              :   }
     218              : 
     219           80 :   for (int i = 1; i < ctx->nlevels; i++) {
     220           60 :     ctx->tasks[i] = ctx->tasks[i - 1] + ctx->tasks_per_level[i - 1];
     221              :   }
     222              : 
     223           20 :   _task *const tasks = ctx->tasks[0];
     224           20 : #pragma omp parallel for schedule(static) if (ntasks > GRID_OMP_MIN_ITERATIONS)
     225              :   for (int i = 0; i < ntasks; i++) {
     226              :     _task *const task = &tasks[i];
     227              :     task->level = level_list[i] - 1;
     228              :     task->iatom = iatom_list[i] - 1;
     229              :     task->jatom = jatom_list[i] - 1;
     230              :     task->iset = iset_list[i] - 1;
     231              :     task->jset = jset_list[i] - 1;
     232              :     task->ipgf = ipgf_list[i] - 1;
     233              :     task->jpgf = jpgf_list[i] - 1;
     234              :     task->border_mask = border_mask_list[i];
     235              :     task->block_num = block_num_list[i] - 1;
     236              :     task->radius = radius_list[i];
     237              :     task->rab[0] = rab_list[i][0];
     238              :     task->rab[1] = rab_list[i][1];
     239              :     task->rab[2] = rab_list[i][2];
     240              :     const int iatom = task->iatom;
     241              :     const int jatom = task->jatom;
     242              :     const int iset = task->iset;
     243              :     const int jset = task->jset;
     244              :     const int ipgf = task->ipgf;
     245              :     const int jpgf = task->jpgf;
     246              :     const int ikind = ctx->atom_kinds[iatom];
     247              :     const int jkind = ctx->atom_kinds[jatom];
     248              :     const grid_basis_set *ibasis = ctx->basis_sets[ikind];
     249              :     const grid_basis_set *jbasis = ctx->basis_sets[jkind];
     250              :     const int ncoseta = ncoset(ibasis->lmax[iset]);
     251              :     const int ncosetb = ncoset(jbasis->lmax[jset]);
     252              : 
     253              :     task->zeta[0] = ibasis->zet[iset * ibasis->maxpgf + ipgf];
     254              :     task->zeta[1] = jbasis->zet[jset * jbasis->maxpgf + jpgf];
     255              : 
     256              :     const double *ra = &ctx->atom_positions[3 * iatom];
     257              :     const double zetp = task->zeta[0] + task->zeta[1];
     258              :     const double f = task->zeta[1] / zetp;
     259              :     const double rab2 = task->rab[0] * task->rab[0] +
     260              :                         task->rab[1] * task->rab[1] +
     261              :                         task->rab[2] * task->rab[2];
     262              : 
     263              :     task->prefactor = exp(-task->zeta[0] * f * rab2);
     264              :     task->zetp = zetp;
     265              : 
     266              :     for (int i = 0; i < 3; i++) {
     267              :       task->ra[i] = ra[i];
     268              :       task->rp[i] = ra[i] + f * task->rab[i];
     269              :       task->rb[i] = ra[i] + task->rab[i];
     270              :     }
     271              : 
     272              :     task->lmax[0] = ibasis->lmax[iset];
     273              :     task->lmax[1] = jbasis->lmax[jset];
     274              :     task->lmin[0] = ibasis->lmin[iset];
     275              :     task->lmin[1] = jbasis->lmin[jset];
     276              : 
     277              :     task->update_block_ = i == 0 || level_list[i] != level_list[i - 1] ||
     278              :                           block_num_list[i] != block_num_list[i - 1] ||
     279              :                           iset_list[i] != iset_list[i - 1] ||
     280              :                           jset_list[i] != jset_list[i - 1];
     281              : 
     282              :     task->offset[0] = ipgf * ncoseta;
     283              :     task->offset[1] = jpgf * ncosetb;
     284              :   }
     285              : 
     286              :   // Find largest Cartesian subblock size.
     287           20 :   ctx->maxco = 0;
     288           56 :   for (int i = 0; i < ctx->nkinds; i++) {
     289           36 :     ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
     290              :   }
     291              : }
     292              : 
     293           20 : void update_layouts(const int nlevels, const int npts_global[nlevels][3],
     294              :                     const int npts_local[nlevels][3],
     295              :                     const int shift_local[nlevels][3],
     296              :                     const int border_width[nlevels][3],
     297              :                     const double dh[nlevels][3][3],
     298              :                     const double dh_inv[nlevels][3][3], grid_context *ctx) {
     299              : 
     300           20 :   assert(ctx != NULL);
     301           20 :   assert(ctx->checksum == ctx_checksum);
     302              : 
     303           20 :   if (ctx->layouts != NULL) {
     304           12 :     free(ctx->layouts);
     305              :   }
     306              : 
     307           20 :   ctx->layouts = malloc(sizeof(_layout) * nlevels);
     308              : 
     309          100 :   for (int level = 0; level < nlevels; level++) {
     310          320 :     for (int i = 0; i < 3; i++) {
     311          240 :       ctx->layouts[level].npts_global[i] = npts_global[level][i];
     312          240 :       ctx->layouts[level].npts_local[i] = npts_local[level][i];
     313          240 :       ctx->layouts[level].shift_local[i] = shift_local[level][i];
     314          240 :       ctx->layouts[level].border_width[i] = border_width[level][i];
     315          960 :       for (int j = 0; j < 3; j++) {
     316          720 :         ctx->layouts[level].dh[i][j] = dh[level][i][j];
     317          720 :         ctx->layouts[level].dh_inv[i][j] = dh_inv[level][i][j];
     318              :       }
     319              :     }
     320              :   }
     321           20 : }
     322              : 
     323           20 : void update_grid(const int nlevels, grid_context *ctx) {
     324           20 :   assert(ctx != NULL);
     325           20 :   assert(ctx->checksum == ctx_checksum);
     326              : 
     327           20 :   if (nlevels == 0)
     328              :     return;
     329              : 
     330           20 :   if (ctx->grid == NULL) {
     331            8 :     ctx->grid = malloc(sizeof(tensor) * nlevels);
     332              :   } else {
     333           12 :     if (ctx->nlevels_total < nlevels) {
     334            0 :       ctx->grid = realloc(ctx->grid, sizeof(tensor) * nlevels);
     335              :     }
     336              :   }
     337           20 :   assert(ctx->grid != NULL);
     338              : 
     339           20 :   ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
     340           20 :   ctx->nlevels = nlevels;
     341              : }
     342              : 
     343            8 : void *create_grid_context_dgemm(
     344              :     const bool orthorhombic, const int ntasks, const int nlevels,
     345              :     const int natoms, const int nkinds, const int nblocks,
     346              :     const int *block_offsets, const double atom_positions[natoms][3],
     347              :     const int *const atom_kinds, const grid_basis_set **const basis_sets,
     348              :     const int *const level_list, const int *const iatom_list,
     349              :     const int *jatom_list, const int *const iset_list,
     350              :     const int *const jset_list, const int *const ipgf_list,
     351              :     const int *const jpgf_list, const int *const border_mask_list,
     352              :     const int *block_num_list, const double *const radius_list,
     353              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     354              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     355              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     356              :     const double dh_inv[nlevels][3][3]) {
     357              : 
     358            8 :   grid_context *ctx = malloc(sizeof(grid_context));
     359              : 
     360            8 :   memset(ctx, 0, sizeof(grid_context));
     361              : 
     362            8 :   ctx->checksum = ctx_checksum;
     363            8 :   ctx->orthorhombic = orthorhombic;
     364            8 :   update_block_offsets(nblocks, block_offsets, ctx);
     365            8 :   update_atoms_position(natoms, atom_positions, ctx);
     366            8 :   update_atoms_kinds(natoms, atom_kinds, ctx);
     367            8 :   update_basis_set(nkinds, basis_sets, ctx);
     368            8 :   update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
     369              :                     iset_list, jset_list, ipgf_list, jpgf_list,
     370              :                     border_mask_list, block_num_list, radius_list, rab_list,
     371              :                     ctx);
     372            8 :   update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
     373              :                  dh, dh_inv, ctx);
     374            8 :   update_grid(nlevels, ctx);
     375              : 
     376            8 :   const int max_threads = omp_get_max_threads();
     377              : 
     378            8 :   ctx->handler =
     379            8 :       malloc(sizeof(struct collocation_integration_ *) * max_threads);
     380              : 
     381           16 :   for (int i = 0; i < max_threads; i++) {
     382            8 :     ctx->handler[i] = collocate_create_handle();
     383              :   }
     384              : 
     385            8 :   ctx->number_of_handler = max_threads;
     386              : 
     387            8 :   return ctx;
     388              : }
     389              : 
     390           12 : void update_grid_context_dgemm(
     391              :     const bool orthorhombic, const int ntasks, const int nlevels,
     392              :     const int natoms, const int nkinds, const int nblocks,
     393              :     const int *block_offsets, const double atom_positions[natoms][3],
     394              :     const int *const atom_kinds, const grid_basis_set **const basis_sets,
     395              :     const int *const level_list, const int *const iatom_list,
     396              :     const int *jatom_list, const int *const iset_list,
     397              :     const int *const jset_list, const int *const ipgf_list,
     398              :     const int *const jpgf_list, const int *const border_mask_list,
     399              :     const int *block_num_list, const double *const radius_list,
     400              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     401              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     402              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     403              :     const double dh_inv[nlevels][3][3], void *ptr) {
     404              : 
     405           12 :   assert(ptr != NULL);
     406           12 :   grid_context *ctx = (grid_context *)ptr;
     407           12 :   assert(ctx->checksum == ctx_checksum);
     408              : 
     409           12 :   ctx->orthorhombic = orthorhombic;
     410           12 :   update_block_offsets(nblocks, block_offsets, ctx);
     411           12 :   update_atoms_position(natoms, atom_positions, ctx);
     412           12 :   update_atoms_kinds(natoms, atom_kinds, ctx);
     413           12 :   update_basis_set(nkinds, basis_sets, ctx);
     414           12 :   update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
     415              :                     iset_list, jset_list, ipgf_list, jpgf_list,
     416              :                     border_mask_list, block_num_list, radius_list, rab_list,
     417              :                     ctx);
     418           12 :   update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
     419              :                  dh, dh_inv, ctx);
     420           12 :   update_grid(nlevels, ctx);
     421              : 
     422              :   // Find largest Cartesian subblock size.
     423           12 :   ctx->maxco = 0;
     424           36 :   for (int i = 0; i < nkinds; i++) {
     425           24 :     ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
     426              :   }
     427           12 : }
     428              : 
     429            0 : void initialize_grid_context_on_gpu(void *ptr, const int number_of_devices,
     430              :                                     const int *device_id) {
     431            0 :   assert(ptr != NULL);
     432            0 :   grid_context *ctx = (grid_context *)ptr;
     433            0 :   assert(ctx->checksum == ctx_checksum);
     434            0 :   ctx->work_on_gpu = false;
     435            0 :   if (number_of_devices <= 0) {
     436              :     return;
     437              :   }
     438              : 
     439            0 :   ctx->number_of_devices = number_of_devices;
     440            0 :   ctx->queue_length = 8192;
     441            0 :   if (ctx->device_id == NULL) {
     442            0 :     ctx->device_id = malloc(sizeof(int) * number_of_devices);
     443              :   } else {
     444            0 :     ctx->device_id = realloc(ctx->device_id, sizeof(int) * number_of_devices);
     445              :   }
     446            0 :   assert(ctx->device_id != NULL);
     447              : 
     448            0 :   memcpy(ctx->device_id, device_id, sizeof(int) * number_of_devices);
     449              : }
     450              : 
     451            8 : void destroy_grid_context_dgemm(void *ptr) {
     452            8 :   assert(ptr);
     453            8 :   grid_context *ctx = (grid_context *)ptr;
     454            8 :   assert(ctx->checksum == ctx_checksum);
     455            8 :   free(ctx->block_offsets);
     456            8 :   free(ctx->atom_positions);
     457            8 :   free(ctx->atom_kinds);
     458            8 :   free(ctx->basis_sets);
     459            8 :   free(ctx->tasks[0]);
     460            8 :   free(ctx->tasks);
     461            8 :   free(ctx->tasks_per_level);
     462            8 :   free(ctx->layouts);
     463            8 :   free(ctx->grid);
     464            8 :   if (ctx->device_id)
     465            0 :     free(ctx->device_id);
     466              : 
     467            8 :   if (ctx->handler) {
     468           16 :     for (int i = 0; i < ctx->number_of_handler; i++) {
     469            8 :       collocate_destroy_handle(ctx->handler[i]);
     470              :     }
     471            8 :     free(ctx->handler);
     472              :   }
     473              : 
     474            8 :   free(ctx);
     475            8 : }
     476              : 
     477            0 : void apply_cutoff(void *ptr) {
     478            0 :   assert(ptr);
     479            0 :   grid_context *ctx = (grid_context *)ptr;
     480            0 :   assert(ctx->checksum == ctx_checksum);
     481            0 :   ctx->apply_cutoff = true;
     482            0 : }
     483              : 
     484         1280 : void set_grid_parameters(
     485              :     tensor *grid, const bool orthorhombic,
     486              :     const int grid_full_size[3],  /* size of the full grid */
     487              :     const int grid_local_size[3], /* size of the local grid block */
     488              :     const int shift_local[3],     /* coordinates of the lower coordinates of the
     489              :                                      local grid window */
     490              :     const int border_width[3],    /* width of the borders */
     491              :     const double
     492              :         dh[3][3], /* displacement vectors of the grid (cartesian) -> (ijk) */
     493              :     const double dh_inv[3][3], /* (ijk) -> (x,y,z) */
     494              :     offload_buffer *grid_) {
     495         1280 :   memset(grid, 0, sizeof(tensor));
     496         1280 :   initialize_tensor_3(grid, grid_local_size[2], grid_local_size[1],
     497              :                       grid_local_size[0]);
     498              : 
     499         1280 :   grid->data = grid_->host_buffer;
     500         1280 :   grid->ld_ = grid_local_size[0];
     501              : 
     502         1280 :   setup_global_grid_size(grid, &grid_full_size[0]);
     503              : 
     504              :   /* the grid is divided over several ranks or not periodic */
     505         1280 :   if ((grid_local_size[0] != grid_full_size[0]) ||
     506         1280 :       (grid_local_size[1] != grid_full_size[1]) ||
     507         1280 :       (grid_local_size[2] != grid_full_size[2])) {
     508            0 :     setup_grid_window(grid, shift_local, border_width, 0);
     509              :   } else {
     510         1280 :     grid->window_shift[0] = 0;
     511         1280 :     grid->window_shift[1] = 0;
     512         1280 :     grid->window_shift[2] = 0;
     513              : 
     514         1280 :     grid->window_size[0] = grid->size[0];
     515         1280 :     grid->window_size[1] = grid->size[1];
     516         1280 :     grid->window_size[2] = grid->size[2];
     517              :   }
     518              : 
     519         1280 :   grid->dh[0][0] = dh[0][0];
     520         1280 :   grid->dh[0][1] = dh[0][1];
     521         1280 :   grid->dh[0][2] = dh[0][2];
     522         1280 :   grid->dh[1][0] = dh[1][0];
     523         1280 :   grid->dh[1][1] = dh[1][1];
     524         1280 :   grid->dh[1][2] = dh[1][2];
     525         1280 :   grid->dh[2][0] = dh[2][0];
     526         1280 :   grid->dh[2][1] = dh[2][1];
     527         1280 :   grid->dh[2][2] = dh[2][2];
     528              : 
     529         1280 :   grid->dh_inv[0][0] = dh_inv[0][0];
     530         1280 :   grid->dh_inv[0][1] = dh_inv[0][1];
     531         1280 :   grid->dh_inv[0][2] = dh_inv[0][2];
     532         1280 :   grid->dh_inv[1][0] = dh_inv[1][0];
     533         1280 :   grid->dh_inv[1][1] = dh_inv[1][1];
     534         1280 :   grid->dh_inv[1][2] = dh_inv[1][2];
     535         1280 :   grid->dh_inv[2][0] = dh_inv[2][0];
     536         1280 :   grid->dh_inv[2][1] = dh_inv[2][1];
     537         1280 :   grid->dh_inv[2][2] = dh_inv[2][2];
     538              : 
     539         1280 :   verify_orthogonality(dh, grid->orthogonal);
     540              : 
     541         1280 :   if (orthorhombic) {
     542          672 :     grid->orthogonal[0] = true;
     543          672 :     grid->orthogonal[1] = true;
     544          672 :     grid->orthogonal[2] = true;
     545              :   }
     546         1280 : }
     547              : 
     548              : /*******************************************************************************
     549              :  * \brief Allocates a task list for the dgemm backend.
     550              :  *        See grid_task_list.h for details.
     551              :  ******************************************************************************/
     552           20 : void grid_dgemm_create_task_list(
     553              :     const bool orthorhombic, const int ntasks, const int nlevels,
     554              :     const int natoms, const int nkinds, const int nblocks,
     555              :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
     556              :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
     557              :     const int level_list[ntasks], const int iatom_list[ntasks],
     558              :     const int jatom_list[ntasks], const int iset_list[ntasks],
     559              :     const int jset_list[ntasks], const int ipgf_list[ntasks],
     560              :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
     561              :     const int block_num_list[ntasks], const double radius_list[ntasks],
     562              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     563              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     564              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     565              :     const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list) {
     566              : 
     567           20 :   if (*task_list == NULL) {
     568            8 :     *task_list = create_grid_context_dgemm(
     569              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     570              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     571              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     572              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     573              :         npts_local, shift_local, border_width, dh, dh_inv);
     574              :   } else {
     575           12 :     update_grid_context_dgemm(
     576              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     577              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     578              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     579              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     580              :         npts_local, shift_local, border_width, dh, dh_inv, *task_list);
     581              :   }
     582              : 
     583           20 :   const grid_library_config config = grid_library_get_config();
     584           20 :   if (config.apply_cutoff) {
     585            0 :     apply_cutoff(*task_list);
     586              :   }
     587           20 : }
     588              : 
     589              : /*******************************************************************************
     590              :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     591              :  ******************************************************************************/
     592            8 : void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list) {
     593            8 :   destroy_grid_context_dgemm(task_list);
     594            8 : }
        

Generated by: LCOV version 2.0-1