LCOV - code coverage report
Current view: top level - src/grid - grid_replay.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:cccd2f3) Lines: 88.4 % 301 266
Test Date: 2026-05-06 07:07:47 Functions: 100.0 % 9 9

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include <assert.h>
       9              : #include <fenv.h>
      10              : #include <limits.h>
      11              : #include <math.h>
      12              : #include <omp.h>
      13              : #include <stdarg.h>
      14              : #include <stdbool.h>
      15              : #include <stdio.h>
      16              : #include <stdlib.h>
      17              : #include <string.h>
      18              : 
      19              : #include "../offload/offload_buffer.h"
      20              : #include "common/grid_common.h"
      21              : #include "grid_replay.h"
      22              : 
      23              : #include "cpu/grid_cpu_collocate.h"
      24              : #include "cpu/grid_cpu_integrate.h"
      25              : #include "grid_task_list.h"
      26              : 
      27              : /*******************************************************************************
      28              :  * \brief Reads next line from given filehandle and handles errors.
      29              :  * \author Ole Schuett
      30              :  ******************************************************************************/
      31       655456 : static void read_next_line(char line[], int length, FILE *fp) {
      32      1310912 :   if (fgets(line, length, fp) == NULL) {
      33            0 :     fprintf(stderr, "Error: Could not read line.\n");
      34            0 :     abort();
      35              :   }
      36       655456 : }
      37              : 
      38              : /*******************************************************************************
      39              :  * \brief Shorthand for parsing a single integer value.
      40              :  * \author Ole Schuett
      41              :  ******************************************************************************/
      42         1248 : static int parse_int(const char key[], FILE *fp) {
      43         1248 :   int value;
      44         1248 :   char line[100], format[100];
      45         1248 :   read_next_line(line, sizeof(line), fp);
      46         1248 :   snprintf(format, sizeof(format), "%s %%i", key);
      47         1248 :   if (sscanf(line, format, &value) != 1) {
      48            0 :     assert(!"parse_int failed");
      49              :   }
      50         1248 :   return value;
      51              : }
      52              : 
      53              : /*******************************************************************************
      54              :  * \brief Shorthand for parsing a vector of three integer values.
      55              :  * \author Ole Schuett
      56              :  ******************************************************************************/
      57          416 : static void parse_int3(const char key[], FILE *fp, int vec[3]) {
      58          416 :   char line[100], format[100];
      59          416 :   read_next_line(line, sizeof(line), fp);
      60          416 :   snprintf(format, sizeof(format), "%s %%i %%i %%i", key);
      61          416 :   if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
      62            0 :     assert(!"parse_int3 failed");
      63              :   }
      64          416 : }
      65              : 
      66              : /*******************************************************************************
      67              :  * \brief Shorthand for parsing a single double value.
      68              :  * \author Ole Schuett
      69              :  ******************************************************************************/
      70          416 : static double parse_double(const char key[], FILE *fp) {
      71          416 :   double value;
      72          416 :   char line[100], format[100];
      73          416 :   read_next_line(line, sizeof(line), fp);
      74          416 :   snprintf(format, sizeof(format), "%s %%lf", key);
      75          416 :   if (sscanf(line, format, &value) != 1) {
      76            0 :     assert(!"parse_double failed");
      77              :   }
      78          416 :   return value;
      79              : }
      80              : 
      81              : /*******************************************************************************
      82              :  * \brief Shorthand for parsing a vector of three double values.
      83              :  * \author Ole Schuett
      84              :  ******************************************************************************/
      85          416 : static void parse_double3(const char key[], FILE *fp, double vec[3]) {
      86          416 :   char line[100], format[100];
      87          416 :   read_next_line(line, sizeof(line), fp);
      88          416 :   snprintf(format, sizeof(format), "%s %%lf %%lf %%lf", key);
      89          416 :   if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
      90            0 :     assert(!"parse_double3 failed");
      91              :   }
      92          416 : }
      93              : 
      94              : /*******************************************************************************
      95              :  * \brief Shorthand for parsing a 3x3 matrix of doubles.
      96              :  * \author Ole Schuett
      97              :  ******************************************************************************/
      98          312 : static void parse_double3x3(const char key[], FILE *fp, double mat[3][3]) {
      99          312 :   char line[100], format[100];
     100         1248 :   for (int i = 0; i < 3; i++) {
     101          936 :     read_next_line(line, sizeof(line), fp);
     102          936 :     snprintf(format, sizeof(format), "%s %i %%lf %%lf %%lf", key, i);
     103          936 :     if (sscanf(line, format, &mat[i][0], &mat[i][1], &mat[i][2]) != 3) {
     104            0 :       assert(!"parse_double3x3 failed");
     105              :     }
     106              :   }
     107          312 : }
     108              : 
     109              : /*******************************************************************************
     110              :  * \brief Creates mock basis set using the identity as decontraction matrix.
     111              :  * \author Ole Schuett
     112              :  ******************************************************************************/
     113          104 : static void create_dummy_basis_set(const int size, const int lmin,
     114              :                                    const int lmax, const double zet,
     115          104 :                                    grid_basis_set **basis_set) {
     116              : 
     117          104 :   double sphi_mutable[size][size];
     118         1788 :   for (int i = 0; i < size; i++) {
     119        47808 :     for (int j = 0; j < size; j++) {
     120        90564 :       sphi_mutable[i][j] = (i == j) ? 1.0 : 0.0; // identity matrix
     121              :     }
     122              :   }
     123          104 :   const double(*sphi)[size] = (const double(*)[size])sphi_mutable;
     124              : 
     125          104 :   const int npgf = size / ncoset(lmax);
     126          104 :   assert(size == npgf * ncoset(lmax));
     127              : 
     128          104 :   const int first_sgf[1] = {1};
     129              : 
     130          104 :   double zet_array_mutable[1][npgf];
     131          808 :   for (int i = 0; i < npgf; i++) {
     132          704 :     zet_array_mutable[0][i] = zet;
     133              :   }
     134          104 :   const double(*zet_array)[npgf] = (const double(*)[npgf])zet_array_mutable;
     135              : 
     136          104 :   grid_create_basis_set(/*nset=*/1,
     137              :                         /*nsgf=*/size,
     138              :                         /*maxco=*/size,
     139              :                         /*maxpgf=*/npgf,
     140              :                         /*lmin=*/&lmin,
     141              :                         /*lmax=*/&lmax,
     142              :                         /*npgf=*/&npgf,
     143              :                         /*nsgf_set=*/&size,
     144              :                         /*first_sgf=*/first_sgf,
     145              :                         /*sphi=*/sphi,
     146              :                         /*zet=*/zet_array, basis_set);
     147          104 : }
     148              : 
     149              : /*******************************************************************************
     150              :  * \brief Creates mock task list with one task per cycle.
     151              :  * \author Ole Schuett
     152              :  ******************************************************************************/
     153           52 : static void create_dummy_task_list(
     154              :     const bool orthorhombic, const int border_mask, const double ra[3],
     155              :     const double rab[3], const double radius, const grid_basis_set *basis_set_a,
     156              :     const grid_basis_set *basis_set_b, const int o1, const int o2,
     157              :     const int la_max, const int lb_max, const int cycles,
     158              :     const int cycles_per_block, const int npts_global[][3],
     159              :     const int npts_local[][3], const int shift_local[][3],
     160              :     const int border_width[][3], const double dh[][3][3],
     161           52 :     const double dh_inv[][3][3], grid_task_list **task_list) {
     162              : 
     163           52 :   const int ntasks = cycles;
     164           52 :   const int nlevels = 1;
     165           52 :   const int natoms = 2;
     166           52 :   const int nkinds = 2;
     167           52 :   int nblocks = cycles / cycles_per_block + 1;
     168              : 
     169              :   /* we can not have more blocks than the number of tasks */
     170           52 :   if (cycles == 1) {
     171           52 :     nblocks = 1;
     172              :   }
     173              : 
     174           52 :   int block_offsets[nblocks];
     175           52 :   memset(block_offsets, 0, nblocks * sizeof(int)); // all point to same data
     176           52 :   const double atom_positions[2][3] = {
     177           52 :       {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
     178           52 :   const int atom_kinds[2] = {1, 2};
     179           52 :   const grid_basis_set *basis_sets[2] = {basis_set_a, basis_set_b};
     180           52 :   const int ipgf = o1 / ncoset(la_max) + 1;
     181           52 :   const int jpgf = o2 / ncoset(lb_max) + 1;
     182           52 :   assert(o1 == (ipgf - 1) * ncoset(la_max));
     183           52 :   assert(o2 == (jpgf - 1) * ncoset(lb_max));
     184              : 
     185           52 :   int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
     186           52 :   int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
     187           52 :       jpgf_list[ntasks];
     188           52 :   int border_mask_list[ntasks], block_num_list[ntasks];
     189           52 :   double radius_list[ntasks], rab_list_mutable[ntasks][3];
     190          104 :   for (int i = 0; i < cycles; i++) {
     191           52 :     level_list[i] = 1;
     192           52 :     iatom_list[i] = 1;
     193           52 :     jatom_list[i] = 2;
     194           52 :     iset_list[i] = 1;
     195           52 :     jset_list[i] = 1;
     196           52 :     ipgf_list[i] = ipgf;
     197           52 :     jpgf_list[i] = jpgf;
     198           52 :     border_mask_list[i] = border_mask;
     199           52 :     block_num_list[i] = i / cycles_per_block + 1;
     200           52 :     radius_list[i] = radius;
     201           52 :     rab_list_mutable[i][0] = rab[0];
     202           52 :     rab_list_mutable[i][1] = rab[1];
     203           52 :     rab_list_mutable[i][2] = rab[2];
     204              :   }
     205           52 :   const double(*rab_list)[3] = (const double(*)[3])rab_list_mutable;
     206              : 
     207           52 :   grid_create_task_list(
     208              :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     209              :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     210              :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
     211              :       block_num_list, radius_list, rab_list, npts_global, npts_local,
     212              :       shift_local, border_width, dh, dh_inv, task_list);
     213           52 : }
     214              : 
     215              : /*******************************************************************************
     216              :  * \brief Reads a .task file, collocates/integrates it, and compares results.
     217              :  *        See grid_replay.h for details.
     218              :  * \author Ole Schuett
     219              :  ******************************************************************************/
     220          104 : bool grid_replay(const char *filename, const int cycles, const bool collocate,
     221              :                  const bool batch, const int cycles_per_block,
     222              :                  const double tolerance) {
     223              : 
     224          104 :   if (cycles < 1) {
     225            0 :     fprintf(stderr, "Error: Cycles have to be greater than zero.\n");
     226            0 :     exit(1);
     227              :   }
     228              : 
     229          104 :   if (cycles_per_block < 1 || cycles_per_block > cycles) {
     230            0 :     fprintf(stderr,
     231              :             "Error: Cycles per block has to be between 1 and cycles.\n");
     232            0 :     exit(1);
     233              :   }
     234              : 
     235          104 :   FILE *fp = fopen(filename, "r");
     236          104 :   if (fp == NULL) {
     237            0 :     fprintf(stderr, "Could not open task file: %s\n", filename);
     238            0 :     exit(1);
     239              :   }
     240              : 
     241          104 :   char header_line[100];
     242          104 :   read_next_line(header_line, sizeof(header_line), fp);
     243          104 :   if (strcmp(header_line, "#Grid task v10\n") != 0) {
     244            0 :     fprintf(stderr, "Error: Wrong file header.\n");
     245            0 :     abort();
     246              :   }
     247              : 
     248          104 :   const bool orthorhombic = parse_int("orthorhombic", fp);
     249          104 :   const int border_mask = parse_int("border_mask", fp);
     250          104 :   const enum grid_func func = (enum grid_func)parse_int("func", fp);
     251          104 :   const bool compute_tau = (func == GRID_FUNC_DADB);
     252          104 :   const int la_max = parse_int("la_max", fp);
     253          104 :   const int la_min = parse_int("la_min", fp);
     254          104 :   const int lb_max = parse_int("lb_max", fp);
     255          104 :   const int lb_min = parse_int("lb_min", fp);
     256          104 :   const double zeta = parse_double("zeta", fp);
     257          104 :   const double zetb = parse_double("zetb", fp);
     258          104 :   const double rscale = parse_double("rscale", fp);
     259              : 
     260          104 :   double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
     261          104 :   parse_double3x3("dh", fp, dh_mutable);
     262          104 :   parse_double3x3("dh_inv", fp, dh_inv_mutable);
     263          104 :   parse_double3("ra", fp, ra);
     264          104 :   parse_double3("rab", fp, rab);
     265          104 :   const double(*dh)[3] = (const double(*)[3])dh_mutable;
     266          104 :   const double(*dh_inv)[3] = (const double(*)[3])dh_inv_mutable;
     267              : 
     268          104 :   int npts_global[3], npts_local[3], shift_local[3], border_width[3];
     269          104 :   parse_int3("npts_global", fp, npts_global);
     270          104 :   parse_int3("npts_local", fp, npts_local);
     271          104 :   parse_int3("shift_local", fp, shift_local);
     272          104 :   parse_int3("border_width", fp, border_width);
     273              : 
     274          104 :   const double radius = parse_double("radius", fp);
     275          104 :   const int o1 = parse_int("o1", fp);
     276          104 :   const int o2 = parse_int("o2", fp);
     277          104 :   const int n1 = parse_int("n1", fp);
     278          104 :   const int n2 = parse_int("n2", fp);
     279          104 :   const size_t n12 = (size_t)n1 * (size_t)n2;
     280              : 
     281          104 :   double *pab_storage = malloc(n12 * sizeof(double));
     282          104 :   if (pab_storage == NULL) {
     283            0 :     fprintf(stderr, "Error: Could not allocate pab buffer.\n");
     284            0 :     abort();
     285              :   }
     286         1688 :   double(*pab)[n1] = (double(*)[n1])pab_storage;
     287              :   char line[100], format[100];
     288         1688 :   for (int i = 0; i < n2; i++) {
     289        46120 :     for (int j = 0; j < n1; j++) {
     290        44536 :       read_next_line(line, sizeof(line), fp);
     291        44536 :       snprintf(format, sizeof(format), "pab %i %i %%lf", i, j);
     292        44536 :       if (sscanf(line, format, &pab[i][j]) != 1) {
     293            0 :         assert(!"parse_pab failed");
     294              :       }
     295              :     }
     296              :   }
     297              : 
     298          104 :   const int npts_local_total = npts_local[0] * npts_local[1] * npts_local[2];
     299          104 :   offload_buffer *grid_ref = NULL;
     300          104 :   offload_create_buffer(npts_local_total, &grid_ref);
     301          104 :   memset(grid_ref->host_buffer, 0, npts_local_total * sizeof(double));
     302              : 
     303          104 :   const int ngrid_nonzero = parse_int("ngrid_nonzero", fp);
     304       578328 :   for (int n = 0; n < ngrid_nonzero; n++) {
     305       578224 :     int i, j, k;
     306       578224 :     double value;
     307       578224 :     read_next_line(line, sizeof(line), fp);
     308       578224 :     if (sscanf(line, "grid %i %i %i %le", &i, &j, &k, &value) != 4) {
     309            0 :       assert(!"parse_grid failed");
     310              :     }
     311       578224 :     grid_ref->host_buffer[k * npts_local[1] * npts_local[0] +
     312       578224 :                           j * npts_local[0] + i] = value;
     313              :   }
     314              : 
     315          104 :   double *hab_ref_storage = calloc(n12, sizeof(double));
     316          104 :   if (hab_ref_storage == NULL) {
     317            0 :     fprintf(stderr, "Error: Could not allocate hab_ref buffer.\n");
     318            0 :     abort();
     319              :   }
     320              :   double(*hab_ref)[n1] = (double(*)[n1])hab_ref_storage;
     321          896 :   for (int i = o2; i < ncoset(lb_max) + o2; i++) {
     322        29848 :     for (int j = o1; j < ncoset(la_max) + o1; j++) {
     323        29056 :       read_next_line(line, sizeof(line), fp);
     324        29056 :       snprintf(format, sizeof(format), "hab %i %i %%lf", i, j);
     325        29056 :       if (sscanf(line, format, &hab_ref[i][j]) != 1) {
     326            0 :         assert(!"parse_hab failed");
     327              :       }
     328              :     }
     329              :   }
     330              : 
     331          104 :   double forces_ref[2][3];
     332          104 :   parse_double3("force_a", fp, forces_ref[0]);
     333          104 :   parse_double3("force_b", fp, forces_ref[1]);
     334              : 
     335          104 :   double virial_ref[3][3];
     336          104 :   parse_double3x3("virial", fp, virial_ref);
     337              : 
     338          104 :   char footer_line[100];
     339          104 :   read_next_line(footer_line, sizeof(footer_line), fp);
     340          104 :   if (strcmp(footer_line, "#THE_END\n") != 0) {
     341            0 :     fprintf(stderr, "Error: Wrong footer line.\n");
     342            0 :     abort();
     343              :   }
     344              : 
     345          104 :   if (fclose(fp) != 0) {
     346            0 :     fprintf(stderr, "Could not close task file: %s\n", filename);
     347            0 :     abort();
     348              :   }
     349              : 
     350          104 :   offload_buffer *grid_test = NULL;
     351          104 :   offload_create_buffer(npts_local_total, &grid_test);
     352          104 :   double *hab_test_storage = calloc(n12, sizeof(double));
     353          104 :   if (hab_test_storage == NULL) {
     354            0 :     fprintf(stderr, "Error: Could not allocate hab_test buffer.\n");
     355            0 :     abort();
     356              :   }
     357          104 :   double(*hab_test)[n1] = (double(*)[n1])hab_test_storage;
     358          104 :   double forces_test[2][3];
     359          104 :   double virial_test[3][3];
     360          104 :   double start_time, end_time;
     361              : 
     362          104 :   if (batch) {
     363           52 :     grid_basis_set *basisa = NULL, *basisb = NULL;
     364           52 :     create_dummy_basis_set(n1, la_min, la_max, zeta, &basisa);
     365           52 :     create_dummy_basis_set(n2, lb_min, lb_max, zetb, &basisb);
     366           52 :     grid_task_list *task_list = NULL;
     367           52 :     create_dummy_task_list(
     368              :         orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
     369              :         la_max, lb_max, cycles, cycles_per_block, (const int(*)[3])npts_global,
     370              :         (const int(*)[3])npts_local, (const int(*)[3])shift_local,
     371              :         (const int(*)[3])border_width, (const double(*)[3][3])dh,
     372              :         (const double(*)[3][3])dh_inv, &task_list);
     373           52 :     offload_buffer *pab_blocks = NULL, *hab_blocks = NULL;
     374           52 :     offload_create_buffer(n1 * n2, &pab_blocks);
     375           52 :     offload_create_buffer(n1 * n2, &hab_blocks);
     376           52 :     const double f = (collocate) ? rscale : 1.0;
     377          944 :     for (int i = 0; i < n1; i++) {
     378        23160 :       for (int j = 0; j < n2; j++) {
     379        22268 :         pab_blocks->host_buffer[j * n1 + i] = 0.5 * f * pab[j][i];
     380              :       }
     381              :     }
     382           52 :     start_time = omp_get_wtime();
     383           52 :     const int nlevels = 1;
     384           52 :     const int natoms = 2;
     385           52 :     if (collocate) {
     386              :       // collocate
     387           26 :       offload_buffer *grids[1] = {grid_test};
     388           26 :       grid_collocate_task_list(task_list, func, nlevels,
     389              :                                (const int(*)[3])npts_local, pab_blocks, grids);
     390              :     } else {
     391              :       // integrate
     392           26 :       const offload_buffer *grids[1] = {grid_ref};
     393           26 :       grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
     394              :                                (const int(*)[3])npts_local, pab_blocks, grids,
     395              :                                hab_blocks, forces_test, virial_test);
     396          422 :       for (int i = 0; i < n2; i++) {
     397        11530 :         for (int j = 0; j < n1; j++) {
     398        11134 :           hab_test[i][j] = hab_blocks->host_buffer[i * n1 + j];
     399              :         }
     400              :       }
     401              :     }
     402           52 :     end_time = omp_get_wtime();
     403           52 :     grid_free_basis_set(basisa);
     404           52 :     grid_free_basis_set(basisb);
     405           52 :     grid_free_task_list(task_list);
     406           52 :     offload_free_buffer(pab_blocks);
     407           52 :     offload_free_buffer(hab_blocks);
     408              :   } else {
     409           52 :     start_time = omp_get_wtime();
     410           52 :     if (collocate) {
     411              :       // collocate
     412           26 :       memset(grid_test->host_buffer, 0, npts_local_total * sizeof(double));
     413           52 :       for (int i = 0; i < cycles; i++) {
     414           26 :         grid_cpu_collocate_pgf_product(
     415              :             orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
     416              :             zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global, npts_local,
     417              :             shift_local, border_width, radius, o1, o2, n1, n2, pab,
     418           26 :             grid_test->host_buffer);
     419              :       }
     420              :     } else {
     421              :       // integrate
     422           26 :       memset(hab_test_storage, 0, n12 * sizeof(double));
     423           26 :       memset(forces_test, 0, 2 * 3 * sizeof(double));
     424           26 :       double virials_test[2][3][3] = {0};
     425           52 :       for (int i = 0; i < cycles; i++) {
     426           26 :         grid_cpu_integrate_pgf_product(
     427              :             orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
     428              :             lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global, npts_local,
     429              :             shift_local, border_width, radius, o1, o2, n1, n2,
     430           26 :             grid_ref->host_buffer, hab_test, pab, forces_test, virials_test,
     431              :             NULL, NULL, NULL);
     432              :       }
     433          104 :       for (int i = 0; i < 3; i++) {
     434          312 :         for (int j = 0; j < 3; j++) {
     435          234 :           virial_test[i][j] = virials_test[0][i][j] + virials_test[1][i][j];
     436              :         }
     437              :       }
     438              :     }
     439           52 :     end_time = omp_get_wtime();
     440              :   }
     441              : 
     442          104 :   double max_value = 0.0;
     443          104 :   double max_rel_diff = 0.0;
     444          104 :   const double derivatives_precision = 1e-4; // account for higher numeric noise
     445          104 :   if (collocate) {
     446              :     // collocate
     447              :     // compare grid
     448      7151464 :     for (int i = 0; i < npts_local_total; i++) {
     449      7151412 :       const double ref_value = cycles * grid_ref->host_buffer[i];
     450      7151412 :       const double test_value = grid_test->host_buffer[i];
     451      7151412 :       const double diff = fabs(test_value - ref_value);
     452      7151412 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     453      7151412 :       max_rel_diff = fmax(max_rel_diff, rel_diff);
     454      7151412 :       max_value = fmax(max_value, fabs(test_value));
     455              :     }
     456              :   } else {
     457              :     // integrate
     458              :     // compare hab
     459          844 :     for (int i = 0; i < n2; i++) {
     460        23060 :       for (int j = 0; j < n1; j++) {
     461        22268 :         const double ref_value = cycles * hab_ref[i][j];
     462        22268 :         const double test_value = hab_test[i][j];
     463        22268 :         const double diff = fabs(test_value - ref_value);
     464        22268 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     465        22268 :         max_rel_diff = fmax(max_rel_diff, rel_diff);
     466        22268 :         max_value = fmax(max_value, fabs(test_value));
     467        22268 :         if (rel_diff > tolerance) {
     468            0 :           printf("hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n", i,
     469              :                  j, ref_value, test_value, diff, rel_diff);
     470              :         }
     471              :       }
     472              :     }
     473              :     // compare forces
     474          156 :     for (int i = 0; i < 2; i++) {
     475          416 :       for (int j = 0; j < 3; j++) {
     476          312 :         const double ref_value = cycles * forces_ref[i][j];
     477          312 :         const double test_value = forces_test[i][j];
     478          312 :         const double diff = fabs(test_value - ref_value);
     479          312 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     480          312 :         max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
     481          312 :         if (rel_diff * derivatives_precision > tolerance) {
     482            0 :           printf("forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
     483              :                  i, j, ref_value, test_value, diff, rel_diff);
     484              :         }
     485              :       }
     486              :     }
     487              :     // compare virial
     488          208 :     for (int i = 0; i < 3; i++) {
     489          624 :       for (int j = 0; j < 3; j++) {
     490          468 :         const double ref_value = cycles * virial_ref[i][j];
     491          468 :         const double test_value = virial_test[i][j];
     492          468 :         const double diff = fabs(test_value - ref_value);
     493          468 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     494          468 :         max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
     495          468 :         if (rel_diff * derivatives_precision > tolerance) {
     496            0 :           printf("virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
     497              :                  i, j, ref_value, test_value, diff, rel_diff);
     498              :         }
     499              :       }
     500              :     }
     501              :   }
     502          104 :   printf("Task: %-55s   %9s %-7s   Cycles: %e   Max value: %le   "
     503              :          "Max rel diff: %le   Time: %le sec\n",
     504              :          filename, collocate ? "Collocate" : "Integrate",
     505          104 :          batch ? "Batched" : "PGF-CPU", (float)cycles, max_value, max_rel_diff,
     506              :          end_time - start_time);
     507              : 
     508          104 :   offload_free_buffer(grid_ref);
     509          104 :   offload_free_buffer(grid_test);
     510          104 :   free(pab_storage);
     511          104 :   free(hab_ref_storage);
     512          104 :   free(hab_test_storage);
     513              : 
     514              :   // Check floating point exceptions.
     515          104 :   if (fetestexcept(FE_DIVBYZERO) != 0) {
     516            0 :     fprintf(stderr, "Error: Floating point exception FE_DIVBYZERO.\n");
     517            0 :     exit(1);
     518              :   }
     519          104 :   if (fetestexcept(FE_OVERFLOW) != 0) {
     520            0 :     fprintf(stderr, "Error: Floating point exception FE_OVERFLOW.\n");
     521            0 :     exit(1);
     522              :   }
     523              : 
     524          104 :   return max_rel_diff < tolerance;
     525              : }
     526              : 
     527              : // EOF
        

Generated by: LCOV version 2.0-1