LCOV - CP2K Regtests (git:b4bd748)

LCOV - code coverage report

Current view:	top level - src - pao_ml.F (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:b4bd748)	Lines:	231	236	97.9 %
Date:	2025-03-09 07:56:22	Functions:	12	16	75.0 %

          Line data    Source code

       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Main module for PAO Machine Learning
      10             : !> \author Ole Schuett
      11             : ! **************************************************************************************************
      12             : MODULE pao_ml
      13             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      14             :                                               get_atomic_kind
      15             :    USE basis_set_types,                 ONLY: gto_basis_set_type
      16             :    USE cell_methods,                    ONLY: cell_create
      17             :    USE cell_types,                      ONLY: cell_type
      18             :    USE cp_dbcsr_api,                    ONLY: dbcsr_iterator_blocks_left,&
      19             :                                               dbcsr_iterator_next_block,&
      20             :                                               dbcsr_iterator_start,&
      21             :                                               dbcsr_iterator_stop,&
      22             :                                               dbcsr_iterator_type,&
      23             :                                               dbcsr_type
      24             :    USE kinds,                           ONLY: default_path_length,&
      25             :                                               default_string_length,&
      26             :                                               dp
      27             :    USE machine,                         ONLY: m_flush
      28             :    USE message_passing,                 ONLY: mp_para_env_type
      29             :    USE pao_input,                       ONLY: id2str,&
      30             :                                               pao_ml_gp,&
      31             :                                               pao_ml_lazy,&
      32             :                                               pao_ml_nn,&
      33             :                                               pao_ml_prior_mean,&
      34             :                                               pao_ml_prior_zero,&
      35             :                                               pao_rotinv_param
      36             :    USE pao_io,                          ONLY: pao_ioblock_type,&
      37             :                                               pao_iokind_type,&
      38             :                                               pao_kinds_ensure_equal,&
      39             :                                               pao_read_raw
      40             :    USE pao_ml_descriptor,               ONLY: pao_ml_calc_descriptor
      41             :    USE pao_ml_gaussprocess,             ONLY: pao_ml_gp_gradient,&
      42             :                                               pao_ml_gp_predict,&
      43             :                                               pao_ml_gp_train
      44             :    USE pao_ml_neuralnet,                ONLY: pao_ml_nn_gradient,&
      45             :                                               pao_ml_nn_predict,&
      46             :                                               pao_ml_nn_train
      47             :    USE pao_types,                       ONLY: pao_env_type,&
      48             :                                               training_matrix_type
      49             :    USE particle_types,                  ONLY: particle_type
      50             :    USE qs_environment_types,            ONLY: get_qs_env,&
      51             :                                               qs_environment_type
      52             :    USE qs_kind_types,                   ONLY: get_qs_kind,&
      53             :                                               qs_kind_type
      54             : #include "./base/base_uses.f90"
      55             : 
      56             :    IMPLICIT NONE
      57             : 
      58             :    PRIVATE
      59             : 
      60             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'pao_ml'
      61             : 
      62             :    PUBLIC :: pao_ml_init, pao_ml_predict, pao_ml_forces
      63             : 
      64             :    ! linked list used to group training points by kind
      65             :    TYPE training_point_type
      66             :       TYPE(training_point_type), POINTER       :: next => Null()
      67             :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: input
      68             :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: output
      69             :    END TYPE training_point_type
      70             : 
      71             :    TYPE training_list_type
      72             :       CHARACTER(LEN=default_string_length)     :: kindname = ""
      73             :       TYPE(training_point_type), POINTER       :: head => Null()
      74             :       INTEGER                                  :: npoints = 0
      75             :    END TYPE training_list_type
      76             : 
      77             : CONTAINS
      78             : 
      79             : ! **************************************************************************************************
      80             : !> \brief Initializes the learning machinery
      81             : !> \param pao ...
      82             : !> \param qs_env ...
      83             : ! **************************************************************************************************
      84          98 :    SUBROUTINE pao_ml_init(pao, qs_env)
      85             :       TYPE(pao_env_type), POINTER                        :: pao
      86             :       TYPE(qs_environment_type), POINTER                 :: qs_env
      87             : 
      88             :       INTEGER                                            :: i
      89          98 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
      90             :       TYPE(mp_para_env_type), POINTER                    :: para_env
      91             :       TYPE(training_list_type), ALLOCATABLE, &
      92          98 :          DIMENSION(:)                                    :: training_lists
      93             : 
      94          98 :       IF (SIZE(pao%ml_training_set) == 0) RETURN
      95             : 
      96          18 :       IF (pao%iw > 0) WRITE (pao%iw, *) 'PAO|ML| Initializing maschine learning...'
      97             : 
      98          18 :       IF (pao%parameterization /= pao_rotinv_param) &
      99           0 :          CPABORT("PAO maschine learning requires ROTINV parametrization")
     100             : 
     101          18 :       CALL get_qs_env(qs_env, para_env=para_env, atomic_kind_set=atomic_kind_set)
     102             : 
     103             :       ! create training-set data-structure
     104          74 :       ALLOCATE (training_lists(SIZE(atomic_kind_set)))
     105          38 :       DO i = 1, SIZE(training_lists)
     106          38 :          CALL get_atomic_kind(atomic_kind_set(i), name=training_lists(i)%kindname)
     107             :       END DO
     108             : 
     109             :       ! parses training files, calculates descriptors and stores all training-points as linked lists
     110          52 :       DO i = 1, SIZE(pao%ml_training_set)
     111          52 :          CALL add_to_training_list(pao, qs_env, training_lists, filename=pao%ml_training_set(i)%fn)
     112             :       END DO
     113             : 
     114             :       ! ensure there there are training points for all kinds that use pao
     115          18 :       CALL sanity_check(qs_env, training_lists)
     116             : 
     117             :       ! turns linked lists into matrices and syncs them across ranks
     118          18 :       CALL training_list2matrix(training_lists, pao%ml_training_matrices, para_env)
     119             : 
     120             :       ! calculate and subtract prior
     121          18 :       CALL pao_ml_substract_prior(pao%ml_prior, pao%ml_training_matrices)
     122             : 
     123             :       ! print some statistics about the training set and dump it upon request
     124          18 :       CALL pao_ml_print(pao, pao%ml_training_matrices)
     125             : 
     126             :       ! use training-set to train model
     127          18 :       CALL pao_ml_train(pao)
     128             : 
     129         116 :    END SUBROUTINE pao_ml_init
     130             : 
     131             : ! **************************************************************************************************
     132             : !> \brief Reads the given file and adds its training points to linked lists.
     133             : !> \param pao ...
     134             : !> \param qs_env ...
     135             : !> \param training_lists ...
     136             : !> \param filename ...
     137             : ! **************************************************************************************************
     138          34 :    SUBROUTINE add_to_training_list(pao, qs_env, training_lists, filename)
     139             :       TYPE(pao_env_type), POINTER                        :: pao
     140             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     141             :       TYPE(training_list_type), DIMENSION(:)             :: training_lists
     142             :       CHARACTER(LEN=default_path_length)                 :: filename
     143             : 
     144             :       CHARACTER(LEN=default_string_length)               :: param
     145             :       INTEGER                                            :: iatom, ikind, natoms, nkinds, nparams
     146          34 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom2kind, kindsmap
     147             :       INTEGER, DIMENSION(2)                              :: ml_range
     148          34 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: hmat, positions
     149          34 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     150             :       TYPE(cell_type), POINTER                           :: cell
     151             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     152          34 :       TYPE(pao_ioblock_type), ALLOCATABLE, DIMENSION(:)  :: xblocks
     153          34 :       TYPE(pao_iokind_type), ALLOCATABLE, DIMENSION(:)   :: kinds
     154          34 :       TYPE(particle_type), DIMENSION(:), POINTER         :: my_particle_set
     155          34 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     156             :       TYPE(training_point_type), POINTER                 :: new_point
     157             : 
     158          34 :       NULLIFY (new_point, cell)
     159             : 
     160          17 :       IF (pao%iw > 0) WRITE (pao%iw, '(A,A)') " PAO|ML| Reading training frame from file: ", TRIM(filename)
     161             : 
     162          34 :       CALL get_qs_env(qs_env, para_env=para_env)
     163             : 
     164             :       ! parse training data on first rank
     165          34 :       IF (para_env%is_source()) THEN
     166          17 :          CALL pao_read_raw(filename, param, hmat, kinds, atom2kind, positions, xblocks, ml_range)
     167             : 
     168             :          ! check parametrization
     169          17 :          IF (TRIM(param) .NE. TRIM(ADJUSTL(id2str(pao%parameterization)))) &
     170          17 :             CPABORT("Restart PAO parametrization does not match")
     171             : 
     172             :          ! map read-in kinds onto kinds of this run
     173          17 :          CALL match_kinds(pao, qs_env, kinds, kindsmap)
     174          17 :          nkinds = SIZE(kindsmap)
     175          17 :          natoms = SIZE(positions, 1)
     176             :       END IF
     177             : 
     178             :       ! broadcast parsed raw training data
     179          34 :       CALL para_env%bcast(nkinds)
     180          34 :       CALL para_env%bcast(natoms)
     181          34 :       IF (.NOT. para_env%is_source()) THEN
     182          17 :          ALLOCATE (hmat(3, 3))
     183          51 :          ALLOCATE (kindsmap(nkinds))
     184          51 :          ALLOCATE (positions(natoms, 3))
     185          51 :          ALLOCATE (atom2kind(natoms))
     186             :       END IF
     187          34 :       CALL para_env%bcast(hmat)
     188          34 :       CALL para_env%bcast(kindsmap)
     189          34 :       CALL para_env%bcast(atom2kind)
     190          34 :       CALL para_env%bcast(positions)
     191          34 :       CALL para_env%bcast(ml_range)
     192             : 
     193          34 :       IF (ml_range(1) /= 1 .OR. ml_range(2) /= natoms) THEN
     194           0 :          CPWARN("Skipping some atoms for PAO-ML training.")
     195             :       END IF
     196             : 
     197             :       ! create cell from read-in h-matrix
     198          34 :       CALL cell_create(cell, hmat)
     199             : 
     200             :       ! create a particle_set based on read-in positions and refere to kinds of this run
     201          34 :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set)
     202         476 :       ALLOCATE (my_particle_set(natoms))
     203         102 :       DO iatom = 1, natoms
     204          68 :          ikind = kindsmap(atom2kind(iatom))
     205          68 :          my_particle_set(iatom)%atomic_kind => atomic_kind_set(ikind)
     206         306 :          my_particle_set(iatom)%r = positions(iatom, :)
     207             :       END DO
     208             : 
     209             :       ! fill linked list with training points
     210             :       ! Afterwards all ranks will have lists with the same number of entries,
     211             :       ! however the input and output arrays will only be allocated on one rank per entry.
     212             :       ! We farm out the expensive calculation of the descriptor across ranks.
     213         102 :       DO iatom = 1, natoms
     214          68 :          IF (iatom < ml_range(1) .OR. ml_range(2) < iatom) CYCLE
     215          68 :          ALLOCATE (new_point)
     216             : 
     217             :          ! training-point input, calculate descriptor only on one rank
     218          68 :          IF (MOD(iatom - 1, para_env%num_pe) == para_env%mepos) THEN
     219             :             CALL pao_ml_calc_descriptor(pao, &
     220             :                                         my_particle_set, &
     221             :                                         qs_kind_set, &
     222             :                                         cell, &
     223             :                                         iatom=iatom, &
     224          34 :                                         descriptor=new_point%input)
     225             :          END IF
     226             : 
     227             :          ! copy training-point output on first rank
     228          68 :          IF (para_env%is_source()) THEN
     229          34 :             nparams = SIZE(xblocks(iatom)%p, 1)
     230         102 :             ALLOCATE (new_point%output(nparams))
     231         272 :             new_point%output(:) = xblocks(iatom)%p(:, 1)
     232             :          END IF
     233             : 
     234             :          ! add to linked list
     235          68 :          ikind = kindsmap(atom2kind(iatom))
     236          68 :          training_lists(ikind)%npoints = training_lists(ikind)%npoints + 1
     237          68 :          new_point%next => training_lists(ikind)%head
     238         102 :          training_lists(ikind)%head => new_point
     239             :       END DO
     240             : 
     241          34 :       DEALLOCATE (cell, my_particle_set, hmat, kindsmap, positions, atom2kind)
     242             : 
     243         119 :    END SUBROUTINE add_to_training_list
     244             : 
     245             : ! **************************************************************************************************
     246             : !> \brief Make read-in kinds on to atomic-kinds of this run
     247             : !> \param pao ...
     248             : !> \param qs_env ...
     249             : !> \param kinds ...
     250             : !> \param kindsmap ...
     251             : ! **************************************************************************************************
     252          17 :    SUBROUTINE match_kinds(pao, qs_env, kinds, kindsmap)
     253             :       TYPE(pao_env_type), POINTER                        :: pao
     254             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     255             :       TYPE(pao_iokind_type), DIMENSION(:)                :: kinds
     256             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: kindsmap
     257             : 
     258             :       CHARACTER(LEN=default_string_length)               :: name
     259             :       INTEGER                                            :: ikind, jkind
     260          17 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     261             : 
     262          17 :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set)
     263             : 
     264          17 :       CPASSERT(.NOT. ALLOCATED(kindsmap))
     265          51 :       ALLOCATE (kindsmap(SIZE(kinds)))
     266          34 :       kindsmap(:) = -1
     267             : 
     268          34 :       DO ikind = 1, SIZE(kinds)
     269          35 :          DO jkind = 1, SIZE(atomic_kind_set)
     270          18 :             CALL get_atomic_kind(atomic_kind_set(jkind), name=name)
     271             :             ! match kinds via their name
     272          18 :             IF (TRIM(kinds(ikind)%name) .EQ. TRIM(name)) THEN
     273          17 :                CALL pao_kinds_ensure_equal(pao, qs_env, jkind, kinds(ikind))
     274          17 :                kindsmap(ikind) = jkind
     275          17 :                EXIT
     276             :             END IF
     277             :          END DO
     278             :       END DO
     279             : 
     280          34 :       IF (ANY(kindsmap < 1)) &
     281           0 :          CPABORT("PAO: Could not match all kinds from training set")
     282          17 :    END SUBROUTINE match_kinds
     283             : 
     284             : ! **************************************************************************************************
     285             : !> \brief Checks that there is at least one training point per pao-enabled kind
     286             : !> \param qs_env ...
     287             : !> \param training_lists ...
     288             : ! **************************************************************************************************
     289          18 :    SUBROUTINE sanity_check(qs_env, training_lists)
     290             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     291             :       TYPE(training_list_type), DIMENSION(:), TARGET     :: training_lists
     292             : 
     293             :       INTEGER                                            :: ikind, pao_basis_size
     294             :       TYPE(gto_basis_set_type), POINTER                  :: basis_set
     295          18 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     296             :       TYPE(training_list_type), POINTER                  :: training_list
     297             : 
     298          18 :       CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
     299             : 
     300          38 :       DO ikind = 1, SIZE(training_lists)
     301          20 :          training_list => training_lists(ikind)
     302          20 :          IF (training_list%npoints > 0) CYCLE ! it's ok
     303           2 :          CALL get_qs_kind(qs_kind_set(ikind), basis_set=basis_set, pao_basis_size=pao_basis_size)
     304           2 :          IF (pao_basis_size /= basis_set%nsgf) & ! if this kind has pao enabled...
     305          20 :             CPABORT("Found no training-points for kind: "//TRIM(training_list%kindname))
     306             :       END DO
     307             : 
     308          18 :    END SUBROUTINE sanity_check
     309             : 
     310             : ! **************************************************************************************************
     311             : !> \brief Turns the linked lists of training points into matrices
     312             : !> \param training_lists ...
     313             : !> \param training_matrices ...
     314             : !> \param para_env ...
     315             : ! **************************************************************************************************
     316          18 :    SUBROUTINE training_list2matrix(training_lists, training_matrices, para_env)
     317             :       TYPE(training_list_type), ALLOCATABLE, &
     318             :          DIMENSION(:), TARGET                            :: training_lists
     319             :       TYPE(training_matrix_type), ALLOCATABLE, &
     320             :          DIMENSION(:), TARGET                            :: training_matrices
     321             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     322             : 
     323             :       INTEGER                                            :: i, ikind, inp_size, ninputs, noutputs, &
     324             :                                                             npoints, out_size
     325             :       TYPE(training_list_type), POINTER                  :: training_list
     326             :       TYPE(training_matrix_type), POINTER                :: training_matrix
     327             :       TYPE(training_point_type), POINTER                 :: cur_point, prev_point
     328             : 
     329          18 :       CPASSERT(ALLOCATED(training_lists) .AND. .NOT. ALLOCATED(training_matrices))
     330             : 
     331          74 :       ALLOCATE (training_matrices(SIZE(training_lists)))
     332             : 
     333          38 :       DO ikind = 1, SIZE(training_lists)
     334          20 :          training_list => training_lists(ikind)
     335          20 :          training_matrix => training_matrices(ikind)
     336          20 :          training_matrix%kindname = training_list%kindname ! copy kindname
     337          20 :          npoints = training_list%npoints ! number of points
     338          20 :          IF (npoints == 0) THEN
     339           2 :             ALLOCATE (training_matrix%inputs(0, 0))
     340           2 :             ALLOCATE (training_matrix%outputs(0, 0))
     341           2 :             CYCLE
     342             :          END IF
     343             : 
     344             :          ! figure out size of input and output
     345          18 :          inp_size = 0; out_size = 0
     346          18 :          IF (ALLOCATED(training_list%head%input)) &
     347           9 :             inp_size = SIZE(training_list%head%input)
     348          18 :          IF (ALLOCATED(training_list%head%output)) &
     349           9 :             out_size = SIZE(training_list%head%output)
     350          18 :          CALL para_env%sum(inp_size)
     351          18 :          CALL para_env%sum(out_size)
     352             : 
     353             :          ! allocate matices to hold all training points
     354          72 :          ALLOCATE (training_matrix%inputs(inp_size, npoints))
     355          72 :          ALLOCATE (training_matrix%outputs(out_size, npoints))
     356        3258 :          training_matrix%inputs(:, :) = 0.0_dp
     357         562 :          training_matrix%outputs(:, :) = 0.0_dp
     358             : 
     359             :          ! loop over all training points, consume linked-list in the process
     360          18 :          ninputs = 0; noutputs = 0
     361          18 :          cur_point => training_list%head
     362          18 :          NULLIFY (training_list%head)
     363          86 :          DO i = 1, npoints
     364          68 :             IF (ALLOCATED(cur_point%input)) THEN
     365        3240 :                training_matrix%inputs(:, i) = cur_point%input(:)
     366          34 :                ninputs = ninputs + 1
     367             :             END IF
     368          68 :             IF (ALLOCATED(cur_point%output)) THEN
     369         544 :                training_matrix%outputs(:, i) = cur_point%output(:)
     370          34 :                noutputs = noutputs + 1
     371             :             END IF
     372             :             ! advance to next entry and deallocate the current one
     373          68 :             prev_point => cur_point
     374          68 :             cur_point => cur_point%next
     375          86 :             DEALLOCATE (prev_point)
     376             :          END DO
     377          18 :          training_list%npoints = 0 ! list is now empty
     378             : 
     379             :          ! sync training_matrix across ranks
     380          18 :          CALL para_env%sum(training_matrix%inputs)
     381          18 :          CALL para_env%sum(training_matrix%outputs)
     382             : 
     383             :          ! sanity check
     384          18 :          CALL para_env%sum(noutputs)
     385          18 :          CALL para_env%sum(ninputs)
     386          36 :          CPASSERT(noutputs == npoints .AND. ninputs == npoints)
     387             :       END DO
     388             : 
     389          18 :    END SUBROUTINE training_list2matrix
     390             : 
     391             : ! **************************************************************************************************
     392             : !> \brief TODO
     393             : !> \param ml_prior ...
     394             : !> \param training_matrices ...
     395             : ! **************************************************************************************************
     396          18 :    SUBROUTINE pao_ml_substract_prior(ml_prior, training_matrices)
     397             :       INTEGER, INTENT(IN)                                :: ml_prior
     398             :       TYPE(training_matrix_type), DIMENSION(:), TARGET   :: training_matrices
     399             : 
     400             :       INTEGER                                            :: i, ikind, npoints, out_size
     401             :       TYPE(training_matrix_type), POINTER                :: training_matrix
     402             : 
     403          38 :       DO ikind = 1, SIZE(training_matrices)
     404          20 :          training_matrix => training_matrices(ikind)
     405          20 :          out_size = SIZE(training_matrix%outputs, 1)
     406          20 :          npoints = SIZE(training_matrix%outputs, 2)
     407          20 :          IF (npoints == 0) CYCLE
     408          54 :          ALLOCATE (training_matrix%prior(out_size))
     409             : 
     410             :          ! calculate prior
     411          18 :          SELECT CASE (ml_prior)
     412             :          CASE (pao_ml_prior_zero)
     413          96 :             training_matrix%prior(:) = 0.0_dp
     414             :          CASE (pao_ml_prior_mean)
     415         188 :             training_matrix%prior(:) = SUM(training_matrix%outputs, 2)/REAL(npoints, dp)
     416             :          CASE DEFAULT
     417          18 :             CPABORT("PAO: unknown prior")
     418             :          END SELECT
     419             : 
     420             :          ! subtract prior from all training points
     421         104 :          DO i = 1, npoints
     422         564 :             training_matrix%outputs(:, i) = training_matrix%outputs(:, i) - training_matrix%prior
     423             :          END DO
     424             :       END DO
     425             : 
     426          18 :    END SUBROUTINE pao_ml_substract_prior
     427             : 
     428             : ! **************************************************************************************************
     429             : !> \brief Print some statistics about the training set and dump it upon request
     430             : !> \param pao ...
     431             : !> \param training_matrices ...
     432             : ! **************************************************************************************************
     433          18 :    SUBROUTINE pao_ml_print(pao, training_matrices)
     434             :       TYPE(pao_env_type), POINTER                        :: pao
     435             :       TYPE(training_matrix_type), DIMENSION(:), TARGET   :: training_matrices
     436             : 
     437             :       INTEGER                                            :: i, ikind, N, npoints
     438             :       TYPE(training_matrix_type), POINTER                :: training_matrix
     439             : 
     440             :       ! dump training data
     441          18 :       IF (pao%iw_mldata > 0) THEN
     442           2 :          DO ikind = 1, SIZE(training_matrices)
     443           1 :             training_matrix => training_matrices(ikind)
     444           1 :             npoints = SIZE(training_matrix%outputs, 2)
     445           6 :             DO i = 1, npoints
     446           4 :                WRITE (pao%iw_mldata, *) "PAO|ML| training-point kind: ", TRIM(training_matrix%kindname), &
     447           4 :                   " point:", i, " in:", training_matrix%inputs(:, i), &
     448           9 :                   " out:", training_matrix%outputs(:, i)
     449             :             END DO
     450             :          END DO
     451           1 :          CALL m_flush(pao%iw_mldata)
     452             :       END IF
     453             : 
     454             :       ! print stats
     455          18 :       IF (pao%iw > 0) THEN
     456          19 :          DO ikind = 1, SIZE(training_matrices)
     457          10 :             training_matrix => training_matrices(ikind)
     458          30 :             N = SIZE(training_matrix%inputs)
     459          10 :             IF (N == 0) CYCLE
     460             :             WRITE (pao%iw, "(A,I3,A,E10.1,1X,E10.1,1X,E10.1)") " PAO|ML| Descriptor for kind: "// &
     461           9 :                TRIM(training_matrix%kindname)//" size: ", &
     462           9 :                SIZE(training_matrix%inputs, 1), " min/mean/max: ", &
     463        1629 :                MINVAL(training_matrix%inputs), &
     464        1629 :                SUM(training_matrix%inputs)/REAL(N, dp), &
     465        1648 :                MAXVAL(training_matrix%inputs)
     466             :          END DO
     467             :       END IF
     468             : 
     469          18 :    END SUBROUTINE pao_ml_print
     470             : 
     471             : ! **************************************************************************************************
     472             : !> \brief Calls the actual learning algorthim to traing on the given matrices
     473             : !> \param pao ...
     474             : ! **************************************************************************************************
     475          18 :    SUBROUTINE pao_ml_train(pao)
     476             :       TYPE(pao_env_type), POINTER                        :: pao
     477             : 
     478             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'pao_ml_train'
     479             : 
     480             :       INTEGER                                            :: handle
     481             : 
     482          18 :       CALL timeset(routineN, handle)
     483             : 
     484          28 :       SELECT CASE (pao%ml_method)
     485             :       CASE (pao_ml_gp)
     486          10 :          CALL pao_ml_gp_train(pao)
     487             :       CASE (pao_ml_nn)
     488           4 :          CALL pao_ml_nn_train(pao)
     489             :       CASE (pao_ml_lazy)
     490             :          ! nothing to do
     491             :       CASE DEFAULT
     492          18 :          CPABORT("PAO: unknown machine learning scheme")
     493             :       END SELECT
     494             : 
     495          18 :       CALL timestop(handle)
     496             : 
     497          18 :    END SUBROUTINE pao_ml_train
     498             : 
     499             : ! **************************************************************************************************
     500             : !> \brief Fills pao%matrix_X based on machine learning predictions
     501             : !> \param pao ...
     502             : !> \param qs_env ...
     503             : ! **************************************************************************************************
     504         138 :    SUBROUTINE pao_ml_predict(pao, qs_env)
     505             :       TYPE(pao_env_type), POINTER                        :: pao
     506             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     507             : 
     508             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'pao_ml_predict'
     509             : 
     510             :       INTEGER                                            :: acol, arow, handle, iatom, ikind, natoms
     511         138 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: descriptor, variances
     512         138 :       REAL(dp), DIMENSION(:, :), POINTER                 :: block_X
     513         138 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     514             :       TYPE(cell_type), POINTER                           :: cell
     515             :       TYPE(dbcsr_iterator_type)                          :: iter
     516             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     517         138 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     518         138 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     519             : 
     520         138 :       CALL timeset(routineN, handle)
     521             : 
     522             :       CALL get_qs_env(qs_env, &
     523             :                       para_env=para_env, &
     524             :                       cell=cell, &
     525             :                       particle_set=particle_set, &
     526             :                       atomic_kind_set=atomic_kind_set, &
     527             :                       qs_kind_set=qs_kind_set, &
     528         138 :                       natom=natoms)
     529             : 
     530             :       ! fill matrix_X
     531         414 :       ALLOCATE (variances(natoms))
     532         416 :       variances(:) = 0.0_dp
     533             : !$OMP PARALLEL DEFAULT(NONE) SHARED(pao,qs_env,particle_set,qs_kind_set,cell,variances) &
     534         138 : !$OMP PRIVATE(iter,arow,acol,iatom,ikind,descriptor,block_X)
     535             :       CALL dbcsr_iterator_start(iter, pao%matrix_X)
     536             :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     537             :          CALL dbcsr_iterator_next_block(iter, arow, acol, block_X)
     538             :          iatom = arow; CPASSERT(arow == acol)
     539             :          CALL get_atomic_kind(particle_set(iatom)%atomic_kind, kind_number=ikind)
     540             :          IF (SIZE(block_X) == 0) CYCLE ! pao disabled for iatom
     541             : 
     542             :          ! calculate descriptor
     543             :          CALL pao_ml_calc_descriptor(pao, &
     544             :                                      particle_set, &
     545             :                                      qs_kind_set, &
     546             :                                      cell, &
     547             :                                      iatom, &
     548             :                                      descriptor)
     549             : 
     550             :          ! call actual machine learning for prediction
     551             :          CALL pao_ml_predict_low(pao, ikind=ikind, &
     552             :                                  descriptor=descriptor, &
     553             :                                  output=block_X(:, 1), &
     554             :                                  variance=variances(iatom))
     555             : 
     556             :          DEALLOCATE (descriptor)
     557             : 
     558             :          !add prior
     559             :          block_X(:, 1) = block_X(:, 1) + pao%ml_training_matrices(ikind)%prior
     560             :       END DO
     561             :       CALL dbcsr_iterator_stop(iter)
     562             : !$OMP END PARALLEL
     563             : 
     564             :       ! print variances
     565         138 :       CALL para_env%sum(variances)
     566         138 :       IF (pao%iw_mlvar > 0) THEN
     567           3 :          DO iatom = 1, natoms
     568           3 :             WRITE (pao%iw_mlvar, *) "PAO|ML| atom:", iatom, " prediction variance:", variances(iatom)
     569             :          END DO
     570           1 :          CALL m_flush(pao%iw_mlvar)
     571             :       END IF
     572             : 
     573             :       ! one-line summary
     574         207 :       IF (pao%iw > 0) WRITE (pao%iw, "(A,E20.10,A,T71,I10)") " PAO|ML| max prediction variance:", &
     575         554 :          MAXVAL(variances), " for atom:", MAXLOC(variances)
     576             : 
     577         554 :       IF (MAXVAL(variances) > pao%ml_tolerance) &
     578           0 :          CPABORT("Variance of prediction above ML_TOLERANCE.")
     579             : 
     580         138 :       DEALLOCATE (variances)
     581             : 
     582         138 :       CALL timestop(handle)
     583             : 
     584         276 :    END SUBROUTINE pao_ml_predict
     585             : 
     586             : ! **************************************************************************************************
     587             : !> \brief Queries the actual learning algorthim to make a prediction
     588             : !> \param pao ...
     589             : !> \param ikind ...
     590             : !> \param descriptor ...
     591             : !> \param output ...
     592             : !> \param variance ...
     593             : ! **************************************************************************************************
     594         138 :    SUBROUTINE pao_ml_predict_low(pao, ikind, descriptor, output, variance)
     595             :       TYPE(pao_env_type), POINTER                        :: pao
     596             :       INTEGER, INTENT(IN)                                :: ikind
     597             :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: descriptor
     598             :       REAL(dp), DIMENSION(:), INTENT(OUT)                :: output
     599             :       REAL(dp), INTENT(OUT)                              :: variance
     600             : 
     601         220 :       SELECT CASE (pao%ml_method)
     602             :       CASE (pao_ml_gp)
     603          82 :          CALL pao_ml_gp_predict(pao, ikind, descriptor, output, variance)
     604             :       CASE (pao_ml_nn)
     605          28 :          CALL pao_ml_nn_predict(pao, ikind, descriptor, output, variance)
     606             :       CASE (pao_ml_lazy)
     607         224 :          output = 0.0_dp ! let's be really lazy and just rely on the prior
     608          28 :          variance = 0
     609             :       CASE DEFAULT
     610         138 :          CPABORT("PAO: unknown machine learning scheme")
     611             :       END SELECT
     612             : 
     613         138 :    END SUBROUTINE pao_ml_predict_low
     614             : 
     615             : ! **************************************************************************************************
     616             : !> \brief Calculate forces contributed by machine learning
     617             : !> \param pao ...
     618             : !> \param qs_env ...
     619             : !> \param matrix_G ...
     620             : !> \param forces ...
     621             : ! **************************************************************************************************
     622          18 :    SUBROUTINE pao_ml_forces(pao, qs_env, matrix_G, forces)
     623             :       TYPE(pao_env_type), POINTER                        :: pao
     624             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     625             :       TYPE(dbcsr_type)                                   :: matrix_G
     626             :       REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: forces
     627             : 
     628             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'pao_ml_forces'
     629             : 
     630             :       INTEGER                                            :: acol, arow, handle, iatom, ikind
     631          18 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: descr_grad, descriptor
     632          18 :       REAL(dp), DIMENSION(:, :), POINTER                 :: block_G
     633          18 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     634             :       TYPE(cell_type), POINTER                           :: cell
     635             :       TYPE(dbcsr_iterator_type)                          :: iter
     636             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     637          18 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     638          18 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     639             : 
     640          18 :       CALL timeset(routineN, handle)
     641             : 
     642             :       CALL get_qs_env(qs_env, &
     643             :                       para_env=para_env, &
     644             :                       cell=cell, &
     645             :                       particle_set=particle_set, &
     646             :                       atomic_kind_set=atomic_kind_set, &
     647          18 :                       qs_kind_set=qs_kind_set)
     648             : 
     649             : !$OMP PARALLEL DEFAULT(NONE) SHARED(pao,matrix_G,particle_set,qs_kind_set,cell) &
     650             : !$OMP REDUCTION(+:forces) &
     651          18 : !$OMP PRIVATE(iter,arow,acol,iatom,ikind,block_G,descriptor,descr_grad)
     652             :       CALL dbcsr_iterator_start(iter, matrix_G)
     653             :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     654             :          CALL dbcsr_iterator_next_block(iter, arow, acol, block_G)
     655             :          iatom = arow; CPASSERT(arow == acol)
     656             :          CALL get_atomic_kind(particle_set(iatom)%atomic_kind, kind_number=ikind)
     657             :          IF (SIZE(block_G) == 0) CYCLE ! pao disabled for iatom
     658             : 
     659             :          ! calculate descriptor
     660             :          CALL pao_ml_calc_descriptor(pao, &
     661             :                                      particle_set, &
     662             :                                      qs_kind_set, &
     663             :                                      cell, &
     664             :                                      iatom=iatom, &
     665             :                                      descriptor=descriptor)
     666             : 
     667             :          ! calcaulte derivate of machine learning prediction
     668             :          CALL pao_ml_gradient_low(pao, ikind=ikind, &
     669             :                                   descriptor=descriptor, &
     670             :                                   outer_deriv=block_G(:, 1), &
     671             :                                   gradient=descr_grad)
     672             : 
     673             :          ! calculate force contributions from descriptor
     674             :          CALL pao_ml_calc_descriptor(pao, &
     675             :                                      particle_set, &
     676             :                                      qs_kind_set, &
     677             :                                      cell, &
     678             :                                      iatom=iatom, &
     679             :                                      descr_grad=descr_grad, &
     680             :                                      forces=forces)
     681             : 
     682             :          DEALLOCATE (descriptor, descr_grad)
     683             :       END DO
     684             :       CALL dbcsr_iterator_stop(iter)
     685             : !$OMP END PARALLEL
     686             : 
     687          18 :       CALL timestop(handle)
     688             : 
     689          36 :    END SUBROUTINE pao_ml_forces
     690             : 
     691             : ! **************************************************************************************************
     692             : !> \brief Calculate gradient of machine learning algorithm
     693             : !> \param pao ...
     694             : !> \param ikind ...
     695             : !> \param descriptor ...
     696             : !> \param outer_deriv ...
     697             : !> \param gradient ...
     698             : ! **************************************************************************************************
     699          18 :    SUBROUTINE pao_ml_gradient_low(pao, ikind, descriptor, outer_deriv, gradient)
     700             :       TYPE(pao_env_type), POINTER                        :: pao
     701             :       INTEGER, INTENT(IN)                                :: ikind
     702             :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: descriptor, outer_deriv
     703             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: gradient
     704             : 
     705          54 :       ALLOCATE (gradient(SIZE(descriptor)))
     706             : 
     707          28 :       SELECT CASE (pao%ml_method)
     708             :       CASE (pao_ml_gp)
     709          10 :          CALL pao_ml_gp_gradient(pao, ikind, descriptor, outer_deriv, gradient)
     710             :       CASE (pao_ml_nn)
     711           4 :          CALL pao_ml_nn_gradient(pao, ikind, descriptor, outer_deriv, gradient)
     712             :       CASE (pao_ml_lazy)
     713          26 :          gradient = 0.0_dp
     714             :       CASE DEFAULT
     715          18 :          CPABORT("PAO: unknown machine learning scheme")
     716             :       END SELECT
     717             : 
     718          18 :    END SUBROUTINE pao_ml_gradient_low
     719             : 
     720           0 : END MODULE pao_ml

Generated by: LCOV version 1.15