Source code for mbgdml.predictors.gdml_predict

# MIT License
#
# Copyright (c) 2018-2022 Stefan Chmiela, Gregory Fonseca
# Copyright (c) 2022-2023, Alex M. Maldonado
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
from ..logger import GDMLLogger
from ..stress import virial_atom_loop

log = GDMLLogger(__name__)

# This calculation is too fast to be a ray task.
# pylint: disable-next=too-many-statements
[docs]def _predict_gdml_wkr( r_desc, r_d_desc, n_atoms, sig, n_perms, R_desc_perms, R_d_desc_alpha_perms, alphas_E_lin, wkr_start_stop=None, chunk_size=None, ): r"""Compute (part) of a GDML prediction. Every prediction is a linear combination involving the training points used for this model. This function evaluates that combination for the range specified by `wkr_start_stop`. This workload can optionally be processed in chunks, which can be faster as it requires less memory to be allocated. Note ---- It is sufficient to provide either the parameter ``r`` or ``r_desc_d_desc``. The other one can be set to :obj:`None`. Returns ------- :obj:`numpy.ndarray` Partial prediction of all force components and energy (appended to array as last element). """ # pylint: disable=invalid-name ### mbGDML CHANGE ### n_train = int(R_desc_perms.shape[0] / n_perms) ### mbGDML CHANGE END ### wkr_start, wkr_stop = (0, n_train) if wkr_start_stop is None else wkr_start_stop if chunk_size is None: chunk_size = n_train ### mbGDML CHANGE ### dim_d = (n_atoms * (n_atoms - 1)) // 2 dim_i = 3 * n_atoms dim_c = chunk_size * n_perms ### mbGDML CHANGE END ### # Pre-allocate memory. diff_ab_perms = np.empty((dim_c, dim_d), dtype=np.double) a_x2 = np.empty((dim_c,), dtype=np.double) mat52_base = np.empty((dim_c,), dtype=np.double) # avoid divisions (slower) sig_inv = 1.0 / sig mat52_base_fact = 5.0 / (3 * sig**3) diag_scale_fact = 5.0 / sig sqrt5 = np.sqrt(5.0) E_F = np.zeros((dim_d + 1,), dtype=np.double) F = E_F[1:] wkr_start *= n_perms wkr_stop *= n_perms b_start = wkr_start for b_stop in list(range(wkr_start + dim_c, wkr_stop, dim_c)) + [wkr_stop]: rj_desc_perms = R_desc_perms[b_start:b_stop, :] rj_d_desc_alpha_perms = R_d_desc_alpha_perms[b_start:b_stop, :] # Resize pre-allocated memory for last iteration, # if chunk_size is not a divisor of the training set size. # Note: It's faster to process equally sized chunks. c_size = b_stop - b_start if c_size < dim_c: diff_ab_perms = diff_ab_perms[:c_size, :] a_x2 = a_x2[:c_size] mat52_base = mat52_base[:c_size] np.subtract( np.broadcast_to(r_desc, rj_desc_perms.shape), rj_desc_perms, out=diff_ab_perms, ) norm_ab_perms = sqrt5 * np.linalg.norm(diff_ab_perms, axis=1) np.exp(-norm_ab_perms * sig_inv, out=mat52_base) mat52_base *= mat52_base_fact np.einsum( "ji,ji->j", diff_ab_perms, rj_d_desc_alpha_perms, out=a_x2 ) # colum wise dot product F += (a_x2 * mat52_base).dot(diff_ab_perms) * diag_scale_fact mat52_base *= norm_ab_perms + sig F -= mat52_base.dot(rj_d_desc_alpha_perms) # Energies are automatically predicted with a flipped sign here # (because -E are trained, instead of E) E_F[0] += a_x2.dot(mat52_base) # Energies are automatically predicted with a flipped sign here # (because -E are trained, instead of E) if alphas_E_lin is not None: K_fe = diff_ab_perms * mat52_base[:, None] F += alphas_E_lin[b_start:b_stop].dot(K_fe) K_ee = ( 1 + (norm_ab_perms * sig_inv) * (1 + norm_ab_perms / (3 * sig)) ) * np.exp(-norm_ab_perms * sig_inv) E_F[0] += K_ee.dot(alphas_E_lin[b_start:b_stop]) b_start = b_stop out = E_F[: dim_i + 1] # Descriptor has less entries than 3N, need to extend size of the 'E_F' array. if dim_d < dim_i: out = np.empty((dim_i + 1,)) out[0] = E_F[0] # 'r_d_desc.T.dot(F)' for our special representation of 'r_d_desc' r_d_desc = r_d_desc[None, ...] F = F[None, ...] ### mbGDML CHANGE ### n = np.max((r_d_desc.shape[0], F.shape[0])) i, j = np.tril_indices(n_atoms, k=-1) out_F = np.zeros((n, n_atoms, n_atoms, 3)) out_F[:, i, j, :] = r_d_desc * F[..., None] out_F[:, j, i, :] = -out_F[:, i, j, :] out[1:] = out_F.sum(axis=1).reshape(n, -1) ### mbGDML CHANGE END ### return out
# Possible ray task. # pylint: disable-next=too-many-branches
[docs]def predict_gdml(Z, R, entity_ids, entity_combs, model, periodic_cell, **kwargs): r"""Predict total :math:`n`-body energy and forces of a single structure. Parameters ---------- Z : :obj:`numpy.ndarray`, ndim: ``1`` Atomic numbers of all atoms in ``r`` (in the same order). R : :obj:`numpy.ndarray`, ndim: ``2`` Cartesian coordinates of a single structure to predict. entity_ids : :obj:`numpy.ndarray`, ndim: ``1`` 1D array specifying which atoms belong to which entities. entity_combs : ``iterable`` Entity ID combinations (e.g., ``(53,)``, ``(0, 2)``, ``(32, 55, 293)``, etc.) to predict using this model. These are used to slice ``r`` with ``entity_ids``. model : :obj:`mbgdml.models.gdmlModel` GDML model containing all information need to make predictions. periodic_cell : :obj:`mbgdml.periodic.Cell`, default: :obj:`None` Use periodic boundary conditions defined by this object. Returns ------- :obj:`float` Predicted :math:`n`-body energy. :obj:`numpy.ndarray` Predicted :math:`n`-body forces. """ assert R.ndim == 2 E = 0.0 F = np.zeros(R.shape) alchemy_scalers = kwargs.get("alchemy_scalers", None) compute_virial = kwargs.get("compute_virial", False) periodic = bool(periodic_cell) compute_virial = bool(periodic and compute_virial) if compute_virial: virial = np.zeros((3, 3), dtype=np.float64) # Getting all contributions for each molecule combination (comb). for entity_id_comb in entity_combs: # Gets indices of all atoms in the combination of molecules. # r_slice is a list of the atoms for the entity_id combination. r_slice = [] for entity_id in entity_id_comb: r_slice.extend(np.where(entity_ids == entity_id)[0]) z = Z[r_slice] r = R[r_slice] # Note: We store the original coordinates in case the virial is requested # If we are using a periodic cell we convert r_comp into coordinates # we can use in many-body expansions. if periodic: r = periodic_cell.r_mic(r) if r is None: # Any atomic pairwise distance was larger than cutoff. continue # Checks criteria cutoff if present and desired. if model.criteria is not None: accept_r, _ = model.criteria.accept(z, r) if not accept_r: # Do not include this contribution. continue # Predicts energies and forces. r_desc, r_d_desc = model.desc_func(r.flatten(), model.lat_and_inv) wkr_args = ( r_desc, r_d_desc, len(z), model.sig, model.n_perms, model.R_desc_perms, model.R_d_desc_alpha_perms, model.alphas_E_lin, ) out = _predict_gdml_wkr(*wkr_args) out *= model.stdev e = out[0] + model.integ_c f = out[1:].reshape((len(z), 3)) # Scale contribution if entity is included. if alchemy_scalers not in (None, []): for alchemy_scaler in alchemy_scalers: if alchemy_scaler.entity_id in entity_id_comb: e = alchemy_scaler.scale(e) f = alchemy_scaler.scale(f) # Adds contributions to total energy and forces. E += e F[r_slice] += f if compute_virial: virial += virial_atom_loop(r, f) if compute_virial: return E, F, virial return E, F
[docs]def predict_gdml_decomp(Z, R, entity_ids, entity_combs, model, **kwargs): r"""Predict all :math:`n`-body energies and forces of a single structure. Parameters ---------- Z : :obj:`numpy.ndarray`, ndim: ``1`` Atomic numbers of all atoms in ``r`` (in the same order). r : :obj:`numpy.ndarray`, ndim: ``2`` Cartesian coordinates of a single structure to predict. entity_ids : :obj:`numpy.ndarray`, ndim: ``1`` 1D array specifying which atoms belong to which entities. entity_combs : ``iterable`` Entity ID combinations (e.g., ``(53,)``, ``(0, 2)``, ``(32, 55, 293)``, etc.) to predict using this model. These are used to slice ``r`` with ``entity_ids``. model : :obj:`mbgdml.models.gdmlModel` GDML model containing all information need to make predictions. Returns ------- :obj:`numpy.ndarray`, ndim: ``1`` Energies of all possible :math:`n`-body structures. Some elements can be :obj:`numpy.nan` if they are beyond the criteria cutoff. :obj:`numpy.ndarray`, ndim: ``3`` Atomic forces of all possible :math:`n`-body structure. Some elements can be :obj:`numpy.nan` if they are beyond the criteria cutoff. :obj:`numpy.ndarray`, ndim: ``2`` All possible :math:`n`-body combinations of ``r`` (i.e., entity ID combinations). """ assert R.ndim == 2 if entity_combs.ndim == 1: n_atoms = np.count_nonzero(entity_ids == entity_combs[0]) else: n_atoms = 0 for i in entity_combs[0]: n_atoms += np.count_nonzero(entity_ids == i) E = np.empty(len(entity_combs), dtype=np.float64) F = np.empty((len(entity_combs), n_atoms, 3), dtype=np.float64) E[:] = np.nan F[:] = np.nan alchemy_scalers = kwargs.get("alchemy_scalers", None) # Getting all contributions for each molecule combination (comb). for i, entity_id_comb in enumerate(entity_combs): # Gets indices of all atoms in the combination of molecules. # r_slice is a list of the atoms for the entity_id combination. r_slice = [] for entity_id in entity_id_comb: r_slice.extend(np.where(entity_ids == entity_id)[0]) z_comp = Z[r_slice] r_comp = R[r_slice] # Checks criteria cutoff if present and desired. if model.criteria is not None: accept_r, _ = model.criteria.accept(z_comp, r_comp) if not accept_r: # Do not include this contribution. continue # Predicts energies and forces. r_desc, r_d_desc = model.desc_func(r_comp.flatten(), model.lat_and_inv) out = _predict_gdml_wkr( r_desc, r_d_desc, n_atoms, model.sig, model.n_perms, model.R_desc_perms, model.R_d_desc_alpha_perms, model.alphas_E_lin, ) out *= model.stdev E[i] = out[0] + model.integ_c F[i] = out[1:].reshape((n_atoms, 3)) # Scale contribution if entity is included. if alchemy_scalers not in (None, []): for alchemy_scaler in alchemy_scalers: if alchemy_scaler.entity_id in entity_id_comb: E[i] = alchemy_scaler.scale(E[i]) F[i] = alchemy_scaler.scale(F[i]) return E, F, entity_combs