Source code for mbgdml.models.gdml_model

# MIT License
#
# Copyright (c) 2018-2022 Stefan Chmiela, Gregory Fonseca
# Copyright (c) 2022-2023, Alex M. Maldonado
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
from .base import Model
from ..utils import md5_data
from .._gdml.desc import _from_r
from .. import __version__ as mbgdml_version
from ..logger import GDMLLogger

log = GDMLLogger(__name__)


# pylint: disable-next=invalid-name
class gdmlModel(Model):
    r"""GDML model loaded from an npz file or a dictionary."""

    def __init__(self, model, comp_ids=None, criteria=None, for_predict=True):
        """
        Parameters
        ----------
        model : :obj:`str` or :obj:`dict`
            Path to GDML npz model or :obj:`dict`.
        comp_ids : ``iterable``, default: :obj:`None`
            Model component IDs that relate entity IDs of a structure to a
            fragment label. This overrides any model ``comp_ids`` if they are
            present.
        criteria : :obj:`mbgdml.descriptors.Criteria`, default: :obj:`None`
            Initialized descriptor criteria for accepting a structure based on
            a descriptor and cutoff.
        for_predict : :obj:`bool`, default: ``True``
            Unpack the model for use in predictors.
        """
        super().__init__(criteria)
        self.type = "gdml"
        self._load(model, comp_ids)
        if for_predict:
            self._unpack()

    def _load(self, model, comp_ids):
        r"""Store the model dictionary and the basic attributes read from it.

        Parameters
        ----------
        model : :obj:`str` or :obj:`dict`
            Path to a GDML npz model or an already loaded model dictionary.
        comp_ids : ``iterable`` or :obj:`None`
            Component IDs that take precedence over ``model["comp_ids"]``.

        Raises
        ------
        TypeError
            If ``model`` is neither a :obj:`str` nor a :obj:`dict`.
        KeyError
            If neither ``z`` nor ``Z`` is in the model (or if another required
            key such as ``r_unit`` is missing).
        """
        if isinstance(model, str):
            log.debug("Loading model from %r", model)
            # NOTE(review): allow_pickle=True can execute arbitrary code when
            # loading an untrusted file; only load models from trusted sources.
            # The context manager closes the npz file handle; ``dict(npz)``
            # reads every array into memory before the archive is closed.
            with np.load(model, allow_pickle=True) as npz:
                model = dict(npz)
        elif isinstance(model, dict):
            log.debug("Loading model from dictionary")
        else:
            raise TypeError(f"{type(model)} is not string or dict")

        self.model_dict = model

        # Atomic numbers may be stored under either key.
        if "z" in model:
            self.Z = model["z"]
        elif "Z" in model:
            self.Z = model["Z"]
        else:
            raise KeyError("z or Z not found in model")

        self.r_unit = model["r_unit"].item()
        self.entity_ids = model["entity_ids"] if "entity_ids" in model else None

        # Explicitly provided component IDs override the ones in the model.
        self.comp_ids = comp_ids if comp_ids is not None else model["comp_ids"]
        self.nbody_order = len(self.comp_ids)

    def _expand_perms(self, arr):
        r"""Tile ``arr`` over permutations and flatten the result.

        ``arr`` is tiled ``n_perms`` times, its columns are reordered with
        ``tril_perms_lin``, and the result is reshaped to
        ``(n_train * n_perms, -1)``.
        """
        return (
            np.tile(arr, self.n_perms)[:, self.tril_perms_lin]
            .reshape(self.n_train, self.n_perms, -1, order="F")
            .reshape(self.n_train * self.n_perms, -1)
        )

    def _unpack(self):
        r"""Unpack entries of the model dictionary into attributes used by
        predictors.
        """
        model = self.model_dict

        self.sig = model["sig"]
        self.n_atoms = self.Z.shape[0]
        self.desc_func = _from_r

        # Lattice and its inverse, only if the model provides a lattice.
        self.lat_and_inv = (
            (model["lattice"], np.linalg.inv(model["lattice"]))
            if "lattice" in model
            else None
        )

        self.n_train = model["R_desc"].shape[1]
        self.stdev = model["std"] if "std" in model else 1.0
        self.integ_c = model["c"]
        self.n_perms = model["perms"].shape[0]
        self.tril_perms_lin = model["tril_perms_lin"]
        self.use_torch = False

        # Both arrays go through the same tile/index/reshape expansion; only
        # ``R_desc`` is transposed first.
        self.R_desc_perms = self._expand_perms(model["R_desc"].T)
        self.R_d_desc_alpha_perms = self._expand_perms(model["R_d_desc_alpha"])

        if "alphas_E" in model:
            # Repeat every entry once per permutation.
            self.alphas_E_lin = np.tile(
                model["alphas_E"][:, None], (1, self.n_perms)
            ).ravel()
        else:
            self.alphas_E_lin = None

    @property
    def code_version(self):
        r"""mbGDML version used to train the model.

        Raises
        ------
        AttributeError
            If accessing information when no model is loaded.
        """
        if hasattr(self, "model_dict"):
            return self.model_dict["code_version"].item()
        raise AttributeError("No model is loaded.")

    @property
    def md5(self):
        r"""Unique MD5 hash of model.

        :type: :obj:`str`
        """
        md5_properties_all = [
            "md5_train",
            "Z",
            "R_desc",
            "R_d_desc_alpha",
            "sig",
            "alphas_F",
        ]
        # NOTE(review): only names that exist as attributes on this instance
        # contribute to the hash. This class itself sets ``Z`` and, after
        # ``_unpack``, ``sig``; the other names are only read from
        # ``model_dict`` here. Confirm whether the base class provides them or
        # whether the hash is meant to cover the ``model_dict`` entries.
        md5_keys = [key for key in md5_properties_all if hasattr(self, key)]
        md5_dict = {key: getattr(self, key) for key in md5_keys}
        return md5_data(md5_dict, md5_keys)

    def add_modifications(self, dset):
        r"""mbGDML-specific modifications of models.

        Transfers information from data set to model.

        Parameters
        ----------
        dset : :obj:`dict`
            Dictionary of a mbGDML data set.
        """
        dset_keys = ["entity_ids", "comp_ids"]
        for key in dset_keys:
            self.model_dict[key] = dset[key]
        self.model_dict["code_version"] = np.array(mbgdml_version)
        self.model_dict["md5"] = np.array(self.md5)

    def save(self, path):
        r"""Save model dict as npz file.

        Changes in model attributions are not considered.

        Parameters
        ----------
        path : :obj:`str`
            Path to save the npz file.
        """
        np.savez_compressed(path, **self.model_dict)