# MIT License
#
# Copyright (c) 2022-2023, Alex M. Maldonado
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""Many-body utilities."""
import itertools
import math
import numpy as np
import ray
from .utils import chunk_array, chunk_iterable, gen_combs
from .stress import to_voigt, virial_finite_diff
from .logger import GDMLLogger
log = GDMLLogger(__name__)
[docs]def gen_r_entity_combs(r_prov_spec, entity_ids_lower):
r"""Generate ``entity_id`` combinations of a specific size for a single
structure.
``r_prov_spec[2:]`` provides ``entity_ids`` of a reference structure.
For example, consider ``r_prov_spec = [0, 0, 34, 5, 2]``.
This tells us that the structure contains three entities, and the
``entity_ids`` of each are ``34``, ``5``, and ``2`` in the structure
provenance.
``gen_r_entity_combs`` generates all combinations (without replacement)
based on the number of entities in ``entity_ids_lower``.
If ``entity_ids_lower`` is ``[0, 0, 0, 1, 1, 1]`` (i.e., number of
lower-order entities is 2), then ``gen_r_entity_combs`` produces all 3
choose 2 combinations.
Parameters
----------
r_prov_spec : :obj:`numpy.ndarray`, ndim: ``1``
Structure provenance specifications of a single structure.
entity_ids_lower : :obj:`numpy.ndarray`, ndim: ``1``
A uniquely identifying integer specifying what atoms belong to
which entities for lower-order structures. Entities can be a related
set of atoms, molecules, or functional group. For example, a water and
methanol molecule could be ``[0, 0, 0, 1, 1, 1, 1, 1, 1]``.
Yield
-----
:obj:`tuple`
Entity combinations of a single structure.
"""
n_entities_lower = len(set(entity_ids_lower))
entity_combs = itertools.combinations(r_prov_spec[2:], n_entities_lower)
for comb in entity_combs:
yield comb
[docs]def mbe_worker(
r_shape,
entity_ids,
r_prov_specs,
r_idxs,
E_lower,
Deriv_lower,
entity_ids_lower,
r_prov_specs_lower,
):
r"""Worker for computing many-body contributions.
This does not take into account if many-body contributions are being
``'added'`` or ``'removed'``. This just sums up all possible contributions.
Parameters
----------
r_shape : :obj:`tuple`, ndim: ``1``
Shape of a single structure. For example, ``(n, 3)`` where ``n`` is the
number of atoms.
entity_ids : :obj:`numpy.ndarray`, ndim: ``1``
A uniquely identifying integer specifying what atoms belong to
which entities for lower-order structures. Entities can be a related
set of atoms, molecules, or functional group. For example, a water and
methanol molecule could be ``[0, 0, 0, 1, 1, 1, 1, 1, 1]``.
r_prov_specs : :obj:`numpy.ndarray`, ndim: ``2``
Structure provenance IDs. This specifies the ``r_prov_id``, structure
index from the ``r_prov_id`` source, and ``entity_ids`` making up
the structure.
r_idxs : :obj:`list`, ndim: ``1``
Structure indices of the original ``E`` and ``Deriv`` arrays for this
worker.
E_lower : :obj:`numpy.ndarray`, ndim: ``1``
Energies of lower-order structures.
Deriv_lower : :obj:`numpy.ndarray`, ndim: ``3``
Potential energy surface derivatives (i.e., gradients or forces
depending on the sign) of lower-order structures.
entity_ids_lower : :obj:`numpy.ndarray`, ndim: ``1``
A uniquely identifying integer specifying what atoms belong to
which entities for lower-order structures. Entities can be a related
set of atoms, molecules, or functional group. For example, a water and
methanol molecule could be ``[0, 0, 0, 1, 1, 1, 1, 1, 1]``.
r_prov_specs_lower : :obj:`numpy.ndarray`, ndim: ``2``
Structure provenance IDs. This specifies the ``r_prov_id``, structure
index from the ``r_prov_id`` source, and ``entity_ids`` making up
lower-order structures.
Returns
-------
:obj:`list`
Structure indices of the original ``E`` and ``Deriv`` arrays for this
worker.
:obj:`numpy.ndarray`
Many-body energy contributions for the worker structure indices.
:obj:`numpy.ndarray`
Many-body derivative (i.e., gradients or forces depending on the sign)
contributions for the worker structure indices.
"""
log.debug("MBE contributions")
log.debug("Parent size %i", np.unique(entity_ids).shape[0])
E = np.zeros(len(r_idxs))
Deriv = np.zeros((len(r_idxs), *r_shape))
# Loop through every structure in the reference data set.
for i, i_r in enumerate(r_idxs):
# We have to match the molecule information for each reference
# structure to the molecules in this lower-order structure to remove
# the right contribution.
r_prov_spec = r_prov_specs[i_r] # r_prov_specs of this structure.
log.debug("Supersystem structure r_prov_spec: %r", r_prov_spec.tolist())
# Loop through every molecular combination.
for entity_comb in gen_r_entity_combs(r_prov_spec, entity_ids_lower):
# r_prov_spec of the lower-order structure.
# Combines [r_prov_id, r_idx] with the entity combination.
r_prov_spec_lower_comb = np.block([r_prov_spec[:2], np.array(entity_comb)])
log.debug("Fragment r_prov_spec: %r", r_prov_spec_lower_comb.tolist())
# Index of the structure in the lower data set.
try:
log.debug("Searching for fragment's r_prov_spec")
i_r_lower = np.where(
np.all(r_prov_specs_lower == r_prov_spec_lower_comb, axis=1)
)[0][0]
log.debug("Found fragment index: %d", i_r_lower)
except IndexError as e:
log.debug("Could not find fragment. This can be expected sometimes.")
if "for axis 0 with size 0" in str(e):
continue
raise
deriv_lower = Deriv_lower[i_r_lower]
# Adding or removing energy contributions.
E[i] += E_lower[i_r_lower]
log.debug("Fragment energy: %r", E_lower[i_r_lower])
# Adding or removing derivative contributions.
# We have to determine the corresponding atom indices of both the
# reference and lower-order structure.
# This is done by matching the entity_id_prov between the two r_prov_specs.
# The position of entity_id_prov in r_prov_specs (with r_prov_id and
# r_idx removed) specifies the entity_id of the
# entity_id_prov: the original entity_id of the structure provenance.
# entity_id: the entity_id in the reference structure.
# entity_id_lower: the entity_id in the lower-order structure.
# i_z: atom indices of the reference structure to add or remove
# lower-order structure.
# i_z_lower: atom indices of lower-order contribution.
for entity_id_prov in r_prov_spec_lower_comb[2:]:
entity_id = np.where(r_prov_spec[2:] == entity_id_prov)[0][0]
i_z = np.where(entity_ids == entity_id)[0]
log.debug("atom slice : %r", i_z.tolist())
entity_id_lower = np.where(
r_prov_spec_lower_comb[2:] == entity_id_prov
)[0][0]
i_z_lower = np.where(entity_ids_lower == entity_id_lower)[0]
Deriv[i][i_z] += deriv_lower[i_z_lower]
log.debug("Fragment successfully accounted for")
return r_idxs, E, Deriv
[docs]def gen_r_idxs_worker(r_prov_specs, r_prov_ids_lower, n_workers):
r"""Generates the assigned structures for each worker.
Parameters
----------
r_prov_specs : :obj:`numpy.ndarray`, ndim: ``2``
Structure provenance IDs. This specifies the ``r_prov_id``, structure
index from the ``r_prov_id`` source, and ``entity_ids`` making up
the structure.
r_prov_ids_lower : :obj:`dict` {:obj:`int`: :obj:`str`}
Species an ID (:obj:`int`) to uniquely identifying labels for each
lower-order structure if it originated from another source. Labels
should always be ``md5_structures``. For example,
``{0: '6038e101da7fc0085978741832ebc7ad',
1: 'eeaf93dec698de3ecb55e9292bd9dfcb'}``.
n_workers : :obj:`int`
Number of parallel workers. Can range from ``1`` to the number of CPUs
available.
Yields
------
:obj:`numpy.ndarray`
Structure indices for the worker.
"""
# Select all structures that we can remove contributions based on
# r_prov_ids in the lower contributions.
r_idxs = []
for r_prov_id_lower in r_prov_ids_lower.keys():
r_idxs.extend(np.where(r_prov_specs[:, 0] == r_prov_id_lower)[0].tolist())
task_size = math.ceil(len(r_idxs) / n_workers)
for i in range(0, len(r_idxs), task_size):
r_idxs_worker = r_idxs[i : i + task_size]
yield r_idxs_worker
# pylint: disable=too-many-branches, too-many-statements
[docs]def mbe_contrib(
E,
Deriv,
entity_ids,
r_prov_ids,
r_prov_specs,
E_lower,
Deriv_lower,
entity_ids_lower,
r_prov_ids_lower,
r_prov_specs_lower,
operation="remove",
use_ray=False,
ray_address="auto",
n_workers=1,
):
r"""Adds or removes energy and derivative (i.e., gradients or forces)
contributions from a reference.
We use the term "lower" to refer to the lower-order (i.e., smaller) systems.
These are the energy and derivative contributions to remove or add to a
reference.
Making :math:`n`-body predictions (i.e., ``operation = "add"``) will often
not have ``r_prov_ids`` or ``r_prov_specs`` as all lower contributions are
derived exclusively from these structures. Use :obj:`None` for both of these
and this function will assume that all ``_lower`` properties apply.
Parameters
----------
E : :obj:`numpy.ndarray`, ndim: ``1``
Energies to add or remove contributions from (i.e., reference).
Deriv : :obj:`numpy.ndarray`, ndim: ``3``
Potential energy surface derivatives (i.e., gradients or forces
depending on the sign) of reference structures.
entity_ids : :obj:`numpy.ndarray`, ndim: ``1``
A uniquely identifying integer specifying what atoms belong to
which entities for reference structures. Entities can be a related
set of atoms, molecules, or functional group. For example, a water and
methanol molecule could be ``[0, 0, 0, 1, 1, 1, 1, 1, 1]``.
r_prov_ids : :obj:`dict` {:obj:`int`: :obj:`str`} or :obj:`None`
Species an ID (:obj:`int`) to uniquely identifying labels for each
structure if it originated from another source. Labels should
always be ``md5_structures``. For example,
``{0: '6038e101da7fc0085978741832ebc7ad', 1:
'eeaf93dec698de3ecb55e9292bd9dfcb'}``.
r_prov_specs : :obj:`numpy.ndarray`, ndim: ``2`` or :obj:`None`
Structure provenance IDs. This specifies the ``r_prov_id``, structure
index from the ``r_prov_id`` source, and ``entity_ids`` making up
the structure.
E_lower : :obj:`numpy.ndarray`, ndim: ``1``
Lower-order energies.
Deriv_lower : :obj:`numpy.ndarray`, ndim: ``3``
Potential energy surface derivatives (i.e., gradients or forces
depending on the sign) of lower-order structures.
entity_ids_lower : :obj:`numpy.ndarray`, ndim: ``1``
A uniquely identifying integer specifying what atoms belong to
which entities for lower-order structures. Entities can be a related
set of atoms, molecules, or functional group. For example, a water and
methanol molecule could be ``[0, 0, 0, 1, 1, 1, 1, 1, 1]``.
r_prov_ids_lower : :obj:`dict` {:obj:`int`: :obj:`str`}
Species an ID (:obj:`int`) to uniquely identifying labels for each
lower-order structure if it originated from another source. Labels
should always be ``md5_structures``. For example,
``{0: '6038e101da7fc0085978741832ebc7ad',
1: 'eeaf93dec698de3ecb55e9292bd9dfcb'}``.
r_prov_specs_lower : :obj:`numpy.ndarray`, ndim: ``2``
Structure provenance IDs. This specifies the ``r_prov_id``, structure
index from the ``r_prov_id`` source, and ``entity_ids`` making up
lower-order structures.
operation : :obj:`str`, default: ``'remove'``
``'add'`` or ``'remove'`` the contributions.
use_ray : :obj:`bool`, default: ``False``
Use `ray <https://docs.ray.io/en/latest/>`__ to parallelize
computations.
n_workers : :obj:`int`, default: ``1``
Total number of workers available for ray. This is ignored if ``use_ray``
is ``False``.
ray_address : :obj:`str`, default: ``"auto"``
Ray cluster address to connect to.
Returns
-------
:obj:`numpy.ndarray`
Energies with lower-order contributions added or removed.
:obj:`numpy.ndarray`
Derivatives with lower-order contributions added or removed.
"""
if use_ray:
if not ray.is_initialized():
log.debug("ray is not initialized")
# Try to connect to already running ray service (from ray cli).
try:
log.debug("Trying to connect to ray at address %r", ray_address)
ray.init(address=ray_address)
except ConnectionError:
log.debug("Failed to connect to ray at %r", ray_address)
log.debug("Trying to initialize ray with %d cores", n_workers)
ray.init(num_cpus=n_workers)
log.debug("Successfully initialized ray")
else:
log.debug("Ray was already initialized")
else:
log.debug("Not using ray")
log.debug("Computing many-body expansion contributions")
if operation not in ("add", "remove"):
raise ValueError(f'{operation} is not "add" or "remove"')
log.debug("MBE operation : %s", operation)
# Checks that the r_prov_md5 hashes match the same r_prov_id
if (r_prov_ids is not None) and (r_prov_ids != {}):
log.debug("Checking if r_prov_ids match")
for r_prov_id_lower, r_prov_md5_lower in r_prov_ids_lower.items():
assert r_prov_md5_lower == r_prov_ids[r_prov_id_lower]
log.debug("All r_prov_ids match")
# Assume that all lower models apply.
else:
log.debug("No r_prov_ids exist")
assert r_prov_specs is None or r_prov_specs.shape == (1, 0)
assert len(set(r_prov_specs_lower[:, 0])) == 1
n_r = len(E)
n_entities = len(set(entity_ids))
r_prov_specs = np.empty((n_r, 2 + n_entities), dtype=int)
r_prov_specs[:, 0] = r_prov_specs_lower[0][0]
r_prov_specs[:, 1] = np.array(list(range(n_r)))
for entity_id in range(n_entities):
r_prov_specs[:, entity_id + 2] = entity_id
r_shape = Deriv.shape[1:]
log.debug("Shape of example structure: %r", r_shape)
if not use_ray:
log.debug("use_ray is False (using serial operation)")
r_idxs = next(gen_r_idxs_worker(r_prov_specs, r_prov_ids_lower, 1))
r_idxs_worker, E_worker, Deriv_worker = mbe_worker(
r_shape,
entity_ids,
r_prov_specs,
r_idxs,
E_lower,
Deriv_lower,
entity_ids_lower,
r_prov_specs_lower,
)
if operation == "remove":
E[r_idxs_worker] -= E_worker
Deriv[r_idxs_worker] -= Deriv_worker
elif operation == "add":
E[r_idxs_worker] += E_worker
Deriv[r_idxs_worker] += Deriv_worker
else:
log.debug("use_ray is True")
if not ray.is_initialized():
log.debug("ray is not initialized")
# Try to connect to already running ray service (from ray cli).
try:
log.debug("Trying to connect to ray at address %r", ray_address)
ray.init(address=ray_address)
except ConnectionError:
ray.init(num_cpus=n_workers)
# Generate all workers.
worker = ray.remote(mbe_worker)
worker_list = []
for r_idxs in gen_r_idxs_worker(r_prov_specs, r_prov_ids_lower, n_workers):
worker_list.append(
worker.options(num_cpus=1).remote(
r_shape,
entity_ids,
r_prov_specs,
r_idxs,
E_lower,
Deriv_lower,
entity_ids_lower,
r_prov_specs_lower,
)
)
# Run all workers.
while len(worker_list) != 0:
done_id, worker_list = ray.wait(worker_list)
r_idxs_worker, E_worker, Deriv_worker = ray.get(done_id)[0]
log.debug("Worker %r has finished", done_id)
if operation == "remove":
E[r_idxs_worker] -= E_worker
Deriv[r_idxs_worker] -= Deriv_worker
elif operation == "add":
E[r_idxs_worker] += E_worker
Deriv[r_idxs_worker] += Deriv_worker
return E, Deriv
[docs]def decomp_to_total(
E_nbody,
F_nbody,
entity_ids,
entity_combs,
use_ray=False,
n_workers=1,
ray_address="auto",
):
r"""Sum decomposed :math:`n`-body energies and forces for total
:math:`n`-body contribution.
This is a wrapper around :func:`mbgdml.mbe.mbe_contrib`.
Parameters
----------
E_nbody : :obj:`numpy.ndarray`, ndim: ``1``
All :math:`n`-body energies. :obj:`numpy.nan` values are allowed for
structures that were beyond the descriptor cutoff.
F_nbody : :obj:`numpy.ndarray`, ndim: ``3``
All :math:`n`-body energies. :obj:`numpy.nan` values are allowed for
structures that were beyond the descriptor cutoff.
entity_ids : :obj:`numpy.ndarray`, ndim: ``1``
Integers specifying which atoms belong to which entities for the
supersystem (not the :math:`n`-body structure).
entity_combs : :obj:`numpy.ndarray`, ndim: ``2``
Structure indices and Entity IDs of all structures in increasing
order of :math:`n`. Column 0 is the structure index and
the remaining columns are entity IDs.
use_ray : :obj:`bool`, default: ``False``
Use `ray <https://docs.ray.io/en/latest/>`__ to parallelize
computations.
n_workers : :obj:`int`, default: ``1``
Total number of workers available for ray. This is ignored if ``use_ray``
is ``False``.
ray_address : :obj:`str`, default: ``"auto"``
Ray cluster address to connect to.
Returns
-------
:obj:`numpy.ndarray`, ndim: ``1``
Total :math:`n`-body energies.
"""
# Allocating arrays
n_atoms = len(entity_ids)
n_r = len(np.unique(entity_combs[:, 0]))
E = np.zeros((n_r,))
F = np.zeros((n_r, n_atoms, 3))
entity_ids_lower = []
for i in range(len(entity_combs[0][1:])):
entity_ids_lower.extend([i for j in range(np.count_nonzero(entity_ids == i))])
entity_ids_lower = np.array(entity_ids_lower)
r_prov_specs_lower = np.zeros((len(entity_combs), 1))
r_prov_specs_lower = np.hstack((r_prov_specs_lower, entity_combs))
E_nbody = np.nan_to_num(E_nbody, copy=False, nan=0.0)
F_nbody = np.nan_to_num(F_nbody, copy=False, nan=0.0)
E, F = mbe_contrib(
E,
F,
entity_ids,
None,
None,
E_nbody,
F_nbody,
entity_ids_lower,
{0: ""},
r_prov_specs_lower,
operation="add",
use_ray=use_ray,
n_workers=n_workers,
ray_address=ray_address,
)
return E, F
[docs]class mbePredict:
r"""Predict energies and forces of structures using machine learning
many-body models.
This can be parallelized with ray but needs to be initialized with
the ray cli or a script using this class. Note that initializing ray tasks
comes with some overhead and can make smaller computations much slower.
Only GDML models can run in parallel.
"""
def __init__(
self,
models,
predict_model,
use_ray=False,
n_workers=1,
ray_address="auto",
wkr_chunk_size=None,
alchemy_scalers=None,
periodic_cell=None,
compute_stress=False,
):
r"""
Parameters
----------
models : :obj:`list` of :obj:`mbgdml.models.Model`
Machine learning model objects that contain all information to make
predictions using ``predict_model``.
predict_model : ``callable``
A function that takes ``Z``, ``R``, ``entity_ids``, ``nbody_gen``, ``model``
and computes energies and forces. This will be turned into a ray remote
function if ``use_ray`` is ``True``. This can return total properties
or all individual :math:`n`-body energies and forces.
use_ray : :obj:`bool`, default: ``False``
Use `ray <https://docs.ray.io/en/latest/>`__ to parallelize
computations.
n_workers : :obj:`int`, default: ``1``
Total number of workers available for ray. This is ignored if ``use_ray``
is ``False``.
ray_address : :obj:`str`, default: ``"auto"``
Ray cluster address to connect to.
wkr_chunk_size : :obj:`int`, default: :obj:`None`
Number of :math:`n`-body structures to assign to each spawned
worker with ray. If :obj:`None`, it will divide up the number of
predictions by ``n_workers``.
alchemical_scaling : :obj:`list` of :class:`~mbgdml.alchemy.mbeAlchemyScale`, \
default: :obj:`None`
Alchemical scaling of :math:`n`-body interactions of entities.
.. warning::
This has not been thoroughly tested.
periodic_cell : :obj:`mbgdml.periodic.Cell`, default: :obj:`None`
Use periodic boundary conditions defined by this object. If this
is not :obj:`None` only :meth:`~mbgdml.mbe.mbePredict.predict` can be used.
compute_stress : :obj:`bool`, default: ``False``
.. danger::
This implementation is experimental and has not been verified. We do
not recommend using this.
Compute the internal virial contribution,
:math:`\mathbf{W} \left( \mathbf{r}^N \right) / V`, of :math:`N`
particles at positions :math:`\mathbf{r}` to the pressure stress tensor of
a periodic box with volume :math:`V`. The kinetic contribution is not
computed here.
"""
self.models = models
self.predict_model = predict_model
if models[0].type == "gap":
assert not use_ray
elif models[0].type == "schnet":
assert not use_ray
self.use_ray = use_ray # Do early because some setters are dependent on this.
self.n_workers = n_workers
self.wkr_chunk_size = wkr_chunk_size
if alchemy_scalers is None:
alchemy_scalers = []
self.alchemy_scalers = alchemy_scalers
self.periodic_cell = periodic_cell
self.compute_stress = compute_stress
self.virial_form = "group"
r"""The form to use from
`10.1063/1.3245303 <https://doi.org/10.1063/1.3245303>`__ to compute the
internal virial contribution to the pressure stress tensor.
:math:`\mathbf{W} \left( \mathbf{r}^N \right)` is the internal virial of
:math:`N` atoms and :math:`\otimes` is the outer tensor product (i.e.,
``np.multiply.outer``).
``group`` (**default**)
This computes the virial by considering contributions based on groups
of atoms. The number of groups, number of atoms in each group, and
number of groups is completely arbitrary. Thus, we can straightforwardly
use :math:`n`-body combinations as groups and get more insight into
the virial contributions origin.
.. math::
\mathbf{W} \left( \mathbf{r}^N \right) =
\sum_{k \in \mathbf{0}} \sum_{w = 1}^{N_k} \mathbf{r}_w^k
\otimes \mathbf{F}_w^k.
Here, :math:`k` is a group of atoms that must be in the local cell
(i.e., :math:`k \in \mathbf{0}`). :math:`w` is the atom index within
group :math:`k` (:math:`N_k` is the total number of atoms in the group).
.. important::
This has to be specifically implemented in the relevant predictor
functions.
``finite_diff``
Uses finite differences by perturbing the cell vectors.
:type: :obj:`str`
"""
self.finite_diff_dh = 1e-4
r"""Forward and backward displacement of the cell vectors for finite
differences.
Default: ``1e-4``
:type: :obj:`float`
"""
self.use_voigt = False
r"""Convert the stress tensor to the 1D Voigt notation (xx, yy, zz, yz, xz, xy).
Default: ``False``
:type: :obj:`bool`
"""
self.only_normal_stress = False
r"""Only compute normal (xx, yy, and zz) stress. All other elements will be
zero. This is recommended for MD simulations to avoid altering box angular
momentum due to the antisymmetric contributions (yz, xz, and xy).
Default: ``False``
:type: :obj:`bool`
"""
self.box_scaling_type = "anisotropic"
r"""Treatment of box scaling when
:attr:`mbgdml.mbe.mbePredict.only_normal_stress` is ``True``.
``anisotropic`` (**default**)
No modifications are made to normal stresses which allows box vectors
to scale independently.
``isotropic``
Average the normal stresses so box vectors will scale identically.
:type: :obj:`str`
"""
if use_ray:
if not ray.is_initialized():
log.debug("ray is not initialized")
# Try to connect to already running ray service (from ray cli).
try:
log.debug("Trying to connect to ray at address %r", ray_address)
ray.init(address=ray_address)
except ConnectionError:
log.debug("Failed to connect to ray at %r", ray_address)
log.debug("Trying to initialize ray with %d cores", n_workers)
ray.init(num_cpus=n_workers)
log.debug("Successfully initialized ray")
else:
log.debug("Ray was already initialized")
self.models = [ray.put(model) for model in models]
# if alchemy_scalers is not None:
# self.alchemy_scalers = [
# ray.put(scaler) for scaler in alchemy_scalers
# ]
self.predict_model = ray.remote(predict_model)
@property
def periodic_cell(self):
r"""Periodic cell for MBE predictions.
:type: :obj:`mbgdml.periodic.Cell`
"""
if hasattr(self, "_periodic_cell"):
return self._periodic_cell
return None
@periodic_cell.setter
def periodic_cell(self, var):
if var is not None:
if self.use_ray:
var = ray.put(var)
self._periodic_cell = var
[docs] def get_avail_entities(self, comp_ids_r, comp_ids_model):
r"""Determines available ``entity_ids`` for each ``comp_id`` in a
structure.
Parameters
----------
comp_ids_r : :obj:`numpy.ndarray`, ndim: ``1``
Component IDs of the structure to predict.
comp_ids_model : :obj:`numpy.ndarray`, ndim: ``1``
Component IDs of the model.
Returns
-------
:obj:`list`
(length: ``len(comp_ids_r)``)
"""
# Models have a specific entity order that needs to be conserved in the
# predictions. Here, we create a ``avail_entity_ids`` list where each item
# is a list of all entities in ``r`` that match the model entity.
avail_entity_ids = []
for comp_id in comp_ids_model:
matching_entity_ids = np.where(comp_id == comp_ids_r)[0]
avail_entity_ids.append(matching_entity_ids)
return avail_entity_ids
# pylint: disable=too-many-branches, too-many-statements
[docs] def compute_nbody(self, Z, R, entity_ids, comp_ids, model):
r"""Compute all :math:`n`-body contributions of a single structure
using a :obj:`mbgdml.models.Model` object.
When ``use_ray = True``, this acts as a driver that spawns ray tasks of
the ``predict_model`` function.
Parameters
----------
Z : :obj:`numpy.ndarray`, ndim: ``1``
Atomic numbers of all atoms in the system with respect to ``r``.
R : :obj:`numpy.ndarray`, shape: ``(len(Z), 3)``
Cartesian coordinates of a single structure.
entity_ids : :obj:`numpy.ndarray`, shape: ``(len(Z),)``
Integers specifying which atoms belong to which entities.
comp_ids : :obj:`numpy.ndarray`, shape: ``(len(entity_ids),)``
Relates each ``entity_id`` to a fragment label. Each item's index
is the label's ``entity_id``.
model : :obj:`mbgdml.models.Model`
Model that contains all information needed by ``model_predict``.
Returns
-------
:obj:`float`
Total :math:`n`-body energy of ``r``.
:obj:`numpy.ndarray`
(shape: ``(len(Z), 3)``) - Total :math:`n`-body atomic forces of ``r``.
:obj:`numpy.ndarray`
(optional, shape: ``(len(Z), 3)``) - The internal virial
contribution to the pressure stress tensor in units of energy.
"""
# Unify r shape.
if R.ndim == 3:
log.debug("R has three dimensions (instead of two)")
if R.shape[0] == 1:
log.debug("R[0] was selected to proceed")
R = R[0]
else:
raise ValueError("R.ndim is not 2 (only one structure is allowed)")
# Creates a generator for all possible n-body combinations regardless
# of cutoffs.
if self.use_ray:
model_comp_ids = ray.get(model).comp_ids
else:
model_comp_ids = model.comp_ids
avail_entity_ids = self.get_avail_entities(comp_ids, model_comp_ids)
log.debug("Available entity IDs: %r", avail_entity_ids)
nbody_gen = gen_combs(avail_entity_ids)
kwargs_pred = {}
if self.alchemy_scalers is not None:
nbody_order = len(model_comp_ids)
kwargs_pred["alchemy_scalers"] = [
alchemy_scaler
for alchemy_scaler in self.alchemy_scalers
if alchemy_scaler.order == nbody_order
]
periodic_cell = self.periodic_cell
compute_stress = (
self.compute_stress and bool(periodic_cell) and self.virial_form == "group"
)
if compute_stress:
virial = np.zeros((3, 3), dtype=np.float64)
kwargs_pred["compute_virial"] = True
# Runs the predict_model function to calculate all n-body energies
# with this model.
if not self.use_ray:
E, F, *virial_model = self.predict_model(
Z, R, entity_ids, nbody_gen, model, periodic_cell, **kwargs_pred
)
if compute_stress:
virial += virial_model[0]
else:
E = 0.0
F = np.zeros(R.shape, dtype=np.float64)
# Put all common data into the ray object store.
Z = ray.put(Z)
R = ray.put(R)
entity_ids = ray.put(entity_ids)
nbody_gen = tuple(nbody_gen)
if self.wkr_chunk_size is None:
wkr_chunk_size = math.ceil(len(nbody_gen) / self.n_workers)
else:
wkr_chunk_size = self.wkr_chunk_size
nbody_chunker = chunk_iterable(nbody_gen, wkr_chunk_size)
workers = []
# Initialize workers
predict_model = self.predict_model
def add_worker(workers, chunk):
workers.append(
predict_model.remote(
Z,
R,
entity_ids,
chunk,
model,
periodic_cell,
**kwargs_pred,
)
)
for _ in range(self.n_workers):
try:
chunk = next(nbody_chunker)
add_worker(workers, chunk)
except StopIteration:
break
# Start workers and collect results.
while len(workers) != 0:
done_id, workers = ray.wait(workers)
E_worker, F_worker, *virial_model = ray.get(done_id)[0]
E += E_worker
F += F_worker
if compute_stress:
virial += virial_model[0]
try:
chunk = next(nbody_chunker)
add_worker(workers, chunk)
except StopIteration:
pass
# Cleanup object store
del Z, entity_ids
if compute_stress:
return E, F, virial
return E, F
# pylint: disable=too-many-branches, too-many-statements
[docs] def compute_nbody_decomp(self, Z, R, entity_ids, comp_ids, model):
r"""Compute all :math:`n`-body contributions of a single structure
using a :obj:`mbgdml.models.Model` object.
Stores all individual entity ID combinations, energies and forces.
This is more memory intensive. Structures that fall outside the
descriptor cutoff will have :obj:`numpy.nan` as their energy and forces.
When ``use_ray = True``, this acts as a driver that spawns ray tasks of
the ``predict_model`` function.
Parameters
----------
Z : :obj:`numpy.ndarray`, ndim: ``1``
Atomic numbers of all atoms in the system with respect to ``r``.
r : :obj:`numpy.ndarray`, shape: ``(len(Z), 3)``
Cartesian coordinates of a single structure.
entity_ids : :obj:`numpy.ndarray`, shape: ``(len(Z),)``
Integers specifying which atoms belong to which entities.
comp_ids : :obj:`numpy.ndarray`, shape: ``(len(entity_ids),)``
Relates each ``entity_id`` to a fragment label. Each item's index
is the label's ``entity_id``.
model : :obj:`mbgdml.models.Model`
Model that contains all information needed by ``model_predict``.
Returns
-------
:obj:`numpy.ndarray`
(ndim: ``1``) -
Energies of all possible :math:`n`-body structures. Some elements
can be :obj:`numpy.nan` if they are beyond the descriptor cutoff.
:obj:`numpy.ndarray`
(ndim: ``3``) -
Atomic forces of all possible :math:`n`-body structure. Some
elements can be :obj:`numpy.nan` if they are beyond the descriptor
cutoff.
:obj:`numpy.ndarray`
(ndim: ``2``) - All possible entity IDs.
"""
# Unify r shape.
if R.ndim == 3:
log.debug("R has three dimensions (instead of two)")
if R.shape[0] == 1:
log.debug("R[0] was selected to proceed")
R = R[0]
else:
raise ValueError("R.ndim is not 2 (only one structure is allowed)")
# Creates a generator for all possible n-body combinations regardless
# of cutoffs.
if self.use_ray:
model_comp_ids = ray.get(model).comp_ids
else:
model_comp_ids = model.comp_ids
avail_entity_ids = self.get_avail_entities(comp_ids, model_comp_ids)
nbody_gen = gen_combs(avail_entity_ids)
# Explicitly evaluate n-body generator.
comb0 = next(nbody_gen)
n_entities = len(comb0)
entity_combs = np.fromiter(itertools.chain.from_iterable(nbody_gen), np.int64)
# Reshape and add initial combination back.
if len(comb0) == 1:
entity_combs.shape = len(entity_combs), 1
else:
entity_combs.shape = int(len(entity_combs) / n_entities), n_entities
entity_combs = np.vstack((np.array(comb0), entity_combs))
# Runs the predict_model function to calculate all n-body energies
# with this model.
if not self.use_ray:
E, F, entity_combs = self.predict_model(
Z, R, entity_ids, entity_combs, model
)
else:
if entity_combs.ndim == 1:
n_atoms = np.count_nonzero(entity_ids == entity_combs[0])
else:
n_atoms = 0
for i in entity_combs[0]:
n_atoms += np.count_nonzero(entity_ids == i)
# Put all common data into the ray object store.
Z = ray.put(Z)
R = ray.put(R)
entity_ids = ray.put(entity_ids)
# Allocate memory for energies and forces.
E = np.empty(len(entity_combs), dtype=np.float64)
F = np.empty((len(entity_combs), n_atoms, 3), dtype=np.float64)
E[:] = np.nan
F[:] = np.nan
if self.wkr_chunk_size is None:
wkr_chunk_size = math.ceil(len(entity_combs) / self.n_workers)
else:
wkr_chunk_size = self.wkr_chunk_size
nbody_chunker = chunk_array(entity_combs, wkr_chunk_size)
workers = []
# Initialize workers
predict_model = self.predict_model
def add_worker(workers, chunk):
workers.append(predict_model.remote(Z, R, entity_ids, chunk, model))
for _ in range(self.n_workers):
try:
chunk = next(nbody_chunker)
add_worker(workers, chunk)
except StopIteration:
break
# Start workers and collect results.
while len(workers) != 0:
done_id, workers = ray.wait(workers)
E_worker, F_worker, entity_combs_wkr = ray.get(done_id)[0]
i_start = np.where((entity_combs_wkr[0] == entity_combs).all(1))[0][0]
i_end = i_start + len(entity_combs_wkr)
E[i_start:i_end] = E_worker
F[i_start:i_end] = F_worker
try:
chunk = next(nbody_chunker)
add_worker(workers, chunk)
except StopIteration:
pass
return E, F, entity_combs
[docs] def predict(self, Z, R, entity_ids, comp_ids):
r"""Predict the energies and forces of one or multiple structures.
Parameters
----------
Z : :obj:`numpy.ndarray`, ndim: ``1``
Atomic numbers of all atoms in the system with respect to ``R``.
R : :obj:`numpy.ndarray`, shape: ``(N, len(Z), 3)``
Cartesian coordinates of ``N`` structures to predict.
entity_ids : :obj:`numpy.ndarray`, shape: ``(N,)``
Integers specifying which atoms belong to which entities.
comp_ids : :obj:`numpy.ndarray`, shape: ``(N,)``
Relates each ``entity_id`` to a fragment label. Each item's index
is the label's ``entity_id``.
Returns
-------
:obj:`numpy.ndarray`
(shape: ``(N,)``) - Predicted many-body energy
:obj:`numpy.ndarray`
(shape: ``(N, len(Z), 3)``) - Predicted atomic forces.
:obj:`numpy.ndarray`
(optional, shape: ``(N, len(Z), 3)``) - The pressure stress tensor in units
of energy/distance\ :sup:`3`.
"""
t_predict = log.t_start()
if R.ndim == 2:
R = np.array([R])
n_R = R.shape[0]
compute_stress = self.compute_stress and bool(self.periodic_cell)
# Preallocate memory for energies and forces.
E = np.zeros((n_R,), dtype=np.float64)
F = np.zeros(R.shape, dtype=np.float64)
if compute_stress:
assert self.virial_form in ("group", "finite_diff")
stress = np.zeros((n_R, 3, 3), dtype=np.float64)
# Compute all energies and forces with every model.
for i, r in enumerate(R):
for model in self.models:
# Extra returns are present for stress. The *stress_nbody captures it.
# If it is not requested, it will just be an empty list.
E_nbody, F_nbody, *virial_nbody = self.compute_nbody(
Z, r, entity_ids, comp_ids, model
)
E[i] += E_nbody
F[i] += F_nbody
if compute_stress and self.virial_form == "group":
stress[i] += virial_nbody[0]
if compute_stress and self.virial_form == "finite_diff":
periodic_cell = self.periodic_cell
if self.use_ray:
periodic_cell = ray.get(periodic_cell)
cell_v = periodic_cell.cell_v
stress[i] = virial_finite_diff(
Z,
r,
entity_ids,
comp_ids,
cell_v,
self,
only_normal_stress=self.only_normal_stress,
)
if compute_stress: # pylint: disable=too-many-nested-blocks
periodic_cell = self.periodic_cell
if self.use_ray:
periodic_cell = ray.get(periodic_cell)
stress /= periodic_cell.volume
if self.only_normal_stress:
for n in range(stress.shape[0]):
for i in range(0, 3):
for j in range(0, 3):
if i != j:
stress[n][i][j] = 0.0
if self.box_scaling_type == "isotropic":
stress_average = np.trace(stress[n]) / 3
for i in range(0, 3):
stress[n][i][i] = stress_average
if self.use_voigt:
stress = np.array([to_voigt(r_stress) for r_stress in stress])
log.t_stop(
t_predict, message="Predictions took {time} s", precision=3, level=10
)
return E, F, stress
log.t_stop(
t_predict, message="Predictions took {time} s", precision=3, level=10
)
return E, F
[docs] def predict_decomp(self, Z, R, entity_ids, comp_ids):
r"""Predict the energies and forces of one or multiple structures.
Parameters
----------
Z : :obj:`numpy.ndarray`, ndim: ``1``
Atomic numbers of all atoms in the system with respect to ``R``.
R : :obj:`numpy.ndarray`, shape: ``(N, len(Z), 3)``
Cartesian coordinates of ``N`` structures to predict.
entity_ids : :obj:`numpy.ndarray`, shape: ``(N,)``
Integers specifying which atoms belong to which entities.
comp_ids : :obj:`numpy.ndarray`, shape: ``(N,)``
Relates each ``entity_id`` to a fragment label. Each item's index
is the label's ``entity_id``.
Returns
-------
:obj:`list`
:math:`n`-body energies of all structures in increasing order of
:math:`n`.
:obj:`list`
:math:`n`-body forces of all structures in increasing order of
:math:`n`.
:obj:`list`
Structure indices and Entity IDs of all structures in increasing
order of :math:`n`. Column 0 is the structure index in ``R``, and
the remaining columns are entity IDs.
:obj:`list`
:math:`n`-body orders of each returned item.
"""
t_predict = log.t_start()
if R.ndim == 2:
R = np.array([R])
E_data, F_data, entity_comb_data = [], [], []
# We want to perform all same order n-body contributions.
if self.use_ray:
models_ = [ray.get(model) for model in self.models]
else:
models_ = self.models
model_nbody_orders = [model.nbody_order for model in models_]
nbody_orders = sorted(set(model_nbody_orders))
for nbody_order in nbody_orders:
E_nbody, F_nbody, entity_comb_nbody = [], [], []
nbody_models = []
for i in range(len(model_nbody_orders)):
if models_[i].nbody_order == nbody_order:
nbody_models.append(self.models[i])
for model in nbody_models:
for i, r in enumerate(R):
E, F, entity_combs = self.compute_nbody_decomp(
Z, r, entity_ids, comp_ids, model
)
entity_combs = np.hstack(
(
np.array([[i] for _ in range(len(entity_combs))]),
entity_combs,
)
)
E_nbody.extend(E)
F_nbody.extend(F)
entity_comb_nbody.extend(entity_combs)
E_data.append(np.array(E_nbody))
F_data.append(np.array(F_nbody))
entity_comb_data.append(np.array(entity_comb_nbody))
log.t_stop(
t_predict,
message="Decomposed predictions took {time} s",
precision=3,
level=10,
)
return E_data, F_data, entity_comb_data, nbody_orders