# MIT License
#
# Copyright (c) 2018-2022 Stefan Chmiela, Luis Galvez
# Copyright (c) 2022-2023, Alex M. Maldonado
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from functools import partial
import multiprocessing as mp
import numpy as np
import scipy as sp
from ..logger import GDMLLogger
try:
import torch
except ImportError:
_HAS_TORCH = False
else:
_HAS_TORCH = True
Pool = mp.get_context("fork").Pool
log = GDMLLogger(__name__)
[docs]def _pbc_diff(diffs, lat_and_inv, use_torch=False):
r"""Clamp differences of vectors to super cell.
Parameters
----------
diffs : :obj:`numpy.ndarray`
:math:`N \times 3` matrix of :math:`N` pairwise differences between
vectors :math:`\boldsymbol{u} - \boldsymbol{v}`
lat_and_inv : tuple of :obj:`numpy.ndarray`
Tuple of :math:`3 \times 3` matrix containing lattice vectors as
columns and its inverse.
use_torch : :obj:`bool`, default: ``False``
Enable, if the inputs are PyTorch objects.
Returns
-------
:obj:`numpy.ndarray`
:math:`N \times 3` matrix clamped differences
"""
lat, lat_inv = lat_and_inv
if use_torch and not _HAS_TORCH:
raise ImportError(
"Optional PyTorch dependency not found! Please install PyTorch."
)
if use_torch:
c = lat_inv.mm(diffs.t())
diffs -= lat.mm(c.round()).t()
else:
c = lat_inv.dot(diffs.T)
diffs -= lat.dot(np.around(c)).T
return diffs
[docs]def _pdist(r, lat_and_inv=None):
r"""Compute pairwise Euclidean distance matrix between all atoms.
Parameters
----------
r : :obj:`numpy.ndarray`
Array of size :math:`3N` containing the Cartesian coordinates of each atom.
lat_and_inv : :obj:`tuple` of :obj:`numpy.ndarray`, optional
Tuple of :math:`3 \times 3` matrix containing lattice vectors as
columns and its inverse.
Returns
-------
:obj:`numpy.ndarray`
Array of size :math:`N(N-1)/2` containing the upper triangle of the
pairwise distance matrix between atoms.
"""
r = r.reshape(-1, 3)
n_atoms = r.shape[0]
if lat_and_inv is None:
pdist = sp.spatial.distance.pdist(r, "euclidean")
else:
pdist = sp.spatial.distance.pdist(
r, lambda u, v: np.linalg.norm(_pbc_diff(u - v, lat_and_inv))
)
tril_idxs = np.tril_indices(n_atoms, k=-1)
return sp.spatial.distance.squareform(pdist, checks=False)[tril_idxs]
def _squareform(vec_or_mat):
# vector to matrix representation
if vec_or_mat.ndim == 1:
n_tril = vec_or_mat.size
n = int((1 + np.sqrt(8 * n_tril + 1)) / 2)
i, j = np.tril_indices(n, k=-1)
mat = np.zeros((n, n))
mat[i, j] = vec_or_mat
mat[j, i] = vec_or_mat
return mat
# matrix to vector
assert vec_or_mat.shape[0] == vec_or_mat.shape[1] # matrix is square
n = vec_or_mat.shape[0]
i, j = np.tril_indices(n, k=-1)
return vec_or_mat[i, j]
[docs]def _r_to_desc(r, pdist):
r"""Generate descriptor for a set of atom positions in Cartesian
coordinates.
Parameters
----------
r : :obj:`numpy.ndarray`
Array of size :math:`3N` containing the Cartesian coordinates of
each atom.
pdist : :obj:`numpy.ndarray`
Array of size :math:`N \times N` containing the Euclidean distance
(L2-norm) for each pair of atoms.
Returns
-------
:obj:`numpy.ndarray`
Descriptor representation as 1D array of size :math:`N(N-1)/2`.
"""
# Add singleton dimension if input is (,3N).
if r.ndim == 1:
r = r[None, :]
return 1.0 / pdist
[docs]def _r_to_d_desc(r, pdist, lat_and_inv=None):
r"""Generate descriptor Jacobian for a set of atom positions in
Cartesian coordinates.
This method can apply the minimum-image convention as periodic
boundary condition for distances between atoms, given the edge
length of the (square) unit cell.
Parameters
----------
r : :obj:`numpy.ndarray`
Array of size :math:`3N` containing the Cartesian coordinates of
each atom.
pdist : :obj:`numpy.ndarray`
Array of size :math:`N \times N` containing the Euclidean distance
(L2-norm) for each pair of atoms.
lat_and_inv : :obj:`tuple` of :obj:`numpy.ndarray`, optional
Tuple of :math:`3 \times 3` matrix containing lattice vectors as
columns and its inverse.
Returns
-------
:obj:`numpy.ndarray`
Array of size :math:`N(N-1)/2 \times 3N` containing all partial
derivatives of the descriptor.
"""
r = r.reshape(-1, 3)
pdiff = r[:, None] - r[None, :] # pairwise differences ri - rj
n_atoms = r.shape[0]
i, j = np.tril_indices(n_atoms, k=-1)
pdiff = pdiff[i, j, :] # lower triangular
if lat_and_inv is not None:
pdiff = _pbc_diff(pdiff, lat_and_inv)
d_desc_elem = pdiff / (pdist**3)[:, None]
return d_desc_elem
[docs]def _from_r(r, lat_and_inv=None):
r"""Generate descriptor and its Jacobian for one molecular geometry
in Cartesian coordinates.
Parameters
----------
r : :obj:`numpy.ndarray`
Array of size :math:`3N` containing the Cartesian coordinates of
each atom.
lat_and_inv : :obj:`tuple` of :obj:`numpy.ndarray`, optional
Tuple of :math:`3 \times 3` matrix containing lattice vectors as
columns and its inverse.
Returns
-------
:obj:`numpy.ndarray`
Descriptor representation as 1D array of size :math:`N(N-1)/2`.
:obj:`numpy.ndarray`
Array of size :math:`N(N-1)/2 \times 3N` containing all partial
derivatives of the descriptor.
"""
# Add singleton dimension if input is (,3N).
if r.ndim == 1:
r = r[None, :]
pd = _pdist(r, lat_and_inv)
r_desc = _r_to_desc(r, pd)
r_d_desc = _r_to_d_desc(r, pd, lat_and_inv)
return r_desc, r_d_desc
[docs]class Desc:
r"""Generate descriptors and their Jacobians for molecular geometries,
including support for periodic boundary conditions.
"""
def __init__(self, n_atoms, max_processes=None):
"""
Parameters
----------
n_atoms : :obj:`int`
Number of atoms in the represented system.
max_processes : :obj:`int`, default: :obj:`None`
Limit the max. number of processes. Otherwise all CPU cores are
used.
"""
self.n_atoms = n_atoms
self.dim_i = 3 * n_atoms
# Size of the resulting descriptor vector.
self.dim = (n_atoms * (n_atoms - 1)) // 2
self.tril_indices = np.tril_indices(n_atoms, k=-1)
# Precompute indices for nonzero entries in descriptor derivatives.
self.d_desc_mask = np.zeros((n_atoms, n_atoms - 1), dtype=np.int64)
for a in range(n_atoms): # for each partial derivative
rows, cols = self.tril_indices
self.d_desc_mask[a, :] = np.concatenate(
[np.where(rows == a)[0], np.where(cols == a)[0]]
)
self.dim_range = np.arange(self.dim) # [0, 1, ..., dim-1]
# Precompute indices for nonzero entries in descriptor derivatives.
self.M = np.arange(1, n_atoms) # indexes matrix row-wise, skipping diagonal
for a in range(1, n_atoms):
self.M = np.concatenate((self.M, np.delete(np.arange(n_atoms), a)))
self.A = np.repeat(
np.arange(n_atoms), n_atoms - 1
) # [0, 0, ..., 1, 1, ..., 2, 2, ...]
self.max_processes = max_processes
[docs] def from_R(self, R, lat_and_inv=None, max_processes=None):
r"""Generate descriptor and its Jacobian for multiple molecular
geometries in Cartesian coordinates.
Parameters
----------
R : :obj:`numpy.ndarray`
Array of size :math:`M \times 3N` containing the Cartesian
coordinates of each atom.
lat_and_inv : :obj:`tuple` of :obj:`numpy.ndarray`, default: :obj:`None`
Tuple of :math:`3 \times 3` matrix containing lattice vectors as
columns and its inverse.
max_processes : :obj:`int`, default: :obj:`None`
Limit the max number of processes. Otherwise all CPU cores are
used. This parameter overwrites the global setting as set during
initialization.
Returns
-------
:obj:`numpy.ndarray`
Array of size :math:`M \times N(N-1)/2` containing the descriptor
representation for each geometry.
:obj:`numpy.ndarray`
Array of size :math:`M \times N(N-1)/2 \times 3N` containing all
partial derivatives of the descriptor for each geometry.
"""
# Add singleton dimension if input is (,3N).
if R.ndim == 1:
R = R[None, :]
M = R.shape[0]
if M == 1:
return _from_r(R, lat_and_inv)
R_desc = np.empty([M, self.dim])
R_d_desc = np.empty([M, self.dim, 3])
pool = None
map_func = map
max_processes = max_processes or self.max_processes
if max_processes != 1 and mp.cpu_count() > 1:
pool = Pool((max_processes or mp.cpu_count()) - 1) # exclude main process
map_func = pool.imap
for i, r_desc_r_d_desc in enumerate(
map_func(partial(_from_r, lat_and_inv=lat_and_inv), R)
):
R_desc[i, :], R_d_desc[i, :, :] = r_desc_r_d_desc
if pool is not None:
pool.close()
# Wait for the worker processes to terminate (to measure total runtime
# correctly).
pool.join()
pool = None
return R_desc, R_d_desc
# Multiplies descriptor(s) jacobian with 3N-vector(s) from the right side
def d_desc_dot_vec(self, R_d_desc, vecs):
if R_d_desc.ndim == 2:
R_d_desc = R_d_desc[None, ...]
if vecs.ndim == 1:
vecs = vecs[None, ...]
i, j = self.tril_indices
vecs = vecs.reshape(vecs.shape[0], -1, 3)
einsum = np.einsum
if _HAS_TORCH and torch.is_tensor(R_d_desc):
assert torch.is_tensor(vecs)
einsum = torch.einsum
return einsum("...ij,...ij->...i", R_d_desc, vecs[:, j, :] - vecs[:, i, :])
# Multiplies descriptor(s) jacobian with N(N-1)/2-vector(s) from the left side
def vec_dot_d_desc(self, R_d_desc, vecs, out=None):
if R_d_desc.ndim == 2:
R_d_desc = R_d_desc[None, ...]
if vecs.ndim == 1:
vecs = vecs[None, ...]
# either multiple descriptors or multiple vectors at once, not both (or the
# same number of both, than it will must be a multidot).
assert (
R_d_desc.shape[0] == 1
or vecs.shape[0] == 1
or R_d_desc.shape[0] == vecs.shape[0]
)
n = np.max((R_d_desc.shape[0], vecs.shape[0]))
i, j = self.tril_indices
out = np.zeros((n, self.n_atoms, self.n_atoms, 3))
out[:, i, j, :] = R_d_desc * vecs[..., None]
out[:, j, i, :] = -out[:, i, j, :]
return out.sum(axis=1).reshape(n, -1)
# if out is None or out.shape != (n, self.n_atoms*3):
# out = np.zeros((n, self.n_atoms*3))
# R_d_desc_full = np.zeros((self.n_atoms, self.n_atoms, 3))
# for a in range(n):
# R_d_desc_full[i, j, :] = R_d_desc * vecs[a, :, None]
# R_d_desc_full[j, i, :] = -R_d_desc_full[i, j, :]
# out[a,:] = R_d_desc_full.sum(axis=0).ravel()
# return out
[docs] def d_desc_from_comp(self, R_d_desc, out=None):
r"""Convert a compressed representation of a descriptor Jacobian back
to its full representation.
The compressed representation omits all zeros and scales with :math:`N`
instead of :math:`N(N-1)/2`.
Parameters
----------
R_d_desc : :obj:`numpy.ndarray` or :obj:`torch.tensor`
Array of size :math:`M \times N \times N \times 3` containing the
compressed descriptor Jacobian.
out : :obj:`numpy.ndarray` or :obj:`torch.tensor`, optional
Output argument. This must have the exact kind that would
be returned if it was not used.
Note
----
If used, the output argument must be initialized with zeros!
Returns
-------
:obj:`numpy.ndarray` or :obj:`torch.tensor`
Array of size :math:`M \times N(N-1)/2 \times 3N` containing the
full representation.
"""
if R_d_desc.ndim == 2:
R_d_desc = R_d_desc[None, ...]
n = R_d_desc.shape[0]
i, j = self.tril_indices
if out is None:
if _HAS_TORCH and torch.is_tensor(R_d_desc):
device = R_d_desc.device
dtype = R_d_desc.dtype
# pylint: disable-next=no-member
out = torch.zeros((n, self.dim, self.n_atoms, 3), device=device).to(
dtype
)
else:
out = np.zeros((n, self.dim, self.n_atoms, 3))
else:
out = out.reshape(n, self.dim, self.n_atoms, 3)
out[:, self.dim_range, j, :] = R_d_desc
out[:, self.dim_range, i, :] = -R_d_desc
return out.reshape(-1, self.dim, self.dim_i)
[docs] def d_desc_to_comp(self, R_d_desc):
r"""Convert a descriptor Jacobian to a compressed representation.
The compressed representation omits all zeros and scales with :math:`N`
instead of :math:`N(N-1)/2`.
Parameters
----------
R_d_desc : :obj:`numpy.ndarray`
Array of size :math:`M \times N(N-1)/2 \times 3N` containing the
descriptor Jacobian.
Returns
-------
:obj:`numpy.ndarray`
Array of size :math:`M \times N \times N \times 3` containing the
compressed representation.
"""
# Add singleton dimension for single inputs.
if R_d_desc.ndim == 2:
R_d_desc = R_d_desc[None, ...]
n = R_d_desc.shape[0]
n_atoms = int(R_d_desc.shape[2] / 3)
R_d_desc = R_d_desc.reshape(n, -1, n_atoms, 3)
ret = np.zeros((n, n_atoms, n_atoms, 3))
ret[:, self.M, self.A, :] = R_d_desc[:, self.d_desc_mask.ravel(), self.A, :]
# Take the upper triangle.
i, j = self.tril_indices
return ret[:, i, j, :]
[docs] @staticmethod
def perm(perm):
r"""Convert atom permutation to descriptor permutation.
A permutation of :math:`N` atoms is converted to a permutation that acts
on the corresponding descriptor representation. Applying the converted
permutation to a descriptor is equivalent to permuting the atoms
first and then generating the descriptor.
Parameters
----------
perm : :obj:`numpy.ndarray`
Array of size N containing the atom permutation.
Returns
-------
:obj:`numpy.ndarray`
Array of size :math:`N(N-1)/2` containing the corresponding
descriptor permutation.
"""
n = len(perm)
rest = np.zeros((n, n))
rest[np.tril_indices(n, -1)] = list(range((n**2 - n) // 2))
rest = rest + rest.T
rest = rest[perm, :]
rest = rest[:, perm]
return rest[np.tril_indices(n, -1)].astype(int)