Source code for doped.utils.configurations

"""
Utility functions for generating and parsing configurational coordinate (CC)
diagrams, for potential energy surfaces (PESs), Nudged Elastic Band (NEB), non-
radiative recombination calculations etc.
"""

import os
import warnings

import numpy as np
from pymatgen.core.structure import PeriodicSite, Structure
from pymatgen.util.typing import PathLike

from doped.utils.efficiency import StructureMatcher_scan_stol
from doped.utils.symmetry import are_equivalent_lattices



[docs]
def orient_s2_like_s1(
    struct1: Structure,
    struct2: Structure,
    new_lattice: str | None = None,
    verbose: bool = False,
    **sm_kwargs,
):
    """
    Re-orient ``struct2`` to match the orientation of ``struct1`` as closely as
    possible , with matching atomic indices as needed for VASP NEB calculations
    and other structural transformation analyses (e.g. configuration coordinate
    (CC) diagrams via ``nonrad``, ``CarrierCapture.jl`` etc.).

    This will give a fully symmetry-equivalent orientation (i.e. **will not
    change the actual geometry**) of ``struct2``, except if ``struct1`` and
    ``struct2`` have different inequivalent lattices (e.g. different space
    groups) `and` ``struct1_lattice`` is ``True``.

    This corresponds to minimising the root-mean-square displacement for the
    shortest `linear` path from ``struct1`` to a symmetry-equivalent definition
    of ``struct2``, with matched atomic indices and lattices as required by
    VASP NEB and ``nonrad`` functions. This function uses an accelerated
    version of the
    :meth:`~pymatgen.analysis.structure_matcher.StructureMatcher.get_s2_like_s1`
    method, extended to ensure the correct atomic indices matching and lattice
    vector definitions.

    If ``verbose=True``, information about the mass-weighted displacement (ΔQ
    in amu^(1/2)Å) between the input and re-oriented structures is printed.
    This is the typical x-axis unit in configurational coordinate diagrams (see
    e.g. 10.1103/PhysRevB.90.075202).

    Args:
        struct1 (Structure): Initial structure.
        struct2 (Structure): Final structure.
        new_lattice (str | None):
            If ``"struct1"``, then the lattice of ``struct1`` is used for the
            re-oriented structure, if ``"struct2"``, then the lattice of
            ``struct2`` is used, or if ``"s2_like_s1"``, then the output
            lattice of ``StructureMatcher.get_s2_like_s1`` (a
            symmetry-equivalent version of ``struct2.lattice``) is used.
            Default is ``None``, where ``new_lattice`` is set to ``"struct1"``
            if ``struct1`` and ``struct2`` have equivalent lattices (expected
            to be the case for defect NEBs/CC diagrams), and ``"s2_like_s1"``
            otherwise.
        verbose (bool):
            Print information about the mass-weighted displacement
            (ΔQ in amu^(1/2)Å) between the input and re-oriented structures.
            Default: ``False``
        **sm_kwargs:
            Additional keyword arguments to pass to ``StructureMatcher()``
            (e.g. ``ignored_species``, ``comparator`` etc).

    Returns:
        Structure:
        ``struct2`` re-oriented to match ``struct1`` as closely as possible.

        # TODO: Option to return RMSD, just displacement, anything else?
    """
    if not are_equivalent_lattices(struct1.lattice, struct2.lattice):
        warnings.warn(
            "The lattices of the two input structures have been detected to be (symmetry-)inequivalent. "
            "This is usually not desirable for defect NEBs/CC diagrams, but may be the case for e.g. "
            "NEBs between polymorphs. "
        )
        if new_lattice is None:
            warnings.warn(
                "Note that the lattice definitions may differ between the output structure and "
                "``struct1``. See the NEB/CC diagram tutorial for details."
            )
            new_lattice = "s2_like_s1"

    elif new_lattice is None:
        new_lattice = "struct1"

    if sm_kwargs.get("primitive_cell", False):
        raise ValueError(
            "``primitive_cell=True`` is not supported for `get_transformation` (and hence "
            "`get_s2_like_s1`."
        )
    sm_kwargs["primitive_cell"] = False

    struct2_like_struct1 = StructureMatcher_scan_stol(
        struct1, struct2, func_name="get_s2_like_s1", **sm_kwargs
    )

    if not struct2_like_struct1:
        raise RuntimeError(
            f"``StructureMatcher.get_s2_like_s1()`` failed. Note that this requires your input structures "
            f"to have matching compositions and similar lattices. Input structures have compositions:\n"
            f"struct1: {struct1.composition}\nstruct2: {struct2.composition}\n"
            f"and lattices:\nstruct1: {struct1.lattice}\nstruct2: {struct2.lattice}"
        )

    lattice = (
        struct1.lattice
        if new_lattice == "struct1"
        else (
            struct2_like_struct1.lattice
            if new_lattice == "s2_like_s1"
            else struct2.lattice if new_lattice == "struct2" else None
        )
    )
    if lattice is None:
        raise ValueError(
            f"Invalid value for ``new_lattice``: {new_lattice}. Must be one of ``'struct1'``, "
            f"``'struct2'``, or ``'s2_like_s1'``."
        )

    struct2_really_like_struct1 = Structure.from_sites(
        [  # sometimes this get_s2_like_s1 doesn't fully work as desired, giving different (but equivalent)
            PeriodicSite(  # lattice vectors (e.g. a=(010) instead of (100) etc.), so we redefine with the
                site.specie,  # chosen lattice to be sure
                site.frac_coords,
                lattice,
                properties=site.properties,
                to_unit_cell=True,
            )
            for site in struct2_like_struct1.sites
        ]
    )

    # we see that this rearranges the structure so the atom indices should now match correctly. This should
    # give a lower dQ as we see here (or the same if the original structures matched perfectly)
    delQ_s1_s2 = get_dQ(struct1, struct2)
    delQ_s1_s2_like_s1_pmg = get_dQ(struct1, struct2_like_struct1)
    delQ_s2_like_s1_s2 = get_dQ(struct2_really_like_struct1, struct2)
    delQ_s1_s2_like_s1 = get_dQ(struct1, struct2_really_like_struct1)

    if (
        not sm_kwargs.get("allow_subset")
        and delQ_s1_s2_like_s1 > min(delQ_s1_s2, delQ_s1_s2_like_s1_pmg) + 0.1
    ):
        # shouldn't happen!  (and ignore cases where we're using it in defect stenciling; sm_kwargs has X)
        warnings.warn(
            f"StructureMatcher.get_s2_like_s1() appears to have failed. The mass-weighted displacement "
            f"(ΔQ in amu^(1/2)Å) for the input structures is:\n"
            f"ΔQ(s1/s2) = {delQ_s1_s2:.2f} amu^(1/2)Å\n"
            f"Using ``StructureMatcher.get_s2_like_s1()`` directly gives:\n"
            f"ΔQ(s1/s2_like_s1_pmg) = {delQ_s1_s2_like_s1_pmg:.2f} amu^(1/2)Å\n"
            f"Then using the re-oriented structure gives:\n"
            f"ΔQ(s1/s2_like_s1_doped) = {delQ_s1_s2_like_s1:.2f} amu^(1/2)Å\n"
            f"which should always be less than or equal to the previous two values... Please report this "
            f"case to the doped developers!"
        )

    if verbose:
        print(f"ΔQ(s1/s2) = {delQ_s1_s2:.2f} amu^(1/2)Å")
        print(f"ΔQ(s2_like_s1/s2) = {delQ_s2_like_s1_s2:.2f} amu^(1/2)Å")
        print(f"ΔQ(s1/s2_like_s1) = {delQ_s1_s2_like_s1:.2f} amu^(1/2)Å")

    return struct2_really_like_struct1



get_s2_like_s1 = orient_s2_like_s1  # alias similar to pymatgen's get_s2_like_s1



[docs]
def get_dQ(struct_a: Structure, struct_b: Structure) -> float:
    """
    Get the mass-weighted displacement (ΔQ in amu^(1/2)Å) between two
    structures, assuming matched atomic indices.

    Args:
        struct_a (Structure): Initial structure.
        struct_b (Structure): Final structure.

    Returns:
        float:
            The mass-weighted displacement (ΔQ in amu^(1/2)Å) between the two
            structures, assuming matched atomic indices. Returns ``np.inf`` if
            the structures are not matching.
    """
    try:
        return np.sqrt(
            sum(
                (a.distance(b) ** 2) * a.specie.atomic_mass
                for a, b in zip(struct_a, struct_b, strict=False)
            )
        )  # TODO: Make this a public function, with option to reorient if not matching?
        # Should then match output of when using get_linear_assignment_solution or
        # get_site_mapping_indices (TODO: use in tests)
    except Exception:
        return np.inf  # if the structures are not matching, return inf




[docs]
def get_path_structures(
    struct1: Structure,
    struct2: Structure,
    n_images: int | np.ndarray | list[float] = 7,
    displacements: np.ndarray | list[float] | None = None,
    displacements2: np.ndarray | list[float] | None = None,
) -> dict[str, Structure] | tuple[dict[str, Structure], dict[str, Structure]]:
    """
    Generate a series of interpolated structures along the linear path between
    ``struct1`` and ``struct2``, typically for use in NEB calculations or
    configuration coordinate (CC) diagrams.

    Structures are output as a dictionary with keys corresponding to either the
    index of the interpolated structure (0-indexed; ``00``, ``01`` etc as for
    VASP NEB calculations) or the fractional displacement along the
    interpolation path between structures, and values corresponding to the
    interpolated structure. If ``displacements`` is set (and thus two sets of
    structures are generated), a tuple of such dictionaries is returned.

    Note that for NEB calculations, the the lattice vectors and order of sites
    (atomic indices) must be consistent in both ``struct1`` and ``struct2``.
    This can be ensured by using the ``orient_s2_like_s1()`` function in
    ``doped.utils.configurations``, as shown in the ``doped`` tutorials. This
    is also desirable for CC diagrams, as the atomic indices are assumed to
    match for many parsing and plotting functions (e.g. in ``nonrad`` and
    ``CarrierCapture.jl``), but is not strictly necessary. If the input
    structures are detected to be different (symmetry-inequivalent) geometries
    (e.g. not a simple defect migration between two symmetry-equivalent sites),
    but have mis-matching orientations/ positions (such that they do not
    correspond to the shortest linear path between them), a warning will be
    raised. See the ``doped`` configuration coordinate / NEB path generation
    tutorial for a deeper explanation.

    If only ``n_images`` is set (and ``displacements`` is ``None``)(default),
    then only one set of interpolated structures is generated (in other words,
    assuming a standard NEB/PES calculation is being performed). If
    ``displacements`` (and possibly ``displacements2``) is set, then two sets
    of interpolated structures are generated (in other words, assuming a CC /
    non-radiative recombination calculation is being performed, where the two
    sets of structures are to be calculated in separate charge/spin etc
    states).

    Args:
        struct1 (Structure): Initial structure.
        struct2 (Structure): Final structure.
        n_images (int):
            Number of images to interpolate between ``struct1`` and
            ``struct2``, or a list of fractional interpolation values
            (displacements) to use. Note that ``n_images`` is ignored if
            ``displacements`` is set (in which case CC / non-radiative
            recombination calculations are assumed -- generating two sets of
            interpolated structures -- otherwise a standard NEB / PES
            calculation is assumed -- generating one set of structures).
            Default: 7
        displacements (np.ndarray or list):
            Displacements to use for ``struct1`` along the linear
            transformation path to ``struct2``. If set, then CC / non-radiative
            recombination calculations are assumed, and two sets of
            interpolated structures will be generated. If set and
            ``displacements2`` is not set, then the same set of displacements
            is used for both sets of interpolated structures. Default: ``None``
        displacements2 (np.ndarray or list):
            Displacements to use for ``struct2`` along the linear
            transformation path to ``struct1``. If not set and
            ``displacements`` is not ``None``, then the same set of
            displacements is used for both sets of interpolated structures.
            Default: ``None``

    Returns:
        dict[str, Structure] | tuple[dict[str, Structure], dict[str, Structure]]:
            Dictionary of structures (for NEB/PES calculations), or tuple of
            two dictionaries of structures (for CC / non-radiative
            calculations, when ``displacements`` is not ``None``).
    """
    if displacements is None:
        disp_1 = struct1.interpolate(
            struct2, n_images, interpolate_lattices=True, pbc=True, autosort_tol=1.2
        )
        disp_2 = None
        if not isinstance(n_images, int | float):
            displacements = n_images  # displacement magnitudes provided instead of n_images

    else:
        displacements2 = displacements2 if displacements2 is not None else displacements
        disp_1 = struct1.interpolate(
            struct2, displacements, interpolate_lattices=True, pbc=True, autosort_tol=1.2
        )
        disp_2 = struct2.interpolate(
            struct1,
            displacements2,
            interpolate_lattices=True,
            pbc=True,
            autosort_tol=1.2,
        )

    disp_1_dict: dict[str, Structure] = {}
    disp_2_dict: dict[str, Structure] = {}
    for structs_disps_dict_tuple in [
        (disp_1, displacements, disp_1_dict),
        (disp_2, displacements2, disp_2_dict),
    ]:
        structs, disps, disp_dict = structs_disps_dict_tuple
        if structs is None:
            continue  # NEB, only one directory written
        if disps is not None:
            disps = _smart_round(disps)

        for i, struct in enumerate(structs):
            key = f"0{i}" if displacements is None else f"delQ_{disps[i]}"  # type: ignore
            disp_dict[key] = struct

    return (disp_1_dict, disp_2_dict) if disp_2_dict else disp_1_dict



def _smart_round(
    numbers: float | list[float] | np.ndarray[float],
    tol: float = 1e-5,
    return_decimals: bool = False,
    consistent_decimals: bool = True,
) -> float | list[float] | np.ndarray[float] | tuple[float | list[float] | np.ndarray[float], float]:
    """
    Custom rounding function that rounds the input number(s) to the lowest
    number of decimals which gives the same numerical value as the input, to
    within a specified tolerance value.

    For example, 0.5000001 with tol = 1e-5 (default) would be rounded to 0.5,
    but 0.5780001 would be rounded to 0.578.

    Primary use case is for rounding float values returned by ``numpy`` which
    may be e.g. 0.50000001 instead of 0.5.

    If a list or array of numbers is provided (rather than a single float),
    then rounding is applied to each element in the list/array. If
    ``consistent_decimals=True`` (default), then the same number of decimals is
    used for rounding for all elements, where this number of decimals is the
    minimum required for rounding to satisfy the tolerance for all elements.

    Args:
        numbers (float, list[float], np.ndarray[float]):
            The number(s) to round.
        tol (float): The tolerance value for rounding.
            Default: ``1e-5``
        return_decimals (bool):
            If ``True``, also returns the identified appropriate number of
            decimals to round to. Default: ``False``
        consistent_decimals (bool):
            If ``True``, the same number of decimals is used for rounding for
            all elements in the input list/array, where this number of decimals
            is the minimum required for rounding to satisfy the tolerance for
            all elements. Default: ``True``

    Returns:
        float | list[float] | np.ndarray[float] | tuple[..., float]:
            The rounded number(s). If ``return_decimals=True``, then a tuple is
            returned with the rounded number(s) and the number of decimals used
            for rounding.
    """

    def _smart_round_number(number: float, tol: float, return_decimals: bool):
        for decimals in range(20):
            rounded_number = round(number, decimals)
            if abs(rounded_number - number) < tol:
                return (rounded_number, decimals) if return_decimals else rounded_number

        raise ValueError("Failed to round number to within tolerance???")

    if isinstance(numbers, float):
        return _smart_round_number(numbers, tol, return_decimals)

    # list / array:
    rounded_list = [
        _smart_round_number(number, tol, return_decimals or consistent_decimals) for number in numbers
    ]

    if not consistent_decimals:
        return rounded_list if isinstance(numbers, list) else np.array(rounded_list)

    decimals = max(decimals for _, decimals in rounded_list)
    rounded_list = [round(number, decimals) for number in numbers]
    return (rounded_list, decimals) if return_decimals else rounded_list



[docs]
def write_path_structures(
    struct1: Structure,
    struct2: Structure,
    output_dir: PathLike | None = None,
    n_images: int | list = 7,
    displacements: np.ndarray | list[float] | None = None,
    displacements2: np.ndarray | list[float] | None = None,
):
    """
    Generate a series of interpolated structures along the linear path between
    ``struct1`` and ``struct2``, typically for use in NEB calculations or
    configuration coordinate (CC) diagrams, and write to folders.

    Folder names are labelled by the index of the interpolated structure
    (0-indexed; ``00``, ``01`` etc as for VASP NEB calculations) or the
    fractional displacement along the interpolation path between structures
    (e.g. ``delQ_0.0``, ``delQ_0.1``, ``delQ_-0.1`` etc), depending on the
    input ``n_images``/``displacements`` settings.

    Note that for NEB calculations, the the lattice vectors and order of sites
    (atomic indices) must be consistent in both ``struct1`` and ``struct2``.
    This can be ensured by using the ``orient_s2_like_s1()`` function in
    ``doped.utils.configurations``, as shown in the ``doped`` tutorials. This
    is also desirable for CC diagrams, as the atomic indices are assumed to
    match for many parsing and plotting functions (e.g. in ``nonrad`` and
    ``CarrierCapture.jl``), but is not strictly necessary. If the input
    structures are detected to be different (symmetry-inequivalent) geometries
    (e.g. not a simple defect migration between two symmetry-equivalent sites),
    but have mis-matching orientations/ positions (such that they do not
    correspond to the shortest linear path between them), a warning will be
    raised. See the ``doped`` configuration coordinate / NEB path generation
    tutorial for a deeper explanation. (TODO)

    If only ``n_images`` is set (and ``displacements`` is ``None``)(default),
    then only one set of interpolated structures is written (in other words,
    assuming a standard NEB/PES calculation is being performed). If
    ``displacements`` (and possibly ``displacements2``) is set, then two sets
    of interpolated structures are written (in other words, assuming a CC /
    non-radiative recombination calculation is being performed, where the two
    sets of structures are to be calculated in separate charge/spin etc
    states).

    Args:
        struct1 (Structure): Initial structure.
        struct2 (Structure): Final structure.
        output_dir (PathLike):
            Directory to write the interpolated structures to. Defaults to
            "Configuration_Coordinate" if ``displacements`` is set, otherwise
            "NEB".
        n_images (int):
            Number of images to interpolate between ``struct1`` and
            ``struct2``, or a list of fractional interpolation values
            (displacements) to use. Note that ``n_images`` is ignored if
            ``displacements`` is set (in which case CC / non-radiative
            recombination calculations are assumed -- generating two sets of
            interpolated structures -- otherwise a standard NEB / PES
            calculation is assumed -- generating one set of structures).
            Default: 7
        displacements (np.ndarray or list):
            Displacements to use for ``struct1`` along the linear
            transformation path to ``struct2``. If set, then CC / non-radiative
            recombination calculations are assumed, and two sets of
            interpolated structures will be written to file. If set and
            ``displacements2`` is not set, then the same set of displacements
            is used for both sets of interpolated structures. Default: ``None``
        displacements2 (np.ndarray or list):
            Displacements to use for ``struct2`` along the linear
            transformation path to ``struct1``. If not set and
            ``displacements`` is not ``None``, then the same set of
            displacements is used for both sets of interpolated structures.
            Default: ``None``
    """
    path_structs = get_path_structures(struct1, struct2, n_images, displacements, displacements2)
    path_struct_dicts = [path_structs] if isinstance(path_structs, dict) else list(path_structs)
    output_dir = output_dir or ("Configuration_Coordinate" if displacements is not None else "NEB")

    for i, path_struct_dict in enumerate(path_struct_dicts):
        for folder, struct in path_struct_dict.items():
            PES_dir = f"PES_{i+1}" if len(path_struct_dicts) > 1 else ""
            path_to_folder = f"{output_dir}/{PES_dir}/{folder}"
            os.makedirs(path_to_folder, exist_ok=True)
            struct.to(filename=f"{path_to_folder}/POSCAR", fmt="poscar")



# TODO: Quick tests
# TODO: Re-orient directly in generation functions, but with option not to?