Source code for doped.utils.symmetry

"""
Utility code and functions for symmetry analysis of structures and defects.
"""

import contextlib
import math
import os
import warnings
from collections.abc import Iterable, Sequence
from functools import lru_cache
from itertools import permutations, product
from typing import cast

import numpy as np
import pandas as pd
import spglib
from numpy.typing import ArrayLike
from pymatgen.analysis.defects.core import DefectType
from pymatgen.core.operations import SymmOp
from pymatgen.core.structure import Lattice
from pymatgen.core.structure_matcher import ElementComparator
from pymatgen.symmetry.analyzer import SymmetryUndeterminedError
from pymatgen.transformations.standard_transformations import SupercellTransformation
from pymatgen.util.coord import is_coord_subset_pbc
from scipy.cluster.hierarchy import fcluster, linkage
from scipy.spatial.distance import squareform
from sympy import Eq, Expr, simplify, solve
from tqdm import tqdm

from doped.core import Defect, DefectEntry, template_defect_entry_from_structures
from doped.utils.configurations import orient_s2_like_s1
from doped.utils.efficiency import PeriodicSite, SpacegroupAnalyzer, Structure
from doped.utils.parsing import (
    _get_bulk_supercell,
    _get_defect_supercell,
    _get_defect_supercell_frac_coords,
    _get_site_mapping_from_coords_and_indices,
    _get_unrelaxed_defect_structure,
    get_site_mappings,
)



[docs]
@lru_cache(maxsize=int(1e5))
def cached_simplify(eq):
    """
    Cached simplification function for ``sympy`` equations, for efficiency.
    """
    return simplify(eq)




[docs]
@lru_cache(maxsize=int(1e5))
def cached_solve(equation, variable):
    """
    Cached solve function for ``sympy`` equations, for efficiency.
    """
    return solve(equation, variable)



def _set_spglib_warnings_error_handling_env_var():
    """
    Set the SPGLIB environment variable to use new error handling.
    """
    os.environ["SPGLIB_OLD_ERROR_HANDLING"] = "False"  # can be removed with spglib >=2.8
    os.environ["SPGLIB_WARNING"] = "OFF"


def _check_spglib_version():
    """
    Check the versions of spglib and its C libraries, and raise a warning if
    the correct installation instructions have not been followed.
    """
    python_version = spglib.__version__
    c_version = spglib.spg_get_version_full()

    if python_version != c_version:
        warnings.warn(  # think this issue is avoided with latest spglib versions, but not sure
            f"Your spglib Python version (spglib.__version__ = {python_version}) does not match its C "
            f"library version (spglib.spg_get_version_full() = {c_version}). This can lead to unnecessary "
            f"spglib warning messages, but can be avoided by upgrading spglib with `pip install --upgrade "
            f"spglib`."
        )  # previously also had to do conda or special pip install settings, with spglib <2.5


_set_spglib_warnings_error_handling_env_var()
_check_spglib_version()


def _round_floats(obj, places: int = 5):
    """
    Recursively round floats in a dictionary to ``places`` decimal places,
    using the ``_custom_round`` function.
    """
    if isinstance(obj, float):
        return _custom_round(obj, places) + 0.0
    if isinstance(obj, dict):
        return {k: _round_floats(v, places) for k, v in obj.items()}
    if isinstance(obj, list | tuple):
        return [_round_floats(x, places) for x in obj]
    if isinstance(obj, np.ndarray):
        return _vectorized_custom_round(obj, places) + 0.0
    if isinstance(obj, pd.DataFrame):  # if dataframe, convert to dict and round floats
        return pd.DataFrame(_round_floats(obj.to_dict(), places))

    return obj


def _custom_round(number: float, decimals: int = 3):
    """
    Custom rounding function that rounds numbers to a specified number of
    decimals, if that rounded number is within 0.15*10^(-decimals) of the
    original number, else rounds to [decimals+1] decimals.

    Primarily because float rounding with ``pymatgen``/``numpy`` can give cell
    coordinates of 0.5001 instead of 0.5 etc, but also can have coordinates of
    e.g. ``0.6125`` that should not be rounded to ``0.613``.

    Args:
        number (float):
            The number to round.
        decimals (int):
            The number of decimals to round to (default: 3).

    Returns:
        float: The rounded number.
    """
    rounded_number = round(number, decimals)
    if abs(rounded_number - number) < 0.15 * float(10) ** (-decimals):
        return rounded_number

    return round(number, decimals + 1)


_vectorized_custom_round = np.vectorize(_custom_round)


def _get_num_places_for_dist_precision(
    structure: Structure | Lattice, dist_precision: float = 0.001
) -> int:
    """
    Given a structure or lattice, get the number of decimal places that we need
    to keep / can round to for `fractional coordinates` (``frac_coords``), to
    maintain a distance precision of ``dist_precision`` in Å.

    Intended for use with the ``_round_floats()`` function, to achieve cleanly
    formatted structure outputs while ensuring no significant rounding errors
    are introduced in site positions (e.g. for very large supercells, small
    differences in fraction coordinates become significant).

    Args:
        structure (|Structure| | |Lattice|):
            The input structure or lattice.
        dist_precision (float):
            The desired distance precision in Å (default: 0.001).

    Returns:
        int:
            The number of decimal places to keep for fractional coordinates to
            maintain the desired distance precision.
    """
    lattice = structure if isinstance(structure, Lattice) else structure.lattice
    frac_precision = dist_precision / max(lattice.abc)

    # get corresponding number of decimal places for this precision:
    return -1 * min(math.floor(math.log(frac_precision, 10)), -8)  # use 8 dp as max precision


def _round_struct_coords(structure: Structure, dist_precision: float = 0.001, to_unit_cell=False):
    """
    Convenience method to round the lattice parameters and fractional
    coordinates of a structure to a given distance precision, for cleanly
    formatted structure outputs.

    Does not apply this operation in-place!

    Args:
        structure:
            The input structure.
        dist_precision:
            The desired distance precision in Å (default: 0.001).
        to_unit_cell:
            Whether to round the fractional coordinates to the unit cell
            (default: False).

    Returns:
        Structure:
            The structure with rounded lattice parameters and fractional
            coordinates.
    """
    rounded_struct = structure.copy()
    req_places = _get_num_places_for_dist_precision(rounded_struct, dist_precision)
    frac_coords = _round_floats(rounded_struct.frac_coords, places=req_places)
    lattice = Lattice(_round_floats(rounded_struct.lattice.matrix, places=req_places))

    for idx in range(len(rounded_struct)):
        orig_site = structure[idx]
        rounded_struct._sites[idx] = PeriodicSite(
            orig_site.species,
            frac_coords[idx],
            lattice,
            properties=orig_site.properties,
            label=orig_site._label,
            skip_checks=True,
            to_unit_cell=to_unit_cell,
        )

    return rounded_struct


def _frac_coords_sort_func(coords):
    """
    Sorting function to apply on an iterable of fractional coordinates, where
    entries are sorted by the number of x, y, z that are (almost) equal (i.e.
    between 0 and 3), then by the magnitude of x+y+z, then by the magnitudes of
    x, y and z.
    """
    if coords is None:
        return (1e10, 1e10, 1e10, 1e10, 1e10)
    coords_for_sorting = _vectorized_custom_round(
        np.mod(_vectorized_custom_round(coords), 1)
    )  # to unit cell
    num_equals = sum(
        np.isclose(coords_for_sorting[i], coords_for_sorting[j], atol=1e-3)
        for i in range(len(coords_for_sorting))
        for j in range(i + 1, len(coords_for_sorting))
    )
    magnitude = _custom_round(np.linalg.norm(coords))
    return (-num_equals, magnitude, *np.abs(coords_for_sorting))



[docs]
def get_sga(struct: Structure, symprec: float = 0.01) -> SpacegroupAnalyzer:
    """
    Get a ``SpacegroupAnalyzer`` object of the input structure, dynamically
    adjusting ``symprec`` if needs be.

    Note that by default, magnetic symmetry (i.e. MAGMOMs) are not used in
    symmetry analysis in ``doped``, as noise in these values (particularly in
    structures from the Materials Project) often leads to incorrect symmetry
    determinations. To use magnetic moments in symmetry analyses, set the
    environment variable ``USE_MAGNETIC_SYMMETRY=1`` (i.e.
    ``os.environ["USE_MAGNETIC_SYMMETRY"] = "1"`` in Python).

    Args:
        struct (|Structure|):
            The input structure.
        symprec (float):
            The symmetry precision to use (default: 0.01).

    Returns:
        SpacegroupAnalyzer: The symmetry analyzer object.
    """
    return _get_sga(struct, symprec=symprec, return_symprec=False)




[docs]
def get_sga_and_symprec(struct: Structure, symprec: float = 0.01) -> tuple[SpacegroupAnalyzer, float]:
    """
    Get a ``SpacegroupAnalyzer`` object of the input structure, dynamically
    adjusting ``symprec`` if needs be, and the final successful ``symprec``
    used for ``SpacegroupAnalyzer`` initialisation.

    Note that by default, magnetic symmetry (i.e. MAGMOMs) are not used in
    symmetry analysis in ``doped``, as noise in these values (particularly in
    structures from the Materials Project) often leads to incorrect symmetry
    determinations. To use magnetic moments in symmetry analyses, set the
    environment variable ``USE_MAGNETIC_SYMMETRY=1`` (i.e.
    ``os.environ["USE_MAGNETIC_SYMMETRY"] = "1"`` in Python).

    Args:
        struct (|Structure|):
            The input structure.
        symprec (float):
            The symmetry precision to use (default: 0.01).

    Returns:
        tuple[SpacegroupAnalyzer, float]:
            Tuple of the ``SpacegroupAnalyzer`` object and the final
            ``symprec`` used.
    """
    return _get_sga(struct, symprec=symprec, return_symprec=True)



def _get_sga(
    struct: Structure, symprec: float = 0.01, return_symprec: bool = False
) -> SpacegroupAnalyzer | tuple[SpacegroupAnalyzer, float]:
    return _cache_ready_get_sga(
        struct,
        symprec=symprec,
        return_symprec=return_symprec,
        use_magnetic_symmetry=(os.environ.get("USE_MAGNETIC_SYMMETRY", "0") == "1"),  # default no mag symm
    )


@lru_cache(maxsize=int(1e3))
def _cache_ready_get_sga(
    struct: Structure,
    symprec: float = 0.01,
    return_symprec: bool = False,
    use_magnetic_symmetry: bool = False,
) -> SpacegroupAnalyzer | tuple[SpacegroupAnalyzer, float]:
    """
    ``get_sga`` code, with hashable input arguments for caching (using
    |Structure| hash function from ``doped.utils.efficiency``).
    """
    if not use_magnetic_symmetry:  # don't use magnetic symmetry by default
        struct = struct.copy()
        for site in struct:
            site.properties = {}

    sga = None
    trial_symprecs = [symprec, 0.1, 0.001, 1, 0.0001]
    spg_2pt7 = False
    symm_error_types: tuple[type[Exception], ...] = (SymmetryUndeterminedError, ValueError)
    with contextlib.suppress(AttributeError):  # introduced with spglib 2.7.0, can remove once spglib
        symm_error_types += (spglib.SpglibError,)  # (indirect) requirement is >= 2.7
        spg_2pt7 = True

    for trial_symprec in trial_symprecs:
        try:  # if symmetry determination fails, increase symprec first, then decrease, then criss-cross
            sga = SpacegroupAnalyzer(struct, symprec=trial_symprec)
            # check symmetry determination, sometimes SpacegroupAnalyzer initialises but methods fail:
            _detected_symmetry = sga._get_symmetry()
            return (sga, trial_symprec) if return_symprec else sga
        except symm_error_types as latest_symm_error:
            symm_error = latest_symm_error  # save before auto-deleted at end of except block
            continue

    raise SymmetryUndeterminedError(
        "Could not determine symmetry of input structure!"
        + (f"Got spglib error: {spglib.get_error_message()}" if not spg_2pt7 else "")
    ) from symm_error



[docs]
def apply_symm_op_to_site(
    symm_op: SymmOp,
    site: PeriodicSite,
    fractional: bool = False,
    rotate_lattice: Lattice | bool = True,
    just_unit_cell_frac_coords: bool = False,
) -> PeriodicSite:
    """
    Apply the given symmetry operation to the input site (**not in place**) and
    return the new site.

    By default, also rotates the lattice accordingly. If you want to apply the
    symmetry operation but keep the same lattice definition, set
    ``rotate_lattice=False``.

    Args:
        symm_op (SymmOp):
            ``pymatgen`` ``SymmOp`` object.
        site (|PeriodicSite|):
            ``pymatgen`` |PeriodicSite| object.
        fractional (bool):
            If the ``SymmOp`` is in fractional or Cartesian (default)
            coordinates (i.e. to apply to ``site.frac_coords`` or
            ``site.coords``). Default: False
        rotate_lattice (|Lattice| | bool):
            Either a ``pymatgen`` |Lattice| object (to use as the new lattice
            basis of the transformed site, which can be provided to reduce
            computation time when looping) or ``True/False``. If ``True``
            (default), the ``SymmOp`` rotation matrix will be applied to the
            input site lattice, or if ``False``, the original lattice will be
            retained.
        just_unit_cell_frac_coords (bool):
            If ``True``, just returns the `fractional coordinates` of the
            transformed site (rather than the site itself), within the unit
            cell. Default: False

    Returns:
        PeriodicSite:
            Site with the symmetry operation applied.
    """
    if isinstance(rotate_lattice, Lattice):
        rotated_lattice = rotate_lattice
    else:
        if rotate_lattice:
            if fractional:
                rotated_lattice = Lattice(np.dot(symm_op.rotation_matrix, site.lattice.matrix))
            else:
                rotated_lattice = Lattice(
                    [symm_op.apply_rotation_only(row) for row in site.lattice.matrix]
                )
        else:
            rotated_lattice = site.lattice

    if fractional:  # operate in **original** lattice, then convert to new lattice
        frac_coords = symm_op.operate(site.frac_coords)
        new_coords = site.lattice.get_cartesian_coords(frac_coords)
    else:
        new_coords = symm_op.operate(site.coords)

    if just_unit_cell_frac_coords:
        rotated_frac_coords = rotated_lattice.get_fractional_coords(new_coords)
        return np.array(
            [
                np.mod(f, 1) if p else f
                for p, f in zip(rotated_lattice.pbc, rotated_frac_coords, strict=False)
            ]
        )

    return PeriodicSite(
        site.species,
        new_coords,
        rotated_lattice,
        coords_are_cartesian=True,
        properties=site.properties,
        skip_checks=True,
        label=site._label,
    )




[docs]
def apply_symm_op_to_struct(
    symm_op: SymmOp, struct: Structure, fractional: bool = False, rotate_lattice: bool = True
) -> Structure:
    """
    Apply a symmetry operation to a structure and return the new structure.

    This differs from pymatgen's ``apply_operation`` method in that it **does
    not apply the operation in place as well (i.e. does not modify the input
    structure)**, which avoids the use of unnecessary and slow
    ``Structure.copy()`` calls, making the structure manipulation / symmetry
    analysis functions more efficient. Also fixes an issue when applying
    fractional symmetry operations.

    By default, also rotates the lattice accordingly. If you want to apply the
    symmetry operation to the sites but keep the same lattice definition, set
    ``rotate_lattice=False``.

    Args:
        symm_op:
            ``pymatgen`` ``SymmOp`` object.
        struct:
            ``pymatgen`` |Structure| object.
        fractional:
            If the ``SymmOp`` is in fractional or Cartesian (default)
            coordinates (i.e. to apply to ``site.frac_coords`` or
            ``site.coords``). Default: False
        rotate_lattice:
            If the lattice of the input structure should be rotated according
            to the symmetry operation. Default: True.

    Returns:
        Structure:
            |Structure| with the symmetry operation applied.
    """
    # using modified version of ``pymatgen``\'s ``apply_operation`` method:
    if rotate_lattice:
        if not fractional:
            rotated_lattice = Lattice([symm_op.apply_rotation_only(row) for row in struct._lattice.matrix])
        else:
            rotated_lattice = Lattice(np.dot(symm_op.rotation_matrix, struct._lattice.matrix))
    else:
        rotated_lattice = struct._lattice

    # note could also use ``SymmOp.operate_multi`` for speedup if ever necessary, but requires some more
    # accounting of species ordering etc, and this isn't an efficiency bottleneck currently
    return Structure.from_sites(
        [
            apply_symm_op_to_site(symm_op, site, fractional=fractional, rotate_lattice=rotated_lattice)
            for site in struct
        ]
    )




[docs]
def summed_dist(
    struct_a: Structure, struct_b: Structure, ignored_species: list[str] | None = None
) -> float:
    """
    Get the summed distance between closest-matched sites of two structures, in
    Å.

    Note that this assumes the lattices of the two structures are equal!

    Args:
        struct_a: ``pymatgen`` |Structure| object.
        struct_b: ``pymatgen`` |Structure| object.
        ignored_species:
            List of species to ignore when calculating the RMS distance
            (default: None).

    Returns:
        float:
            The summed distance between the sites of the two structures, in Å.
    """
    # orders of magnitude faster than StructureMatcher.get_rms_dist() from pymatgen
    # (though this assumes lattices are equal)
    # set threshold to a large number to avoid possible site-matching warnings
    return np.array(
        get_site_mappings(
            struct_a,
            struct_b,
            threshold=1e10,
            ignored_species=ignored_species,
        )
    )[:, 0].sum()




[docs]
def get_distance_matrix(fcoords: ArrayLike, lattice: Lattice) -> np.ndarray:
    """
    Get a matrix of the distances between the input fractional coordinates in
    the input lattice.

    Args:
        fcoords (ArrayLike):
            Fractional coordinates to get distances between.
        lattice (|Lattice|):
            Lattice for the fractional coordinates.

    Returns:
        np.ndarray:
            Matrix of distances between the input fractional coordinates in the
            input lattice.
    """
    # tuple-ify for caching:
    return _get_distance_matrix(tuple(tuple(row) for row in np.asarray(fcoords)), lattice)



@lru_cache(maxsize=int(1e2))
def _get_distance_matrix(fcoords: tuple[tuple, ...], lattice: Lattice):
    """
    Get a matrix of the distances between the input fractional coordinates in
    the input lattice.

    This function requires the input fcoords to be given as tuples, to allow
    hashing and caching for efficiency.
    """
    # copy() to help avoid mutability issues with cached outputs:
    dist_matrix = np.array(lattice.get_all_distances(fcoords, fcoords).copy())
    return (dist_matrix + dist_matrix.T) / 2



[docs]
def cluster_coords(
    fcoords: ArrayLike,
    structure: Structure | Lattice,
    dist_tol: float = 0.01,
    method: str = "single",
    criterion: str = "distance",
) -> np.ndarray:
    """
    Cluster fractional coordinates based on their distances (using ``scipy``
    functions) and return the cluster numbers (as an array matching the shape
    and order of ``fcoords``).

    ``method`` chooses the clustering algorithm to use with ``linkage()``
    (``"single"`` by default, matching the ``scipy`` default), along with a
    ``dist_tol`` distance tolerance in Å. ``"single"`` corresponds to the
    Nearest Point algorithm and is the recommended choice for ``method`` when
    ``dist_tol`` is small, but can be sensitive to how many fractional
    coordinates are included in ``fcoords`` (allowing for daisy-chaining of
    sites to give large spaced-out clusters), while ``"average"`` or
    ``"complete"`` (furthest point algorithm) are good choices to avoid this
    issue. ``"centroid"``/``"median"``/``"ward"`` should not be used for
    ``method`` as they assume a flat Euclidean space, which is violated with
    PBC distances.

    See the ``scipy`` API docs for more info.

    Args:
        fcoords (ArrayLike):
            Fractional coordinates to cluster.
        structure (|Structure| | |Lattice|):
            |Structure| or |Lattice| to which the fractional coordinates
            correspond.
        dist_tol (float):
            Distance tolerance for clustering, in Å (default: 0.01). For the
            most part, fractional coordinates with distances less than this
            tolerance will be clustered together (when ``method = "single"``,
            giving the Nearest Point algorithm, as is the default).
        method (str):
            Clustering algorithm to use with ``linkage()``. Default is
            ``"single"`` (recommended for small ``dist_tol``), while
            ``"average"`` or ``"complete"`` are recommended with medium/large
            ``dist_tol`` (e.g. for candidate interstitial site clustering or
            defect site clustering (for determining defect site competition)).
            ``"centroid"``/``"median"``/``"ward"`` should not be used for
            as they assume a flat Euclidean space, which is violated with PBC
            distances.
        criterion (str):
            Criterion to use for flattening hierarchical clusters from the
            linkage matrix, used with ``fcluster()``. Default: ``"distance"``.

    Returns:
        np.ndarray:
            Array of cluster numbers, matching the shape and order of
            ``fcoords`` (i.e. corresponding to the index/number of the cluster
            to which that fractional coordinate belongs).
    """
    fcoords = np.asarray(fcoords)
    if len(fcoords) == 1:  # only one input coordinate
        return np.array([0])

    lattice = structure if isinstance(structure, Lattice) else structure.lattice
    condensed_m = squareform(get_distance_matrix(fcoords, lattice), checks=False)
    z = linkage(condensed_m, method=method)
    # Note: with method = "single", the z distances are the minimum pairwise distance between any point in
    # one and any point in the other cluster (so two clusters should merge when any site in one is within
    # ``dist_tol`` of any site in the other, and kept separate when all points in one are >``dist_tol``
    # away from all points in the other), which of course can easily lead to daisy-chaining (for medium /
    # large dist_tol values).
    # With method = "complete", the z distances are instead the maximum pairwise distance between any point
    # in one and any point in the other cluster (so two clusters should merge only when _every_ site in one
    # is within ``dist_tol`` of _every_ site in the other cluster). Clusters are thus compact and bounded
    # -- the diameter of any cluster is guaranteed <= ``dist_tol``, with every pair of sites within a
    # cluster within ``dist_tol`` of each other.
    # With method = "average", the z distances are the mean pairwise distance across all point pairs (one
    # from each cluster), so two clusters should merge only when the average distance drops below
    # ``dist_tol``. This is a compromise; less chain-prone than "single", less outlier-sensitive than
    # "complete". Mean within-cluster distance is controlled, but individual pairs can exceed ``dist_tol``.
    return fcluster(z, dist_tol, criterion=criterion)




[docs]
def doped_cluster_frac_coords(
    fcoords: np.typing.ArrayLike,
    structure: Structure,
    tol: float = 0.55,
    symm_pref_dist_factor: float = 0.85,
    method: str = "average",
    criterion: str = "distance",
) -> np.ndarray:
    """
    Cluster fractional coordinates that are within a certain distance tolerance
    of each other, and return the cluster site.

    Modified from the ``pymatgen-analysis-defects``` function as follows:
    For each site cluster, the possible sites to choose from are the sites
    in the cluster `and` the cluster midpoint (average position). Of these
    sites, the site with the highest symmetry, and then largest ``min_dist``
    (distance to any host lattice site), is chosen -- if its ``min_dist`` is
    no more than ``symm_pref_dist_factor`` (0.85 by default) times the largest
    possible ``min_dist``. This is because we want to favour the higher
    symmetry interstitial sites (as these are typically the more intuitive
    sites for placement, cleaner, easier for analysis etc, and work well when
    combined with |ShakeNBreak| or other structure-searching techniques to
    account for symmetry-breaking), but also interstitials are often
    lowest-energy when furthest from host atoms (i.e. in the largest
    interstitial voids -- particularly for fully-ionised charge states), and so
    this approach tries to strike a balance between these two goals.

    In ``pymatgen-analysis-defects``, the average cluster position is used,
    which breaks symmetries and is less easy to manipulate in the following
    interstitial generation functions. ``pymatgen-analysis-defects`` also uses
    the default ``"single"`` method for site clustering, which can lead to
    large unwanted daisy-chaining effects, unintentionally grouping
    interstitials with distances far larger than ``tol``.

    Args:
        fcoords (ArrayLike):
            Fractional coordinates of points to cluster.
        structure (|Structure|):
            The host structure.
        tol (float):
            Distance tolerance for clustering Voronoi nodes. Default is 0.55 Å.
        symm_pref_dist_factor (float):
            Minimum acceptable ratio of distance to host atoms for
            symmetry-favoured sites vs distance-to-host-favoured sites, for
            which to prefer symmetry-favoured sites. Default is 0.85.
        method (str):
            Clustering algorithm to use with ``linkage()``. Default is
            ``"average"``, which is typically better than the ``scipy`` default
            of ``"single`` for interstitial generation, as it avoids
            unintentional daisy-chaining effects. Another reasonable choice is
            ``"complete"``, which ensures that no two sites in a given cluster
            are more than ``tol`` apart. See the docstrings and source code of
            :func:`~doped.utils.symmetry.cluster_coords` for more details.
            ``"centroid"``/``"median"``/``"ward"`` should not be used for
            as they assume a flat Euclidean space, which is violated with PBC
            distances.
        criterion (str):
            Criterion to use for flattening hierarchical clusters from the
            linkage matrix, used with ``fcluster()`` Default is ``"distance"``.

    Returns:
        np.ndarray: Clustered fractional coordinates.
    """
    fcoords = np.asarray(fcoords)
    if len(fcoords) == 0:
        return np.array([])
    if len(fcoords) == 1:
        return _vectorized_custom_round(np.mod(_vectorized_custom_round(fcoords, 5), 1), 4)  # to unit cell

    lattice = structure.lattice
    cn = cluster_coords(fcoords, structure, dist_tol=tol, method=method, criterion=criterion)
    unique_fcoords = []

    # cn is an array of cluster numbers, of length ``len(fcoords)``, so we take the set of cluster numbers
    # ``n``, use ``np.where(cn == n)[0]`` to get the indices of ``cn`` / ``fcoords`` which are in cluster
    # ``n``, and then decide which coordinates to take as the cluster site based on symmetry and distance:
    for n in set(cn):
        frac_coords = []
        for i, j in enumerate(np.where(cn == n)[0]):
            if i == 0:
                frac_coords.append(fcoords[j])
            else:
                fcoord = fcoords[j]  # We need the image to combine the frac_coords properly:
                _d, image = lattice.get_distance_and_image(frac_coords[0], fcoord)
                frac_coords.append(fcoord + image)

        frac_coords.append(np.average(frac_coords, axis=0))  # midpoint of cluster
        frac_coords_scores = {
            tuple(x): (
                -group_order_from_schoenflies(
                    point_symmetry_from_site(x, structure)
                ),  # higher order = higher symmetry
                -np.min(lattice.get_all_distances(x, structure.frac_coords), axis=1),
                *_frac_coords_sort_func(x),
            )
            for x in frac_coords
        }
        symmetry_favoured_site = sorted(frac_coords_scores.items(), key=lambda x: x[1])[0][0]
        dist_favoured_site = sorted(
            frac_coords_scores.items(), key=lambda x: (x[1][1], x[1][0], *x[1][2:])
        )[0][0]

        if (
            np.min(lattice.get_all_distances(symmetry_favoured_site, structure.frac_coords), axis=1)
            / np.min(lattice.get_all_distances(dist_favoured_site, structure.frac_coords), axis=1)
        ) < symm_pref_dist_factor:
            unique_fcoords.append(dist_favoured_site)
        else:  # prefer symmetry over distance if difference is sufficiently small
            unique_fcoords.append(symmetry_favoured_site)

    return _vectorized_custom_round(
        np.mod(_vectorized_custom_round(unique_fcoords, 5), 1), 4
    )  # to unit cell




[docs]
def get_all_equiv_sites(
    frac_coords: ArrayLike,
    structure: Structure,
    symprec: float = 0.01,
    dist_tol_factor: float = 1.0,
    species: str = "X",
    just_frac_coords: bool = False,
    return_symprec_and_dist_tol_factor: bool = False,
    fixed_symprec_and_dist_tol_factor: bool = False,
    verbose: bool = False,
) -> list[PeriodicSite | np.ndarray] | tuple[list[PeriodicSite | np.ndarray], float, float]:
    """
    Get a list of all equivalent sites of the input fractional coordinates in
    ``structure``.

    Tries to use hashing and caching to accelerate if possible.

    Args:
        frac_coords (ArrayLike):
            Fractional coordinates to get equivalent sites of.
        structure (|Structure|):
            |Structure| to use for the lattice, to which the fractional
            coordinates correspond, and for determining symmetry operations
            if not provided.
        symprec (float):
            Symmetry precision to use for determining symmetry operations.
            Default is 0.01. If ``fixed_symprec_and_dist_tol_factor`` is
            ``False`` (default), this value will be automatically adjusted (up
            to 10x, down to 0.1x) until the identified equivalent sites from
            ``spglib`` have consistent point group symmetries. Setting
            ``verbose`` to ``True`` will print information on the trialled
            ``symprec`` (and ``dist_tol_factor`` values), and setting
            ``return_symprec_and_dist_tol_factor`` to ``True`` will return the
            final ``symprec`` (and ``dist_tol_factor``) used for the equivalent
            site generation.
        dist_tol_factor (float):
            Distance tolerance for clustering generated sites (to ensure they
            are truly distinct), as a multiplicative factor of ``symprec``.
            Default is 1.0 (i.e. ``dist_tol = symprec``, in Å). If
            ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default), this
            value will also be automatically adjusted if necessary (up to 10x,
            down to 0.1x)(after ``symprec`` adjustments) until the identified
            equivalent sites from ``spglib`` have consistent point group
            symmetries. Setting ``verbose`` to ``True`` will print information
            on the trialled ``dist_tol_factor`` (and ``symprec``) values, and
            setting ``return_symprec_and_dist_tol_factor`` to ``True`` will
            return the final ``symprec`` (and ``dist_tol_factor``) used for
            the equivalent site generation.
        species (str):
            Species to use for the equivalent sites (default: "X").
        just_frac_coords (bool):
            If ``True``, just returns the fractional coordinates of the
            equivalent sites (rather than ``pymatgen`` |PeriodicSite|
            objects). Default: False.
        return_symprec_and_dist_tol_factor (bool):
            If ``True``, returns the final symmetry precision and distance
            tolerance factor used for the equivalent site generation (see
            ``symprec`` and ``dist_tol_factor`` argument descriptions). Default
            is ``False``.
        fixed_symprec_and_dist_tol_factor (bool):
            If ``True``, uses the provided ``symprec`` and ``dist_tol_factor``
            values without any automatic adjustments (see ``symprec`` and
            ``dist_tol_factor`` argument descriptions). Default is ``False``.
        verbose (bool):
            If ``True``, prints information on the trialled ``symprec`` and
            ``dist_tol_factor`` values, and the identified equivalent sites.
            Default is ``False``.

    Returns:
        list[PeriodicSite | np.ndarray]:
            List of equivalent sites of the input fractional coordinates in
            ``structure``, either as ``pymatgen`` |PeriodicSite| objects or
            as fractional coordinates (depending on the value of
            ``just_frac_coords``).

        If ``return_symprec_and_dist_tol_factor`` is ``True`` (default is
        ``False``), also returns the final ``symprec`` and ``dist_tol_factor``
        values used for the equivalent site generation.
    """
    try:
        return _cache_ready_get_all_equiv_sites(
            tuple(cast("Sequence", frac_coords)),
            structure,
            symprec,
            dist_tol_factor,
            species,
            just_frac_coords,
            return_symprec_and_dist_tol_factor,
            fixed_symprec_and_dist_tol_factor,
            verbose,
        )
    except TypeError:  # issue with hashing (possibly due to ``species`` choice), use raw function
        return _raw_get_all_equiv_sites(
            frac_coords,
            structure,
            symprec,
            dist_tol_factor,
            species,
            just_frac_coords,
            return_symprec_and_dist_tol_factor,
            fixed_symprec_and_dist_tol_factor,
            verbose,
        )



@lru_cache(maxsize=int(1e3))
def _cache_ready_get_all_equiv_sites(
    frac_coords: tuple,
    structure: Structure,
    symprec: float = 0.01,
    dist_tol_factor: float = 1.0,
    species: str = "X",
    just_frac_coords: bool = False,
    return_symprec_and_dist_tol_factor: bool = False,
    fixed_symprec_and_dist_tol_factor: bool = False,
    verbose: bool = False,
) -> list[PeriodicSite | np.ndarray] | tuple[list[PeriodicSite | np.ndarray], float, float]:
    output = _raw_get_all_equiv_sites(
        frac_coords,
        structure,
        symprec,
        dist_tol_factor,
        species,
        just_frac_coords,
        return_symprec_and_dist_tol_factor,
        fixed_symprec_and_dist_tol_factor,
        verbose,
    )
    # copy() to help avoid mutability issues with cached outputs:
    if not return_symprec_and_dist_tol_factor:
        assert isinstance(output, list)
        return output.copy()

    assert isinstance(output, tuple)  # typing
    return output[0].copy(), output[1], output[2]


_TRIAL_SYMPREC_DIST_TOL_FACTORS = np.array([1, 1.05, 0.95, 1.1, 0.9, 1.2, 0.8, 1.5, 0.75, 2, 0.5, 10, 0.1])


def _raw_get_all_equiv_sites(
    frac_coords: ArrayLike,
    structure: Structure,
    symprec: float = 0.01,
    dist_tol_factor: float = 1.0,
    species: str = "X",
    just_frac_coords: bool = False,
    return_symprec_and_dist_tol_factor: bool = False,
    fixed_symprec_and_dist_tol_factor: bool = False,
    verbose: bool = False,
) -> list[PeriodicSite | np.ndarray] | tuple[list[PeriodicSite | np.ndarray], float, float]:
    # ensure sites have the same property keys, otherwise can cause issues with pymatgen primitive
    # structure determination:
    if (
        "magmom" in structure.site_properties
    ):  # if species matches those in structure, and all the same, then use the
        # same magmom, otherwise remove magmom from properties
        matching_sites = [site for site in structure if site.species_string == str(species)]
        if matching_sites and np.std([site.properties["magmom"] for site in matching_sites]) < 0.1:
            properties = {"magmom": next(site.properties.get("magmom", 0) for site in matching_sites)}
        else:
            properties = {"magmom": 0}
    else:
        properties = {}

    def _get_equiv_sites_with_given_symprec(
        symprec: float,
        dist_tol_factor: float,
        just_frac_coords: bool = False,
    ):
        dist_tol = dist_tol_factor * symprec  # distance tolerance for clustering sites
        sga, symprec = get_sga_and_symprec(structure, symprec=symprec)
        symm_ops = sga.get_symmetry_operations()  # fractional symm_ops by default

        dummy_site = PeriodicSite(species, frac_coords, structure.lattice, properties=properties)
        x_sites = [
            apply_symm_op_to_site(
                symm_op,
                dummy_site,
                fractional=True,
                rotate_lattice=structure.lattice,  # same lattice, just want transformed frac coords
                just_unit_cell_frac_coords=just_frac_coords,
            )
            for symm_op in symm_ops
        ]
        if not just_frac_coords:
            for site in x_sites:
                site.to_unit_cell(in_place=True)  # faster with in_place

        return cluster_sites_by_dist_tol(x_sites, structure, dist_tol=dist_tol)

    if fixed_symprec_and_dist_tol_factor:
        equiv_sites = _get_equiv_sites_with_given_symprec(
            symprec, dist_tol_factor, just_frac_coords=just_frac_coords
        )
        return (
            (equiv_sites, symprec, dist_tol_factor) if return_symprec_and_dist_tol_factor else equiv_sites
        )

    # the choice of equivalent sites should give consistent site symmetries for each equivalent site (using
    # the same ``symprec`` as for generation), however this is sometimes not the case (due to small
    # numerical noise / ``dist_tol`` choices etc), so check that the site symmetries (according to
    # ``symprec``) are self-consistent, and adjust ``symprec`` if not:
    trial_symprecs = _TRIAL_SYMPREC_DIST_TOL_FACTORS * symprec
    trial_dist_tol_factors = _TRIAL_SYMPREC_DIST_TOL_FACTORS * dist_tol_factor
    for trial_dist_tol_factor, trial_symprec in product(trial_dist_tol_factors, trial_symprecs):
        equiv_sites = _get_equiv_sites_with_given_symprec(
            trial_symprec, trial_dist_tol_factor, just_frac_coords=False
        )
        struct_with_all_X = _get_struct_with_all_X(structure, equiv_sites)
        sga_with_all_X = get_sga(struct_with_all_X, symprec=trial_symprec)
        if len(set(sga_with_all_X.get_symmetry_dataset().site_symmetry_symbols[-len(equiv_sites) :])) == 1:
            symprec = trial_symprec
            dist_tol_factor = trial_dist_tol_factor
            equiv_sites = [site.frac_coords for site in equiv_sites] if just_frac_coords else equiv_sites
            if verbose:
                print(
                    f"Equivalent site generation succeeded (with consistent site symmetries) with symprec "
                    f"= {symprec} & dist_tol_factor = {dist_tol_factor}, giving "
                    f"{len(equiv_sites)} equivalent sites in the input structure."
                )
            break

        if verbose:
            print(
                f"Equivalent site generation failed with symprec = {trial_symprec} & dist_tol_factor "
                f"= {trial_dist_tol_factor}, giving {len(equiv_sites)} equivalent sites in the input "
                f"structure."
            )

    return (equiv_sites, symprec, dist_tol_factor) if return_symprec_and_dist_tol_factor else equiv_sites



[docs]
def cluster_sites_by_dist_tol(
    sites: Iterable[PeriodicSite | np.ndarray],
    structure: Structure | Lattice,
    dist_tol: float = 0.01,
    method: str = "single",
    criterion: str = "distance",
) -> list[PeriodicSite | np.ndarray]:
    r"""
    Cluster sites based on their distances (using ``cluster_coords``).

    Args:
        sites (Iterable[|PeriodicSite| | np.ndarray]):
            Sites to cluster, as an iterable of |PeriodicSite| objects or
            fractional coordinates.
        structure (|Structure| | |Lattice|):
            |Structure| or |Lattice| to which the sites correspond.
        dist_tol (float):
            Distance tolerance for clustering, in Å (default: 0.01).
        method (str):
            Clustering algorithm to use with ``scipy``\'s ``linkage()``
            clustering function in ``cluster_coords``. Default is ``"single"``,
            which is the ``scipy`` default and is typically recommended when
            ``dist_tol`` is small. See the docstrings and source code of
            :func:`~doped.utils.symmetry.cluster_coords` for more details.
        criterion (str):
            Criterion to use for flattening hierarchical clusters from the
            linkage matrix, used with ``fcluster()``. Default: ``"distance"``.

    Returns:
        list[PeriodicSite | np.ndarray]:
            List of clustered sites, as |PeriodicSite| objects or fractional
            coordinates depending on the input ``sites`` type.
    """
    dist_precision_num_places = _get_num_places_for_dist_precision(structure, dist_tol)
    just_frac_coords = not hasattr(next(iter(sites)), "frac_coords")
    sites = list(sites)  # needs to be indexable for reducing to unique sites below
    all_frac_coords = [
        tuple(np.round(i, dist_precision_num_places))
        for i in (
            sites if just_frac_coords else [cast("PeriodicSite", site).frac_coords for site in sites]
        )
    ]
    unique_frac_coords, unique_indices = np.unique(all_frac_coords, axis=0, return_index=True)
    unique_sites = [sites[i] for i in unique_indices]

    cn = cluster_coords(
        unique_frac_coords, structure, dist_tol=dist_tol, method=method, criterion=criterion
    )
    # cn is an array of cluster numbers, of length ``len(unique_frac_coords)``, so we take the set of
    # cluster numbers ``n``, use ``np.where(cn == n)[0]`` to get the indices of ``cn`` /
    # ``unique_frac_coords`` which are in cluster ``n``, and then take the first of each cluster
    # (because here these should be basically the same sites just with possibly small numerical
    # differences due to symmetry operations, unlike when ``cluster_coords`` is used for Voronoi
    # interstitial generation, where we choose the cluster site based on symmetry/distance to host)
    return [unique_sites[np.where(cn == n)[0][0]] for n in set(cn)]  # take 1st of each cluster




[docs]
def get_min_dist_between_equiv_sites(
    site_1: PeriodicSite | Sequence[float] | Defect | DefectEntry,
    site_2: PeriodicSite | Sequence[float] | Defect | DefectEntry,
    structure: Structure | None = None,
    symprec: float = 0.01,
    dist_tol_factor: float = 1.0,
    return_symprec_and_dist_tol_factor: bool = False,
    fixed_symprec_and_dist_tol_factor: bool = False,
    verbose: bool = False,
) -> float | tuple[float, float, float]:
    """
    Get the minimum distance (in Å) between equivalent sites of two input
    site/|Defect|/|DefectEntry| objects in a structure.

    Args:
        site_1 (|PeriodicSite| | Sequence[float, float, float] | |Defect| | |DefectEntry|):
            First site to get equivalent sites of, to determine minimum
            distance to equivalent sites of ``site_2``. Can be a
            |PeriodicSite| object, a sequence of fractional coordinates, or a
            |Defect|/|DefectEntry| object.
        site_2 (|PeriodicSite| | Sequence[float, float, float] | |Defect| | |DefectEntry|):
            Second site to get equivalent sites of, to determine minimum
            distance to equivalent sites of ``site_1``. Can be a
            |PeriodicSite| object, a sequence of fractional coordinates, or a
            |Defect|/|DefectEntry| object.
        structure (|Structure|):
            |Structure| to use for determining symmetry-equivalent sites of
            ``site_1`` and ``site_2``. Required if ``site_1`` and ``site_2``
            are not |Defect| or |DefectEntry| objects. Default: None.
        symprec (float):
            Symmetry precision to use for determining symmetry operations.
            Default is 0.01. If ``fixed_symprec_and_dist_tol_factor`` is
            ``False`` (default), this value will be automatically adjusted (up
            to 10x, down to 0.1x) until the identified equivalent sites from
            ``spglib`` have consistent point group symmetries. Setting
            ``verbose`` to ``True`` will print information on the trialled
            ``symprec`` (and ``dist_tol_factor`` values), and setting
            ``return_symprec_and_dist_tol_factor`` to ``True`` will return the
            final ``symprec`` (and ``dist_tol_factor``) used for the equivalent
            site generation.
        dist_tol_factor (float):
            Distance tolerance for clustering generated sites (to ensure they
            are truly distinct), as a multiplicative factor of ``symprec``.
            Default is 1.0 (i.e. ``dist_tol = symprec``, in Å). If
            ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default), this
            value will also be automatically adjusted if necessary (up to 10x,
            down to 0.1x)(after ``symprec`` adjustments) until the identified
            equivalent sites from ``spglib`` have consistent point group
            symmetries. Setting ``verbose`` to ``True`` will print information
            on the trialled ``dist_tol_factor`` (and ``symprec``) values, and
            setting ``return_symprec_and_dist_tol_factor`` to ``True`` will
            return the final ``symprec`` (and ``dist_tol_factor``) used for
            the equivalent site generation.
        return_symprec_and_dist_tol_factor (bool):
            If ``True``, returns the final symmetry precision and distance
            tolerance factor used for the equivalent site generation (see
            ``symprec`` and ``dist_tol_factor`` argument descriptions). Default
            is ``False``.
        fixed_symprec_and_dist_tol_factor (bool):
            If ``True``, uses the provided ``symprec`` and ``dist_tol_factor``
            values without any automatic adjustments (see ``symprec`` and
            ``dist_tol_factor`` argument descriptions). Default is ``False``.
        verbose (bool):
            If ``True``, prints information on the trialled ``symprec`` and
            ``dist_tol_factor`` values, and the identified equivalent sites.
            Default is ``False``.

    Returns:
        float | tuple[float, float, float]:
            Minimum distance (in Å) between equivalent sites of ``site_1``
            and ``site_2``, or a tuple of  (minimum distance, ``symprec``,
            ``dist_tol_factor``) if ``return_symprec_and_dist_tol_factor`` is
            ``True``.
    """
    if structure is None:
        for site in [site_2, site_1]:  # if both ``DefectEntry``s/``Defect``s, take structure from site_1
            if isinstance(site, DefectEntry):
                structure = site.defect.structure
            elif isinstance(site, Defect):
                structure = site.structure
    if structure is None:
        raise ValueError(
            "Structure must be provided if site_1 and site_2 are not DefectEntry or Defect objects."
        )

    def _parse_site_to_PeriodicSite(site):
        if isinstance(site, DefectEntry):
            return site.defect.site
        if isinstance(site, Defect):
            return site.site
        if isinstance(site, PeriodicSite):
            return site
        return None  # frac coords provided, not site

    def _parse_site_to_frac_coords(site):
        if periodic_site := _parse_site_to_PeriodicSite(site):
            return periodic_site.frac_coords
        return site  # otherwise ``site`` should be frac coords

    primitive = get_primitive_structure(structure)

    def _get_equiv_fcoords_symprec_and_dist_tol(site, symprec=symprec, dist_tol_factor=dist_tol_factor):
        frac_coords = _parse_site_to_frac_coords(site)
        equiv_fcoords, symprec, dist_tol_factor = get_equiv_frac_coords_in_primitive(
            frac_coords,
            primitive,
            structure,
            symprec=symprec,
            dist_tol_factor=dist_tol_factor,
            return_symprec_and_dist_tol_factor=True,
            fixed_symprec_and_dist_tol_factor=fixed_symprec_and_dist_tol_factor,
            verbose=verbose,
        )

        return equiv_fcoords, symprec, dist_tol_factor

    equiv_fcoords_1, symprec, dist_tol_factor = _get_equiv_fcoords_symprec_and_dist_tol(site_1)
    equiv_fcoords_2, symprec, dist_tol_factor = _get_equiv_fcoords_symprec_and_dist_tol(site_2)

    min_dist = np.min(primitive.lattice.get_all_distances(equiv_fcoords_1, equiv_fcoords_2))
    return (min_dist, symprec, dist_tol_factor) if return_symprec_and_dist_tol_factor else min_dist



def _get_symm_dataset_of_struct_with_all_equiv_sites(
    frac_coords: ArrayLike,
    struct: Structure,
    symprec: float = 0.01,
    dist_tol_factor: float = 1.0,
    species: str = "X",
    return_symprec_and_dist_tol_factor: bool = False,
    fixed_symprec_and_dist_tol_factor: bool = False,
    verbose: bool = False,
):
    """
    Get the symmetry dataset of a ``SpacegroupAnalyzer`` object of a structure
    with all equivalent sites of the input fractional coordinates added to
    ``struct``, and also returning the list of unique equivalent sites.

    Tries to use hashing and caching to accelerate if possible.

    Returns:
        tuple[SpacegroupDataset, list[PeriodicSite], float, float]:
            Symmetry dataset of the structure with all equivalent sites of
            ``frac_coords`` added, the list of unique equivalent sites, and
            if ``return_symprec_and_dist_tol_factor`` is ``True``, the final
            ``symprec`` and ``dist_tol_factor`` used for the equivalent site
            generation.
    """
    try:
        return _cache_ready_get_symm_dataset_of_struct_with_all_equiv_sites(
            tuple(cast("Sequence", frac_coords)),
            struct,
            symprec,
            dist_tol_factor,
            species,
            return_symprec_and_dist_tol_factor,
            fixed_symprec_and_dist_tol_factor,
            verbose,
        )
    except TypeError:  # issue with hashing (possibly due to ``species`` choice), use raw function
        return _raw_get_symm_dataset_of_struct_with_all_equiv_sites(
            frac_coords,
            struct,
            symprec,
            dist_tol_factor,
            species,
            return_symprec_and_dist_tol_factor,
            fixed_symprec_and_dist_tol_factor,
            verbose,
        )


@lru_cache(maxsize=int(1e3))
def _cache_ready_get_symm_dataset_of_struct_with_all_equiv_sites(
    frac_coords: tuple,
    struct: Structure,
    symprec: float = 0.01,
    dist_tol_factor: float = 1.0,
    species: str = "X",
    return_symprec_and_dist_tol_factor: bool = False,
    fixed_symprec_and_dist_tol_factor: bool = False,
    verbose: bool = False,
):
    return _raw_get_symm_dataset_of_struct_with_all_equiv_sites(
        frac_coords,
        struct,
        symprec,
        dist_tol_factor,
        species,
        return_symprec_and_dist_tol_factor,
        fixed_symprec_and_dist_tol_factor,
        verbose,
    )


def _raw_get_symm_dataset_of_struct_with_all_equiv_sites(
    frac_coords: ArrayLike,
    struct: Structure,
    symprec: float = 0.01,
    dist_tol_factor: float = 1.0,
    species: str = "X",
    return_symprec_and_dist_tol_factor: bool = False,
    fixed_symprec_and_dist_tol_factor: bool = False,
    verbose: bool = False,
):
    equiv_sites_output = get_all_equiv_sites(
        frac_coords,
        struct,
        symprec=symprec,
        dist_tol_factor=dist_tol_factor,
        species=species,
        return_symprec_and_dist_tol_factor=True,
        fixed_symprec_and_dist_tol_factor=fixed_symprec_and_dist_tol_factor,
        verbose=verbose,
    )
    assert isinstance(equiv_sites_output, tuple)  # return_symprec_and_dist_tol_factor = True
    unique_sites, symprec, dist_tol_factor = equiv_sites_output
    struct_with_all_X = _get_struct_with_all_X(struct, unique_sites)
    sga_with_all_X, symprec = get_sga_and_symprec(struct_with_all_X, symprec=symprec)
    return_tuple = (sga_with_all_X.get_symmetry_dataset(), unique_sites)
    return (
        (*return_tuple, symprec, dist_tol_factor) if return_symprec_and_dist_tol_factor else return_tuple
    )


def _get_struct_with_all_X(struct, unique_sites):
    """
    Add all sites in unique_sites to a ``copy`` of ``struct``, and return this
    new |Structure|.
    """
    struct_with_all_X = struct.copy()
    struct_with_all_X.sites += unique_sites
    return struct_with_all_X



[docs]
def get_equiv_frac_coords_in_primitive(
    frac_coords: ArrayLike,
    primitive: Structure,
    supercell: Structure,
    symprec: float = 0.01,
    dist_tol_factor: float = 1.0,
    equiv_coords: bool = True,
    return_symprec_and_dist_tol_factor: bool = False,
    fixed_symprec_and_dist_tol_factor: bool = False,
    verbose: bool = False,
) -> list[np.ndarray] | np.ndarray | tuple[list[np.ndarray] | np.ndarray, float, float] | None:
    """
    Get equivalent fractional coordinates of ``frac_coords`` (in ``supercell``)
    in the given ``primitive`` cell.

    Returns a list of equivalent fractional coords in the primitive cell if
    ``equiv_coords`` is ``True`` (default).

    Note that there may be multiple possible symmetry-equivalent sites, all of
    which are returned if ``equiv_coords`` is ``True``, otherwise the first
    site in the list (sorted using ``_frac_coords_sort_func``) is returned.

    Args:
        frac_coords (ArrayLike):
            Fractional coordinates in the supercell, for which to get
            equivalent coordinates in the primitive cell.
        primitive (|Structure|):
            Primitive cell structure.
        supercell (|Structure|):
            Supercell structure.
        symprec (float):
            Symmetry precision to use for determining symmetry operations.
            Default is 0.01. If ``fixed_symprec_and_dist_tol_factor`` is
            ``False`` (default), this value will be automatically adjusted (up
            to 10x, down to 0.1x) until the identified equivalent sites from
            ``spglib`` have consistent point group symmetries. Setting
            ``verbose`` to ``True`` will print information on the trialled
            ``symprec`` (and ``dist_tol_factor`` values), and setting
            ``return_symprec_and_dist_tol_factor`` to ``True`` will return the
            final ``symprec`` (and ``dist_tol_factor``) used for the equivalent
            site generation.
        dist_tol_factor (float):
            Distance tolerance for clustering generated sites (to ensure they
            are truly distinct), as a multiplicative factor of ``symprec``.
            Default is 1.0 (i.e. ``dist_tol = symprec``, in Å). If
            ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default), this
            value will also be automatically adjusted if necessary (up to 10x,
            down to 0.1x)(after ``symprec`` adjustments) until the identified
            equivalent sites from ``spglib`` have consistent point group
            symmetries. Setting ``verbose`` to ``True`` will print information
            on the trialled ``dist_tol_factor`` (and ``symprec``) values, and
            setting ``return_symprec_and_dist_tol_factor`` to ``True`` will
            return the final ``symprec`` (and ``dist_tol_factor``) used for
            the equivalent site generation.
        equiv_coords (bool):
            If ``True``, returns a list of equivalent fractional coords in the
            primitive cell. If ``False``, returns the first equivalent
            fractional coordinates in the list, sorted using
            ``_frac_coords_sort_func``. Default: ``True``.
        return_symprec_and_dist_tol_factor (bool):
            If ``True``, returns the final symmetry precision and distance
            tolerance factor used for the equivalent site generation (see
            ``symprec`` and ``dist_tol_factor`` argument descriptions). Default
            is ``False``.
        fixed_symprec_and_dist_tol_factor (bool):
            If ``True``, uses the provided ``symprec`` and ``dist_tol_factor``
            values without any automatic adjustments (see ``symprec`` and
            ``dist_tol_factor`` argument descriptions). Default is ``False``.
        verbose (bool):
            If ``True``, prints information on the trialled ``symprec`` and
            ``dist_tol_factor`` values, and the identified equivalent sites.
            Default is ``False``.

    Returns:
        list[np.ndarray] | np.ndarray | tuple[list[np.ndarray] | np.ndarray, float, float]:
            List of equivalent fractional coordinates in the primitive cell, or
            the first equivalent fractional coordinate in the list (sorted
            using ``_frac_coords_sort_func``), depending on the value of
            ``equiv_coords``. If ``return_symprec_and_dist_tol_factor`` is
            ``True``, also returns the final ``symprec`` and
            ``dist_tol_factor`` used for the equivalent site generation.
    """
    trial_symprecs = _TRIAL_SYMPREC_DIST_TOL_FACTORS * symprec
    trial_dist_tol_factors = _TRIAL_SYMPREC_DIST_TOL_FACTORS * dist_tol_factor
    for trial_dist_tol_factor, trial_symprec in product(trial_dist_tol_factors, trial_symprecs):
        # sometimes we can have edge cases where slight numerical differences cause issues with
        # dist_tol/symprec choices, and then primitive cell determination as a result, so scan over
        # some values if necessary. Here we scan over symprec values first (following the approach in
        # ``get_all_equiv_sites``), then dist_tol values -- this approach was found to be best from testing
        equiv_sites_output = get_all_equiv_sites(
            frac_coords,
            supercell,
            symprec=trial_symprec,
            dist_tol_factor=trial_dist_tol_factor,
            return_symprec_and_dist_tol_factor=True,
            fixed_symprec_and_dist_tol_factor=fixed_symprec_and_dist_tol_factor,
            verbose=verbose,
        )
        assert isinstance(equiv_sites_output, tuple)  # return_symprec_and_dist_tol_factor = True
        unique_sites, adjusted_trial_symprec, adjusted_trial_dist_tol_factor = equiv_sites_output
        supercell_with_all_X = _get_struct_with_all_X(supercell, unique_sites)
        prim_with_all_X = get_primitive_structure(
            supercell_with_all_X, ignored_species=["X"], symprec=adjusted_trial_symprec
        )

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message="No mapping")
            rotated_struct, matrix = _rotate_and_get_supercell_matrix(
                prim_with_all_X,
                primitive,
                ltol=adjusted_trial_symprec,
                atol=100 * adjusted_trial_symprec,  # default is 1
            )
        if fixed_symprec_and_dist_tol_factor:
            break  # just take first attempt

        if rotated_struct is not None:
            symprec = adjusted_trial_symprec
            dist_tol_factor = adjusted_trial_dist_tol_factor
            if verbose:
                print(
                    f"Succeeded folding to primitive cell of equivalent supercell sites, with symprec = "
                    f"{symprec}, dist_tol_factor = {dist_tol_factor}."
                )
            break

        if verbose:
            print(
                f"Failed folding to primitive cell of equivalent supercell sites, with symprec = "
                f"{symprec}, dist_tol_factor = {dist_tol_factor}."
            )

    if rotated_struct is None:
        warnings.warn(
            "Could not find a mapping between the primitive and supercell structures! You may need to "
            "tune the symprec/dist_tol parameters for this system."
        )
        return None

    dist_tol = symprec * dist_tol_factor
    primitive_with_all_X = rotated_struct * matrix
    orig_summed_dist = summed_dist(primitive, primitive_with_all_X, ignored_species=["X"])
    if orig_summed_dist != 0:
        # may have different primitive cell definitions, try re-orienting
        from doped.utils.supercells import min_dist

        orig_min_dist = min_dist(primitive_with_all_X, ignored_species=["X"])
        reoriented_primitive_with_all_X = orient_s2_like_s1(
            primitive,
            primitive_with_all_X,
            primitive_cell=False,
            ignored_species=["X"],
            comparator=ElementComparator(),
        )
        new_min_dist = min_dist(reoriented_primitive_with_all_X, ignored_species=["X"])
        new_summed_dist = summed_dist(primitive, reoriented_primitive_with_all_X, ignored_species=["X"])
        if (
            abs(new_summed_dist - orig_summed_dist) > abs(orig_min_dist - new_min_dist)
            and abs(orig_min_dist - new_min_dist) < dist_tol * 2
        ):  # only take re-oriented cell if it improves RMS diff and doesn't significantly change min_dist
            primitive_with_all_X = reoriented_primitive_with_all_X
            dist_tol = max(dist_tol, abs(orig_min_dist - new_min_dist))
            dist_tol_factor = dist_tol / symprec

    # now re-apply ``get_all_equiv_sites`` to each site primitive cell X site, to account for possible
    # periodicity-breaking in the supercell, which would then only give a subset of the actual equivalent
    # sites in the primitive cell:
    if verbose:
        print("Regenerating equivalent sites in primitive cell...")
    all_equiv_prim_frac_coords = cluster_sites_by_dist_tol(
        [
            equiv_frac_coords
            for site in primitive_with_all_X.sites
            if site.specie.symbol == "X"
            for equiv_frac_coords in get_all_equiv_sites(
                site.frac_coords,
                primitive,
                just_frac_coords=True,
                symprec=symprec,
                dist_tol_factor=dist_tol_factor,
                fixed_symprec_and_dist_tol_factor=fixed_symprec_and_dist_tol_factor,
                verbose=verbose,
            )
        ],
        primitive.lattice,
        dist_tol=dist_tol,
    )

    prim_coord_list = sorted(
        [
            _vectorized_custom_round(np.mod(_vectorized_custom_round(frac_coords), 1))
            for frac_coords in all_equiv_prim_frac_coords
        ],
        key=_frac_coords_sort_func,
    )

    if return_symprec_and_dist_tol_factor:
        return (prim_coord_list if equiv_coords else prim_coord_list[0]), symprec, dist_tol_factor
    return prim_coord_list if equiv_coords else prim_coord_list[0]




[docs]
def are_equivalent_lattices(
    lattice_1: Lattice | Structure,
    lattice_2: Lattice | Structure,
    ltol: float = 5e-3,
    atol: float = 1,
) -> bool:
    """
    Check if two lattices are (symmetry-)equivalent, allowing for different
    cell sizes.

    Args:
        lattice_1 (|Lattice| | |Structure|):
            The first lattice to check for equivalence.
        lattice_2 (|Lattice| | |Structure|):
            The second lattice to check for equivalence.
        ltol (float):
            Fractional tolerance for matching lattice vector lengths.
            Defaults to 5e-3 (i.e. 0.5% tolerance).
        atol (float):
            Tolerance for matching angles. Defaults to 1 degree.

    Returns:
        bool:
            ``True`` if the two lattices are (symmetry-)equivalent, ``False``
            otherwise.
    """
    lattice_1 = lattice_1 if isinstance(lattice_1, Lattice) else lattice_1.lattice
    lattice_2 = lattice_2 if isinstance(lattice_2, Lattice) else lattice_2.lattice
    return lattice_1.find_mapping(lattice_2, ltol=ltol, atol=atol, skip_rotation_matrix=True) is not None



def _rotate_and_get_supercell_matrix(
    prim_struct: Structure, target_struct: Structure, ltol: float = 1e-5, atol: float = 1
) -> tuple[Structure, np.ndarray] | tuple[None, None]:
    """
    Rotates the input ``prim_struct`` to match the ``target_struct``
    orientation, and returns the supercell matrix to convert from the rotated
    ``prim_struct`` to the ``target_struct``.

    Returns ``(None, None)`` if no mapping is found.

    Args:
        prim_struct (|Structure|):
            The primitive structure.
        target_struct (|Structure|):
            The target structure to match.
        ltol (float):
            Length tolerance for matching the lattice vectors (default: 1e-5).
        atol (float):
            Angle tolerance for matching the angles between the lattice vectors
            (default: 1).

    Returns:
        tuple[Structure, np.ndarray]:
            The rotated primitive structure and the supercell matrix to convert
            from the rotated primitive structure to the target structure.
    """
    possible_mappings = list(
        prim_struct.lattice.find_all_mappings(target_struct.lattice, ltol=ltol, atol=atol)
    )
    if not possible_mappings:
        warnings.warn("No mapping between the primitive and target structures found!")
        return None, None

    mapping = next(
        iter(  # get possible mappings, then sort by R*S, S, R, then return first
            sorted(
                possible_mappings,
                key=lambda x: (
                    _lattice_matrix_sort_func(np.dot(x[1].T, x[2])),
                    _lattice_matrix_sort_func(x[2]),
                    _lattice_matrix_sort_func(x[1]),
                ),
            )
        )
    )

    rotation_matrix = mapping[1]
    if np.allclose(rotation_matrix, -1 * np.eye(3)):
        # pymatgen sometimes gives a rotation matrix of -1 * identity matrix, which is
        # equivalent to no rotation. Just use the identity matrix instead.
        rotation_matrix = np.eye(3)
        supercell_matrix = -1 * mapping[2]
    else:
        supercell_matrix = mapping[2]

    rotation_symm_op = SymmOp.from_rotation_and_translation(
        rotation_matrix=rotation_matrix.T
    )  # Transpose = inverse of rotation matrices (orthogonal matrices), better numerical stability
    output_prim_struct = apply_symm_op_to_struct(rotation_symm_op, prim_struct, rotate_lattice=True)
    return _round_struct_coords(output_prim_struct), supercell_matrix



[docs]
def translate_structure(
    structure: Structure, vector: np.ndarray, frac_coords: bool = True, to_unit_cell: bool = True
) -> Structure:
    """
    Translate a structure and its sites by a given vector (**not in place**).

    Args:
        structure: ``pymatgen`` |Structure| object.
        vector: Translation vector, fractional or Cartesian.
        frac_coords:
            Whether the input vector is in fractional coordinates.
            (Default: True)
        to_unit_cell:
            Whether to translate the sites to the unit cell.
            (Default: True)

    Returns:
        ``pymatgen`` |Structure| object with translated sites.
    """
    translated_structure = structure.copy()
    return translated_structure.translate_sites(
        indices=list(range(len(translated_structure))),
        vector=vector,
        to_unit_cell=to_unit_cell,
        frac_coords=frac_coords,
    )



def _get_supercell_matrix_and_possibly_redefine_prim(
    prim_struct, target_struct, sga: SpacegroupAnalyzer | None = None, symprec=0.01
):
    """
    Determines the supercell transformation matrix to convert from the
    primitive structure to the target structure.

    The supercell matrix is defined to be T in ``T*P = S`` where P and S are
    the primitive and supercell lattice matrices respectively. Equivalently,
    multiplying ``prim_struct * T`` will give the target_struct. In
    ``pymatgen``, this requires the output transformation matrix to be integer.

    First tries to determine a simple (integer) transformation matrix with no
    basis set rotation required. If that fails, then defaults to using
    ``_rotate_and_get_supercell_matrix``. Searches over various possible
    primitive cell definitions from spglib.

    Args:
        prim_struct: ``pymatgen`` |Structure| object of the primitive cell.
        target_struct: ``pymatgen`` |Structure| object of the target cell.
        sga:
            ``SpacegroupAnalyzer`` object of the primitive cell. If ``None``,
            will be computed from ``prim_struct``.
        symprec:
            Symmetry precision for ``SpacegroupAnalyzer``, if being generated.

    Returns:
        prim_struct:
            Primitive structure, possibly rotated/redefined.
        supercell_matrix:
            Supercell transformation matrix to convert from the primitive
            structure to the target structure.
    """

    def _get_supercell_matrix_and_possibly_rotate_prim(prim_struct, target_struct):
        try:
            # supercell transform matrix is T in `T*P = S` (P = prim, S = super), so `T = S*P^-1`:
            transformation_matrix = np.dot(
                target_struct.lattice.matrix, np.linalg.inv(prim_struct.lattice.matrix)
            )
            if not np.allclose(np.rint(transformation_matrix), transformation_matrix, atol=1e-3):
                raise ValueError  # if non-integer transformation matrix

            return prim_struct, np.rint(transformation_matrix)

        except ValueError:  # if non-integer transformation matrix
            attempt_prim_struct, attempt_transformation_matrix = _rotate_and_get_supercell_matrix(
                prim_struct,
                target_struct,
                ltol=symprec,
                atol=100 * symprec,
            )
            if attempt_prim_struct:  # otherwise failed, stick with original T matrix
                prim_struct = attempt_prim_struct
                transformation_matrix = attempt_transformation_matrix

        if np.allclose(np.rint(transformation_matrix), transformation_matrix, atol=1e-3):
            return prim_struct, np.rint(transformation_matrix)

        return prim_struct, transformation_matrix

    summed_dists_w_candidate_prim_structs_and_T_matrices = []
    # Could also apply possible origin shifts to other structs (refined, find_primitive) as well,
    # if we find any structures for which this code still fails
    candidate_prim_structs = [
        *_get_candidate_prim_structs(prim_struct, symprec=symprec),
        *_get_candidate_prim_structs(target_struct, symprec=symprec),
    ]

    for possible_prim_struct in candidate_prim_structs:
        new_prim_struct, transformation_matrix = _get_supercell_matrix_and_possibly_rotate_prim(
            possible_prim_struct, target_struct
        )
        if not np.allclose(
            np.rint(transformation_matrix), transformation_matrix, atol=1e-3
        ) or not np.allclose(
            (new_prim_struct * transformation_matrix).lattice.matrix,
            target_struct.lattice.matrix,
            atol=1e-3,
        ):
            # not integer or doesn't exactly match bulk supercell, so bad transformation matrix, skip
            continue
        new_prim_struct = Structure.from_sites([site.to_unit_cell() for site in new_prim_struct])
        summed_dist_to_target = summed_dist(
            Structure.from_sites(
                [
                    site.to_unit_cell()
                    for site in (new_prim_struct * transformation_matrix).get_sorted_structure()
                ]
            ),
            target_struct,
        )
        summed_dists_w_candidate_prim_structs_and_T_matrices.append(
            (summed_dist_to_target, new_prim_struct, transformation_matrix)
        )

    closest_match = sorted(  # sort to get ideal primitive cell definition
        summed_dists_w_candidate_prim_structs_and_T_matrices,
        key=lambda x: (
            round(x[0], 3),
            _lattice_matrix_sort_func(x[1].lattice.matrix),
            _lattice_matrix_sort_func(x[2]),
            _struct_sort_func(x[1]),
        ),
    )[0]
    if closest_match[0] > 0.1:  # no perfect match has been found. Warn user and return the closest:
        warnings.warn(
            f"Found the transformation matrix from the primitive cell lattice to the supplied supercell, "
            f"but could not determine the transformation to directly match the atomic coordinates ("
            f"infinite possible symmetry-equivalent coordinate definitions). Closest match has RMS "
            f"distance of {closest_match[0]:.3f} Å.\n"
            f"The bulk and defect supercells generated will be equivalent to the input supercell, "
            f"but with a different choice of atomic coordinates (e.g. [0.1, 0.1, 0.1] instead of [0.9, "
            f"0.9, 0.9]). You should make sure to do the bulk supercell calculation with this "
            f"doped-generated supercell (DefectsGenerator.bulk_supercell, which is output to the `Bulk` "
            f"folders with the file generation functions), so that the coordinates match those of the "
            f"defect supercells (this matters when computing finite-size corrections)."
        )
    # Note: Could always just get the transformation of the generated supercell to the input supercell, and
    # then apply this transformation to each generated bulk/defect supercell at the end of defect
    # generation, but means that self.primitive_structure * self.supercell_matrix is no longer
    # guaranteed to match self.bulk_supercell... Not the biggest deal though
    # Likely way more work than worth
    return closest_match[1:]


def _get_candidate_prim_structs(structure, **kwargs):
    sga = get_sga(structure, **kwargs)

    pmg_prim_struct = structure.get_primitive_structure(tolerance=kwargs.get("symprec", 0.01))
    candidate_prim_structs = (
        [structure, pmg_prim_struct] if len(structure) == len(pmg_prim_struct) else [pmg_prim_struct]
    )

    for _i in range(4):
        struct = sga.get_primitive_standard_structure()
        candidate_prim_structs.append(struct)

        spglib_dataset = sga.get_symmetry_dataset()
        if not np.allclose(spglib_dataset.origin_shift, 0):
            candidate_prim_structs.append(translate_structure(struct, spglib_dataset.origin_shift))

        sga = get_sga(struct, sga._symprec)  # use same symprec

    candidate_prim_structs.append(sga.find_primitive())
    for candidate_conv_struct in [sga.get_refined_structure(), sga.get_conventional_standard_structure()]:
        if len(candidate_conv_struct) == len(pmg_prim_struct):
            # only also try conventional if equivalent to the primitive cell
            candidate_prim_structs = [candidate_conv_struct, *candidate_prim_structs]

    # sometimes Structure.get_primitive_structure() can fail to identify the primitive structure, returning
    # the same input structure, so if the number of atoms differs in different candidate primitive
    # structures, then just take those with the minimum number of atoms:
    return [
        candidate_prim_struct
        for candidate_prim_struct in candidate_prim_structs
        if len(candidate_prim_struct) == min(len(i) for i in candidate_prim_structs)
    ]



[docs]
def get_wyckoff(
    frac_coords: ArrayLike,
    struct: Structure,
    equiv_sites: bool = False,
    symprec: float = 0.01,
    **kwargs,
) -> str | tuple:
    r"""
    Get the Wyckoff label of the input fractional coordinates in the input
    structure. If the symmetry operations of the structure have already been
    computed, these can be input as a list to speed up the calculation.

    Args:
        frac_coords (ArrayLike):
            Fractional coordinates of the site to get the Wyckoff label of.
        struct (|Structure|):
            |Structure| for which ``frac_coords`` corresponds to.
        equiv_sites (bool):
            If ``True``, returns a tuple of (Wyckoff label, list of equivalent
            sites). Default is ``False``.
        symprec (float):
            Symmetry precision to use for determining symmetry operations.
            Default is 0.01. If ``fixed_symprec_and_dist_tol_factor`` is
            ``False`` (default), this value will be automatically adjusted (up
            to 10x, down to 0.1x) until the identified equivalent sites from
            ``spglib`` have consistent point group symmetries. Setting
            ``verbose`` to ``True`` will print information on the trialled
            ``symprec`` (and ``dist_tol_factor`` values).
        **kwargs:
            Additional keyword arguments to pass to ``get_all_equiv_sites``,
            such as ``dist_tol_factor``, ``fixed_symprec_and_dist_tol_factor``,
            and ``verbose``.

    Returns:
        str | tuple:
            The Wyckoff label of the input fractional coordinates in the
            structure. If ``equiv_sites`` is ``True``, also returns a list of
            equivalent sites in the structure.
    """
    symm_dataset, unique_sites = _get_symm_dataset_of_struct_with_all_equiv_sites(
        frac_coords,
        struct,
        symprec=symprec,
        return_symprec_and_dist_tol_factor=False,
        **kwargs,
    )
    conv_cell_factor = len(symm_dataset.std_positions) / len(symm_dataset.wyckoffs)
    multiplicity = int(conv_cell_factor * len(unique_sites))
    wyckoff_label = f"{multiplicity}{symm_dataset.wyckoffs[-1]}"

    return (wyckoff_label, unique_sites) if equiv_sites else wyckoff_label



def _struct_sort_func(struct: Structure | np.ndarray) -> tuple:
    """
    Sort by the lattice matrix sorting function, then by (minus) the number of
    high-symmetry coordinates (x=y=z, then 2 equal coordinates), then by the
    sum of all fractional coordinates, then by the magnitudes of high-symmetry
    coordinates (x=y=z, then 2 equal coordinates), then by the summed magnitude
    of all x coordinates, then y coordinates, then z coordinates.

    Args:
        struct:
            ``pymatgen`` |Structure| object, or an array of fractional
            coordinates of sites in the structure (in which case the lattice
            matrix metric is skipped).

    Returns:
        tuple: Tuple of sorting criteria values.
    """
    if isinstance(struct, Structure):
        struct_for_sorting = _round_struct_coords(struct, to_unit_cell=True)
        lattice_metric = _lattice_matrix_sort_func(struct_for_sorting.lattice.matrix)
        frac_coords = struct_for_sorting.frac_coords
    else:
        lattice_metric = (False,)
        frac_coords = struct

    # get summed magnitudes of x=y=z coords:
    xyz_matching_coords = frac_coords[  # Find the coordinates where x = y = z:
        (frac_coords[:, 0] == frac_coords[:, 1]) & (frac_coords[:, 1] == frac_coords[:, 2])
    ]
    xyz_sum_magnitudes = np.sum(np.linalg.norm(xyz_matching_coords, axis=1))

    # get summed magnitudes of x=y / y=z / x=z coords:
    xy_matching_coords = frac_coords[
        (frac_coords[:, 0] == frac_coords[:, 1])
        | (frac_coords[:, 1] == frac_coords[:, 2])
        | (frac_coords[:, 0] == frac_coords[:, 2])
    ]
    xy_sum_magnitudes = np.sum(np.linalg.norm(xy_matching_coords, axis=1))

    return (
        *lattice_metric,
        -len(xyz_matching_coords),
        -len(xy_matching_coords),
        round(np.sum(frac_coords), 2),
        round(xyz_sum_magnitudes, 2),
        round(xy_sum_magnitudes, 2),
        round(np.sum(frac_coords[:, 0]), 2),
        round(np.sum(frac_coords[:, 1]), 2),
        round(np.sum(frac_coords[:, 2]), 2),
    )


def _lattice_matrix_sort_func(lattice_matrix: np.ndarray) -> tuple:
    """
    Sorting function to apply on an iterable of lattice matrices.

    Matrices are sorted by:

    - lattice_matrix is diagonal
    - matrix symmetry (around diagonal)
    - maximum sum of diagonal element magnitudes.
    - minimum number of negative elements
    - maximum number of x, y, z that are equal
    - maximum number of abs(x), abs(y), abs(z) that are equal
    - a, b, c magnitudes (favouring c >= b >= a)

    Args:
        lattice_matrix (np.ndarray): Lattice matrix to sort.

    Returns:
        tuple: Tuple of sorting criteria values.
    """

    def is_symmetric(matrix: np.ndarray, tol: float = 1e-3) -> bool:
        iu = np.triu_indices_from(matrix, k=1)  # indices of upper triangle of matrix
        return bool(np.all(np.abs(matrix[iu] - matrix.T[iu]) <= tol))

    is_diagonal = np.all(np.abs(lattice_matrix[~np.eye(3, dtype=bool)]) < 1e-3)
    symmetric = is_diagonal or is_symmetric(lattice_matrix)
    num_negs = np.sum(lattice_matrix < 0)
    diag_sum = np.round(np.sum(np.abs(np.diag(lattice_matrix))), 1)
    flat_matrix = lattice_matrix.ravel()
    _unique_vals, counts = np.unique(flat_matrix, return_counts=True)
    num_equals = np.sum(counts * (counts + 1) // 2)
    _abs_vals, abs_counts = np.unique(np.abs(flat_matrix), return_counts=True)
    num_abs_equals = np.sum(abs_counts * (abs_counts + 1) // 2)
    a, b, c = np.linalg.norm(lattice_matrix, axis=1)

    return (
        not is_diagonal,
        not symmetric,
        -diag_sum,
        num_negs,
        -num_equals,
        -num_abs_equals,
        -round(c, 2),
        -round(b, 2),
        -round(a, 2),
    )



[docs]
def get_clean_structure(
    structure: Structure, return_T: bool = False, dist_precision: float = 0.001, niggli_reduce: bool = True
) -> Structure | tuple[Structure, np.ndarray]:
    """
    Get a 'clean' version of the input `structure` by searching over equivalent
    cells, and finding the most optimal according to
    ``_lattice_matrix_sort_func`` (most symmetric, with mostly positive
    diagonals and c >= b >= a).

    Args:
        structure (|Structure|): |Structure| object.
        return_T (bool):
            Whether to return the transformation matrix from the original
            structure lattice to the new structure lattice (T * Orig = New).
            (Default = False)
        dist_precision (float):
            The desired distance precision in Å for rounding of lattice
            parameters and fractional coordinates. (Default: 0.001)
        niggli_reduce (bool):
            Whether to Niggli reduce the lattice before searching for the
            optimal lattice matrix. If this is set to ``False``, we also skip
            the search for the best positive determinant lattice matrix.
            (Default: True)

    Returns:
        Structure | tuple[Structure, np.ndarray]:
            The 'clean' version of the input structure, or a tuple of the
            'clean' structure and the transformation matrix from the original
            structure lattice to the new structure lattice (T * Orig = New).
    """
    lattice = structure.lattice
    if np.all(lattice.matrix <= 0):
        lattice = Lattice(lattice.matrix * -1)
    possible_lattice_matrices = [
        lattice.matrix,
    ]

    for _ in range(4):
        lattice = lattice.get_niggli_reduced_lattice() if niggli_reduce else lattice

        # want to maximise the number of non-negative diagonals, and also have a positive determinant
        # can multiply two rows by -1 to get a positive determinant:
        possible_lattice_matrices.append(lattice.matrix)
        for i in range(3):
            for j in range(i + 1, 3):
                new_lattice_matrix = lattice.matrix.copy()
                new_lattice_matrix[i] = new_lattice_matrix[i] * -1
                new_lattice_matrix[j] = new_lattice_matrix[j] * -1
                possible_lattice_matrices.append(new_lattice_matrix)

    possible_lattice_matrices.sort(key=_lattice_matrix_sort_func)
    new_lattice_matrix = possible_lattice_matrices[0]
    if np.all(new_lattice_matrix <= 0):
        new_lattice_matrix = new_lattice_matrix * -1

    new_structure = Structure(
        new_lattice_matrix,
        structure.species_and_occu,
        structure.cart_coords,
        coords_are_cartesian=True,
        to_unit_cell=True,
        site_properties=structure.site_properties,
        labels=structure.labels,
        charge=structure._charge,
    )
    new_structure = _round_struct_coords(new_structure, dist_precision=dist_precision, to_unit_cell=True)

    # sort structure to match a desired, deterministic format:
    new_structure = new_structure.get_sorted_structure(
        key=lambda x: (
            x.species.average_electroneg,
            x.species_string,
            _frac_coords_sort_func(x.frac_coords),
        )
    )
    if niggli_reduce:
        new_structure = _get_best_pos_det_structure(new_structure)  # ensure positive determinant

    if return_T:
        # T * Orig = New; T = New * Orig^-1; Orig = T^-1 * New
        transformation_matrix = np.matmul(
            new_structure.lattice.matrix, np.linalg.inv(structure.lattice.matrix)
        )
        if not np.allclose(transformation_matrix, np.rint(transformation_matrix), atol=1e-5):
            raise ValueError(
                "Transformation matrix for clean/reduced structure could not be found! If you are seeing "
                "this bug, please notify the `doped` developers"
            )

        return (new_structure, np.rint(transformation_matrix))

    return new_structure



def _get_best_pos_det_structure(structure: Structure):
    """
    If the input structure has a negative determinant (corresponding to a left-
    hand coordinate system), then find the best possible re-definition of the
    lattice vectors which gives a positive determinant, according to
    ``_struct_sort_func``.

    This is to avoid an apparent VASP bug with negative triple products of the
    lattice vectors -- not sure if this is only in old versions?
    """
    if np.linalg.det(structure.lattice.matrix) < 0:
        swap_combo_score_dict = {}
        for swap_combo in permutations([0, 1, 2]):
            candidate_structure = swap_axes(structure, swap_combo)
            if np.linalg.det(candidate_structure.lattice.matrix) > 0:
                swap_combo_score_dict[swap_combo] = _struct_sort_func(candidate_structure)

        best_swap_combo = min(swap_combo_score_dict, key=lambda x: swap_combo_score_dict[x])
        structure = swap_axes(structure, best_swap_combo)

    return structure



[docs]
def get_primitive_structure(
    structure: Structure,
    ignored_species: list | None = None,
    clean: bool = True,
    return_all: bool = False,
    **kwargs,
):
    """
    Get a consistent/deterministic primitive structure from a ``pymatgen``
    |Structure|.

    For some materials (e.g. zinc blende), there are multiple equivalent
    primitive cells (e.g. Cd (0,0,0) & Te (0.25,0.25,0.25); Cd (0,0,0) & Te
    (0.75,0.75,0.75) for F-43m CdTe), so for reproducibility and in line with
    most structure conventions/definitions, take the one with the cleanest
    lattice and structure definition, according to ``_struct_sort_func``.

    If ``ignored_species`` is set, then the sorting function used to determine
    the ideal primitive structure will ignore sites with species in
    ``ignored_species``.

    Args:
        structure (|Structure|):
            |Structure| to get the corresponding primitive structure of.
        ignored_species (list | None):
            List of species to ignore when determining the ideal primitive
            structure. (Default: None)
        clean (bool):
            Whether to return a 'clean' version of the primitive structure,
            with the lattice matrix in a standardised form. (Default: True)
        return_all (bool):
            Whether to return all possible primitive structures tested, sorted
            by the sorting function. (Default: False)
        **kwargs:
            Additional keyword arguments to pass to the ``get_sga`` function
            (e.g. ``symprec`` etc).

    Returns:
        Structure | list[Structure]:
            The primitive structure of the input structure, or a list of all
            possible primitive structures tested, sorted by the sorting
            function.
    """
    # make inputs hashable, then call ``_cache_ready_get_primitive_structure``:
    cache_ready_ignored_species = tuple(ignored_species) if ignored_species is not None else None
    cache_ready_kwargs = tuple(kwargs.items()) if kwargs else None

    return _cache_ready_get_primitive_structure(
        structure,
        ignored_species=cache_ready_ignored_species,
        clean=clean,
        return_all=return_all,
        kwargs=cache_ready_kwargs,
    )



@lru_cache(maxsize=int(1e3))
def _cache_ready_get_primitive_structure(
    structure: Structure,
    ignored_species: tuple | None = None,
    clean: bool = True,
    return_all: bool = False,
    kwargs: tuple | None = None,
):
    """
    ``get_primitive_structure`` code, with hashable input arguments for caching
    (using |Structure| hash function from ``doped.utils.efficiency``).
    """
    # clean structure site_properties (if mismatching ``None`` values present, can mess with primitive
    # structure determination) -- this can happen if e.g. a slab structure is input with "bulk_wyckoff"
    # etc site properties:
    for key, val in list(structure.site_properties.items()):
        if any(i is not None for i in val) and any(i is None for i in val):
            structure.site_properties.pop(key, None)
            for site in structure:
                site.properties.pop(key, None)

    kwargs_dict = dict(kwargs) if kwargs is not None else {}
    candidate_prim_structs = _get_candidate_prim_structs(structure, **kwargs_dict)

    if ignored_species is not None:
        pruned_possible_prim_structs = [
            Structure.from_sites([site for site in struct if site.specie.symbol not in ignored_species])
            for struct in candidate_prim_structs
        ]
    else:
        pruned_possible_prim_structs = candidate_prim_structs

    # sort and return indices:
    sorted_indices = sorted(
        range(len(pruned_possible_prim_structs)),
        key=lambda i: _struct_sort_func(pruned_possible_prim_structs[i]),
    )

    prim_structs = [
        _get_best_pos_det_structure(_round_struct_coords(candidate_prim_structs[i], to_unit_cell=True))
        for i in sorted_indices
    ]
    if clean:
        prim_structs = [get_clean_structure(struct) for struct in prim_structs]

    # copy() to help avoid mutability issues with cached outputs:
    return prim_structs.copy() if return_all else _get_best_pos_det_structure(prim_structs[0]).copy()



[docs]
def get_spglib_conv_structure(sga: SpacegroupAnalyzer) -> tuple[Structure, SpacegroupAnalyzer]:
    """
    Get a consistent/deterministic conventional structure from a
    ``SpacegroupAnalyzer`` object. Also returns the corresponding
    ``SpacegroupAnalyzer`` (for getting Wyckoff symbols corresponding to this
    conventional structure definition).

    For some materials (e.g. zinc blende), there are multiple equivalent
    primitive/conventional cells, so for reproducibility and in line with most
    structure conventions/definitions, take the one with the lowest summed norm
    of the fractional coordinates of the sites (i.e. favour Cd (0,0,0) and Te
    (0.25,0.25,0.25) over Cd (0,0,0) and Te (0.75,0.75,0.75) for F-43m CdTe;
    SGN 216).
    """
    possible_conv_structs_and_sgas = []
    for _i in range(3):
        struct = sga.get_conventional_standard_structure()
        possible_conv_structs_and_sgas.append((struct, sga))
        sga = get_sga(sga.get_primitive_standard_structure(), symprec=sga._symprec)

    possible_conv_structs_and_sgas = sorted(
        possible_conv_structs_and_sgas, key=lambda x: _struct_sort_func(x[0])
    )
    return (
        _round_struct_coords(possible_conv_structs_and_sgas[0][0], to_unit_cell=True),
        possible_conv_structs_and_sgas[0][1],
    )




[docs]
def get_BCS_conventional_structure(
    structure: Structure, pbar: tqdm | None = None, return_wyckoff_dict: bool = False
) -> tuple[Structure, list[int]] | tuple[Structure, list[int], dict[str, list[list[Expr]]]]:
    """
    Get the conventional crystal structure of the input structure, according to
    the Bilbao Crystallographic Server (BCS) definition.

    Also returns an array of the lattice vector swaps (used with ``swap_axes``)
    to convert from the ``spglib`` (``SpaceGroupAnalyzer``) conventional
    structure definition to the BCS definition.

    Args:
        structure (|Structure|):
            |Structure| for which to get the corresponding BCS conventional
            crystal structure.
        pbar (ProgressBar):
            ``tqdm`` progress bar object, to update progress. Default is
            ``None``.
        return_wyckoff_dict (bool):
            Whether to return the Wyckoff label dict (as
            ``{Wyckoff label: coordinates}``).

    Returns:
        tuple[Structure, np.ndarray] | tuple[Structure, np.ndarray, dict[str, np.ndarray]]:
            A tuple of the BCS conventional structure of the input structure,
            the lattice vector swapping array and, if ``return_wyckoff_dict``
            is ``True``, the Wyckoff label dict.
    """
    struc_wout_oxi = structure.copy()
    struc_wout_oxi.remove_oxidation_states()
    sga = get_sga(struc_wout_oxi)
    conventional_structure, conv_sga = get_spglib_conv_structure(sga)

    wyckoff_label_dict = get_wyckoff_dict_from_sgn(conv_sga.get_space_group_number())
    # determine cell orientation for Wyckoff site determination (needs to match the Bilbao
    # Crystallographic Server's convention, which can differ from spglib (pymatgen) in some cases)

    sga_wyckoffs = conv_sga.get_symmetrized_structure().wyckoff_symbols

    for trial_lattice_vec_swap_array in [  # 3C2 -> 6 possible combinations
        # ordered according to frequency of occurrence in the Materials Project
        [0, 1, 2],  # abc, ~95% of cases
        [0, 2, 1],  # acb
        [2, 1, 0],  # cba
        [1, 0, 2],  # bac
        [2, 0, 1],  # cab
        [1, 2, 0],  # bca
        None,  # no perfect match, default to original orientation
    ]:
        if trial_lattice_vec_swap_array is None:
            lattice_vec_swap_array = [0, 1, 2]
            break

        reoriented_conv_structure = swap_axes(conventional_structure, trial_lattice_vec_swap_array)
        if _compare_wyckoffs(
            sga_wyckoffs,
            reoriented_conv_structure,
            wyckoff_label_dict,
        ):
            lattice_vec_swap_array = trial_lattice_vec_swap_array
            break

        if pbar is not None:
            pbar.update(1 / 6 * 10)  # 45 up to 55% of progress bar in DefectsGenerator. This part can
            # take a little while for low-symmetry structures

    bcs_conv_structure = get_clean_structure(
        swap_axes(conventional_structure, lattice_vec_swap_array), niggli_reduce=False
    )
    assert isinstance(bcs_conv_structure, Structure)  # return_T = False

    if return_wyckoff_dict:
        return bcs_conv_structure, lattice_vec_swap_array, wyckoff_label_dict

    return bcs_conv_structure, lattice_vec_swap_array




[docs]
def get_conv_cell_site(defect_entry: DefectEntry) -> PeriodicSite | None:
    """
    Gets an equivalent site of the defect entry in the conventional structure
    of the host material. If the ``conventional_structure`` attribute is not
    defined for defect_entry, then it is generated using ``SpacegroupAnalyzer``
    and then reoriented to match the Bilbao Crystallographic Server's
    conventional structure definition.

    Args:
        defect_entry: |DefectEntry| object.

    Returns:
        PeriodicSite | None:
            The equivalent site of the defect entry in the conventional
            structure of the host material, or ``None`` if not found.
    """
    bulk_prim_structure = defect_entry.defect.structure.copy()
    bulk_prim_structure.remove_oxidation_states()  # adding oxidation states adds the
    # # deprecated 'properties' attribute with -> {"spin": None}, giving a deprecation warning

    prim_struct_with_X = bulk_prim_structure.copy()
    prim_struct_with_X.append("X", defect_entry.defect.site.frac_coords, coords_are_cartesian=False)

    sga = get_sga(bulk_prim_structure)
    # convert to match sga primitive structure first:
    sga_prim_struct = sga.get_primitive_standard_structure()
    prim_struct_with_X_like_sga_prim = orient_s2_like_s1(
        sga_prim_struct,
        prim_struct_with_X,
        primitive_cell=False,
        ignored_species=["X"],
        comparator=ElementComparator(),
    )
    if not prim_struct_with_X_like_sga_prim:
        warnings.warn(
            "The transformation from the DefectEntry primitive cell to the spglib primitive cell could "
            "not be determined, and so the corresponding conventional cell site cannot be identified."
        )
        return None

    conv_struct_with_X = prim_struct_with_X_like_sga_prim * np.linalg.inv(
        sga.get_conventional_to_primitive_transformation_matrix()
    )

    # convert to match defect_entry conventional structure definition
    assert defect_entry.conventional_structure is not None
    conv_struct_with_X_like_defect_entry_conv = orient_s2_like_s1(
        defect_entry.conventional_structure,
        conv_struct_with_X,
        primitive_cell=False,
        ignored_species=["X"],
        comparator=ElementComparator(),
    )

    conv_cell_site = next(
        site for site in conv_struct_with_X_like_defect_entry_conv.sites if site.specie.symbol == "X"
    )
    # site choice doesn't matter so much here, as we later get the equivalent coordinates using the
    # Wyckoff dict and choose the conventional site based on that anyway (in the ``DefectsGenerator``
    # initialisation)
    conv_cell_site.to_unit_cell()
    conv_cell_site.frac_coords = _vectorized_custom_round(conv_cell_site.frac_coords)

    return conv_cell_site




[docs]
def swap_axes(structure: Structure, axes: list[int] | tuple[int, ...]) -> Structure:
    """
    Swap axes of the given structure.

    The new order of the axes is given by the axes parameter. For example,
    ``axes=(2, 1, 0)`` will swap the first and third axes.
    """
    transformation_matrix = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]

    for i, axis in enumerate(axes):
        transformation_matrix[i][axis] = 1

    transformation = SupercellTransformation(transformation_matrix)

    return transformation.apply_transformation(structure)




[docs]
def get_wyckoff_dict_from_sgn(sgn: int) -> dict[str, list[list[Expr]]]:
    """
    Get dictionary of ``{Wyckoff label: coordinates}`` for a given space group
    number.

    The database used here for Wyckoff analysis (``wyckpos.dat``) was obtained
    from code written by JaeHwan Shim @schinavro (ORCID: 0000-0001-7575-4788)
    (https://gitlab.com/ase/ase/-/merge_requests/1035) based on the tabulated
    datasets in https://github.com/xtalopt/randSpg (also found at
    https://github.com/spglib/spglib/blob/develop/database/Wyckoff.csv).
    By default, doped uses the Wyckoff functionality of ``spglib`` (along with
    symmetry operations in pymatgen) when possible, however.

    Args:
        sgn (int):
            Space group number.

    Returns:
        dict[str, list[list[float]]]:
            Dictionary of Wyckoff labels and their corresponding coordinates.
    """
    datafile = _get_wyckoff_datafile()
    with open(datafile, encoding="utf-8") as f:
        wyckoff = _read_wyckoff_datafile(sgn, f)

    wyckoff_label_coords_dict = {}

    def _coord_string_to_array(coord_string):
        # Split string into substrings, parse each as a sympy expression,
        # then convert to list of sympy expressions
        return np.array([cached_simplify(x.replace("2x", "2*x")) for x in coord_string.split(",")])

    for element in wyckoff["letters"]:
        label = wyckoff[element]["multiplicity"] + element  # e.g. 4d
        wyckoff_coords = [_coord_string_to_array(coords) for coords in wyckoff[element]["coordinates"]]
        wyckoff_label_coords_dict[label] = wyckoff_coords

        equivalent_sites = [
            _coord_string_to_array(coords) for coords in wyckoff.get("equivalent_sites", [])
        ]

        new_coords = []  # new list for equivalent coordinates

        for coord_array in wyckoff_coords:
            for equivalent_site in equivalent_sites:
                # add coord_array and equivalent_site element-wise
                equiv_coord_array = coord_array.copy()
                equiv_coord_array = equiv_coord_array + np.array(equivalent_site)
                new_coords.append(equiv_coord_array)

        # add new_coords to wyckoff_label_coords:
        wyckoff_label_coords_dict[label].extend(new_coords)
    return wyckoff_label_coords_dict




[docs]
def get_wyckoff_label_and_equiv_coord_list(
    defect_entry: DefectEntry | None = None,
    conv_cell_site: PeriodicSite | None = None,
    sgn: int | None = None,
    wyckoff_dict: dict | None = None,
) -> tuple[str, list[list[float]]]:
    """
    Return the Wyckoff label and list of equivalent fractional coordinates
    within the conventional cell for the input defect_entry or conv_cell_site
    (whichever is provided, defaults to defect_entry if both), given a
    dictionary of Wyckoff labels and coordinates (``wyckoff_dict``).

    If ``wyckoff_dict`` is not provided, it is generated from the spacegroup
    number (sgn) using ``get_wyckoff_dict_from_sgn(sgn)``. If ``sgn`` is not
    provided, it is obtained from the bulk structure of the ``defect_entry`` if
    provided.
    """
    if wyckoff_dict is None:
        if sgn is None:
            if defect_entry is None:
                raise ValueError(
                    "If inputting `conv_cell_site` and not `defect_entry`, either `sgn` or `wyckoff_dict` "
                    "must be provided."
                )
            # get sgn from primitive unit cell of bulk structure:
            sgn = get_sga(defect_entry.defect.structure).get_space_group_number()

        wyckoff_dict = get_wyckoff_dict_from_sgn(sgn)

    def _compare_arrays(coord_list, coord_array):
        """
        Compare a list of arrays of sympy expressions (``coord_list``) with an
        array of coordinates (``coord_array``).

        Returns the matching array from the list.
        """
        variable_dicts = [{}]  # list of dicts for x,y,z

        for sympy_array in coord_list:
            match, variable_dict = evaluate_expression_and_update_dict(
                sympy_array, coord_array, variable_dicts
            )

            if match:
                # return coord list with sympy expressions subbed with variable_dict:
                return [
                    np.array(
                        [
                            np.mod(float(cached_simplify(sympy_expr).subs(variable_dict)), 1)
                            for sympy_expr in sympy_array
                        ]
                    )
                    for sympy_array in coord_list
                ]

        return None  # No match found

    # get match of coords in wyckoff_label_coords to defect site coords:
    def find_closest_match(defect_site, wyckoff_label_coords_dict):
        for label, coord_list in wyckoff_label_coords_dict.items():
            subbed_coord_list = _compare_arrays(coord_list, np.array(defect_site.frac_coords))
            if subbed_coord_list is not None:
                # convert coords in subbed_coord_list to unit cell, by rounding to 5 decimal places and
                # then modding by 1:
                subbed_coord_list = [
                    _vectorized_custom_round(np.mod(_vectorized_custom_round(coord_array, 5), 1))
                    for coord_array in subbed_coord_list
                ]
                return label, subbed_coord_list

        return None  # No match found

    def evaluate_expression(sympy_expr, coord, variable_dict):
        equation = Eq(sympy_expr, coord)
        variable = next(iter(sympy_expr.free_symbols))
        variable_dict[variable] = cached_solve(equation, variable)[0]

        return cached_simplify(sympy_expr).subs(variable_dict)

    def add_new_variable_dict(
        sympy_expr_prepend, sympy_expr, coord, current_variable_dict, variable_dicts
    ):
        new_sympy_expr = cached_simplify(sympy_expr_prepend + str(sympy_expr))
        new_dict = current_variable_dict.copy()
        evaluate_expression(new_sympy_expr, coord, new_dict)  # solve for new variable
        if new_dict not in variable_dicts:
            variable_dicts.append(new_dict)

    def evaluate_expression_and_update_dict(sympy_array, coord_array, variable_dicts):
        temp_dict = {}
        match = False

        for variable_dict in variable_dicts:
            temp_dict = variable_dict.copy()
            match = True

            # sort zipped arrays by number of variables in sympy expression:
            coord_array, sympy_array = zip(
                *sorted(zip(coord_array, sympy_array, strict=False), key=lambda x: len(x[1].free_symbols)),
                strict=False,
            )

            for coord, sympy_expr in zip(coord_array, sympy_array, strict=False):
                # Evaluate the expression with the current variable_dict
                expr_value = cached_simplify(sympy_expr).subs(temp_dict)

                # If the expression cannot be evaluated to a float
                # it means that there is a new variable in the expression
                try:
                    expr_value = np.mod(float(expr_value), 1)  # wrap to 0-1 (i.e. to unit cell)

                except TypeError:
                    # Assign the expression the value of the corresponding coordinate, and solve
                    # for the new variable
                    # first, special cases with two possible solutions due to PBC:
                    if sympy_expr == cached_simplify("-2*x"):
                        add_new_variable_dict("1+", sympy_expr, coord, temp_dict, variable_dicts)
                    elif sympy_expr == cached_simplify("2*x"):
                        add_new_variable_dict("-1+", sympy_expr, coord, temp_dict, variable_dicts)

                    expr_value = evaluate_expression(
                        sympy_expr, coord, temp_dict
                    )  # solve for new variable

                # Check if the evaluated expression matches the corresponding coordinate
                if not np.isclose(
                    np.mod(float(coord), 1),  # wrap to 0-1 (i.e. to unit cell)
                    np.mod(float(expr_value), 1),
                    atol=0.003,
                ) and not np.isclose(
                    np.mod(float(coord), 1) - 1,  # wrap to 0-1 (i.e. to unit cell)
                    np.mod(float(expr_value), 1),
                    atol=0.003,
                ):
                    match = False
                    break

            if match:
                break

        return match, temp_dict

    if defect_entry is not None:
        defect_entry.defect.site.to_unit_cell()  # ensure wrapped to unit cell

        # convert defect site to conventional unit cell for Wyckoff label matching:
        conv_cell_site = get_conv_cell_site(defect_entry)

    return find_closest_match(conv_cell_site, wyckoff_dict)



def _compare_wyckoffs(wyckoff_symbols, conv_struct, wyckoff_dict):
    """
    Compare the Wyckoff labels of a conventional structure to a list of Wyckoff
    labels.
    """

    def _multiply_wyckoffs(wyckoff_labels, n=2):
        return [str(n * int(wyckoff[:-1])) + wyckoff[-1] for wyckoff in wyckoff_labels]

    wyckoff_symbol_lists = [_multiply_wyckoffs(wyckoff_symbols, n=n) for n in range(1, 5)]  # up to 4x
    doped_wyckoffs = []

    for site in conv_struct:
        wyckoff_label, _equiv_coords = get_wyckoff_label_and_equiv_coord_list(
            conv_cell_site=site, wyckoff_dict=wyckoff_dict
        )
        if all(
            # allow for sga conventional cell (and thus wyckoffs) being a multiple of BCS conventional cell
            wyckoff_label not in wyckoff_symbol_list
            for wyckoff_symbol_list in wyckoff_symbol_lists
        ) and all(
            # allow for BCS conv cell (and thus wyckoffs) being a multiple of sga conv cell (allow it fam)
            multiplied_wyckoff_symbol not in wyckoff_symbols
            for multiplied_wyckoff_symbol in [
                _multiply_wyckoffs([wyckoff_label], n=n)[0]
                for n in range(1, 5)  # up to 4x
            ]
        ):
            return False  # break on first non-match
        doped_wyckoffs.append(wyckoff_label)

    return any(
        # allow for sga conventional cell (and thus wyckoffs) being a multiple of BCS conventional cell
        set(i) == set(doped_wyckoffs)
        for i in wyckoff_symbol_lists
    ) or any(
        set(i) == set(wyckoff_symbols)
        for i in [
            # allow for BCS conv cell (and thus wyckoffs) being a multiple of sga conv cell (allow it fam)
            _multiply_wyckoffs(doped_wyckoffs, n=n)
            for n in range(1, 5)  # up to 4x
        ]
    )  # False if no complete match, True otherwise


def _read_wyckoff_datafile(spacegroup, f, setting=None):
    """
    Read the ``wyckpos.dat`` file of specific spacegroup and returns a
    dictionary with this information.
    """
    if isinstance(spacegroup, int):
        pass
    elif isinstance(spacegroup, str):
        spacegroup = " ".join(spacegroup.strip().split())
    else:
        raise ValueError("`spacegroup` must be of type int or str")

    line = _skip_to_spacegroup(f, spacegroup, setting)
    wyckoff_dict = {"letters": [], "multiplicity": [], "number_of_letters": 0}
    line_list = line.split()
    if line_list[0].isdigit():
        wyckoff_dict["spacegroup"] = int(line_list[0])
    else:
        spacegroup, wyckoff_dict["setting"] = line_list[0].split("-")
        wyckoff_dict["spacegroup"] = int(spacegroup)
    if len(line.split()) > 1:
        eq_sites = line.split("(")[1:]
        wyckoff_dict["equivalent_sites"] = ([eq[:-1] for eq in eq_sites])[1:]
        wyckoff_dict["equivalent_sites"][-1] = wyckoff_dict["equivalent_sites"][-1][:-1]

    while True:
        line = f.readline()
        if line == "\n":
            break
        letter, multiplicity = line.split()[:2]
        coordinates_raw = line.split()[-1].split("(")[1:]
        site_symmetry = "".join(line.split()[2:-1])
        wyckoff_dict["letters"].append(letter)
        wyckoff_dict["number_of_letters"] += 1
        wyckoff_dict["multiplicity"].append(int(multiplicity))
        coordinates = [coord[:-1] for coord in coordinates_raw]
        wyckoff_dict[letter] = {
            "multiplicity": multiplicity,
            "site_symmetry": site_symmetry,
            "coordinates": coordinates,
        }

    return wyckoff_dict


def _get_wyckoff_datafile():
    """
    Return default path to Wyckoff datafile.
    """
    return os.path.join(os.path.dirname(__file__), "wyckpos.dat")


def _skip_to_spacegroup(f, spacegroup, setting=None):
    """
    Read lines from f until a blank line is encountered.
    """
    name = str(spacegroup) if setting is None else f"{spacegroup!s}-{setting}"
    while True:
        line = f.readline()
        if not line:
            raise ValueError(
                f"Invalid spacegroup {spacegroup} with setting: {setting}. Not found in the Wyckoff "
                f"database!"
            )
        if line.startswith(name):
            break
    return line



[docs]
def point_symmetry_from_defect(
    defect: Defect,
    symprec: float = 0.01,
    **kwargs,
) -> str:
    """
    Get the defect site point symmetry from a |Defect| object.

    Note that this is intended only to be used for unrelaxed, as-generated
    |Defect| objects (rather than parsed defects).

    Args:
        defect (|Defect|): |Defect| object.
        symprec (float):
            Symmetry precision to use for determining symmetry operations and
            thus point symmetries. Default is 0.01. If
            ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default), this
            value will be automatically adjusted (up to 10x, down to 0.1x)
            until the identified equivalent sites from ``spglib`` have
            consistent point group symmetries. Setting ``verbose`` to ``True``
            will print information on the trialled ``symprec`` (and
            ``dist_tol_factor`` values).
        **kwargs:
            Additional keyword arguments to pass to ``get_all_equiv_sites``,
            such as ``dist_tol_factor``, ``fixed_symprec_and_dist_tol_factor``,
            and ``verbose``.

    Returns:
        str: Defect point symmetry.
    """
    try:
        point_symm = point_symmetry_from_site(
            defect.site,
            defect.structure,
            symprec=symprec,
            **kwargs,
        )
        if point_symm is not None:
            return point_symm

        warnings.warn(
            "Defect point symmetry could not be determined from the standard approach. Falling back "
            "to supercell generation approach (which can be less efficient)."
        )
        raise ValueError("Defect point symmetry could not be determined from the standard approach.")

    except ValueError:  # symm_ops approach failed, just use diagonal defect supercell approach:
        defect_diagonal_supercell = defect.get_supercell_structure(
            sc_mat=np.array([[2, 0, 0], [0, 2, 0], [0, 0, 2]]),
            dummy_species="X",
        )  # create defect supercell, which is a diagonal expansion of the unit cell so that the defect
        # periodic image retains the unit cell symmetry, in order not to affect the point group symmetry
        sga = get_sga(defect_diagonal_supercell, symprec=symprec)
        return schoenflies_from_hermann(sga.get_point_group_symbol())




[docs]
def point_symmetry_from_defect_entry(
    defect_entry: DefectEntry,
    symprec: float | None = None,
    relaxed: bool = True,
    verbose: bool | None = None,
    return_periodicity_breaking: bool = False,
    attempt_periodicity_restoration: bool = True,
    **kwargs,
) -> str | tuple[str, bool]:
    r"""
    Get the defect site point symmetry from a |DefectEntry| object.

    Note: If ``relaxed = True`` (default), then this tries to use the
    ``defect_entry.defect_supercell`` to determine the site symmetry. This will
    thus give the `relaxed` defect point symmetry if this is a |DefectEntry|
    created from parsed defect calculations. However, it should be noted that
    this is not guaranteed to work in all cases; namely for non-diagonal
    supercell expansions, or sometimes for non-scalar supercell expansion
    matrices (e.g. a 2x1x2 expansion)(particularly with high-symmetry
    materials) which can mess up the periodicity of the cell. ``doped`` tries
    to automatically check if this is the case, and will warn you if so.

    This can also be checked by using this function on your doped `generated`
    defects:

    .. code-block:: python

        from doped.generation import get_defect_name_from_entry
        for defect_name, defect_entry in defect_gen.items():
            print(defect_name,
                  get_defect_name_from_entry(defect_entry, relaxed=False),
                  get_defect_name_from_entry(defect_entry), "\n")

    And if the point symmetries match in each case, then using this function on
    your parsed `relaxed` |DefectEntry| objects should correctly determine
    the final relaxed defect symmetry -- otherwise periodicity-breaking
    prevents this.

    If periodicity-breaking prevents auto-symmetry determination, you can
    manually determine the relaxed defect and bulk-site point symmetries,
    and/or orientational degeneracy, from visualising the structures (e.g.
    using VESTA)(can use |get_orientational_degeneracy| to obtain the
    corresponding orientational degeneracy factor for given defect/bulk-site
    point symmetries) and setting the corresponding values in the
    ``calculation_metadata['relaxed point symmetry']/['bulk site symmetry']``
    and/or ``degeneracy_factors['orientational degeneracy']`` attributes. Note
    that the bulk-site point symmetry corresponds to that of
    ``DefectEntry.defect``, or equivalently
    ``calculation_metadata["bulk_site"]/["unrelaxed_defect_structure"]``, which
    for vacancies/substitutions is the symmetry of the corresponding bulk site,
    while for interstitials it is the point symmetry of the `final relaxed`
    interstitial site when placed in the (unrelaxed) bulk structure. The
    degeneracy factor is used in the calculation of defect/carrier
    concentrations and Fermi level behaviour (discussion in
    https://doi.org/10.1039/D2FD00043A, https://doi.org/10.1039/D3CS00432E,
    https://doi.org/10.1038/s41578-025-00879-y...).

    Args:
        defect_entry (|DefectEntry|): |DefectEntry| object.
        symprec (float):
            Symmetry precision to use for determining symmetry operations and
            thus point symmetries with ``spglib``. Default is 0.01 for
            unrelaxed structures, 0.1 for relaxed (to account for residual
            structural noise, matching that used by the ``Materials Project``).
            You may want to adjust for your system (e.g. if there are very
            slight octahedral distortions etc.).
            If ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default),
            this value will be automatically adjusted (up to 10x, down to 0.1x)
            until the identified equivalent sites from ``spglib`` have
            consistent point group symmetries. Setting ``verbose`` to ``True``
            will print information on the trialled ``symprec`` (and
            ``dist_tol_factor`` values).
        relaxed (bool):
            If ``False``, determines the site symmetry using the defect site
            `in the unrelaxed bulk supercell` (i.e. the bulk site symmetry),
            otherwise tries to determine the point symmetry of the relaxed
            defect in the defect supercell. Default is ``True``.
        verbose (bool):
            If ``None`` (default) or ``True``, prints a warning if the
            supercell is detected to break the crystal periodicity (and hence
            not be able to return a reliable `relaxed` point symmetry).
            ``True`` corresponds to higher verbosity, where information on
            trialled ``symprec`` and ``dist_tol_factor`` values in equivalent
            site generation is also printed. Default is ``None``.
        return_periodicity_breaking (bool):
            If ``True``, also returns a boolean specifying if the supercell has
            been detected to break the crystal periodicity (and hence not be
            able to return a reliable `relaxed` point symmetry) or not. Mainly
            for internal ``doped`` usage, and always ``False`` if ``relaxed``
            is ``False``. Default is ``False``.
        attempt_periodicity_restoration (bool):
            If ``True`` and periodicity-breaking is detected, will attempt to
            restore periodicity by stenciling the relaxed defect geometry into
            a supercell which retains periodicity, and then getting the point
            symmetry for that. Default is ``True``.
        **kwargs:
            Additional keyword arguments to pass to ``get_all_equiv_sites``,
            such as ``dist_tol_factor`` and
            ``fixed_symprec_and_dist_tol_factor``.

    Returns:
        str:
            Defect point symmetry (and if
            ``return_periodicity_breaking = True``, a boolean specifying if the
            supercell has been detected to break the crystal periodicity).
    """
    with warnings.catch_warnings(record=True) as w:
        result = _point_symmetry_from_defect_entry(
            defect_entry,
            symprec=symprec,
            relaxed=relaxed,
            verbose=verbose,
            return_periodicity_breaking=relaxed,  # always return periodicity-breaking if relaxed=True
            **kwargs,
        )

    if not (relaxed and attempt_periodicity_restoration):
        for warning in w:
            warnings.warn(warning.message, warning.category)
        return result

    assert isinstance(result, tuple)  # typing
    relaxed_point_group, periodicity_breaking = result

    if periodicity_breaking and attempt_periodicity_restoration:
        with contextlib.suppress(Exception):
            from doped.utils.stenciling import get_defect_in_supercell
            from doped.utils.supercells import get_min_image_distance

            primitive = defect_entry.defect.structure
            min_expansion_factor = get_min_image_distance(
                defect_entry.bulk_supercell
            ) / get_min_image_distance(primitive)
            target_supercell = primitive * np.ceil(min_expansion_factor)

            periodicity_restored_defect_supercell, periodicity_restored_bulk_supercell = (
                get_defect_in_supercell(
                    defect_entry,
                    target_supercell,
                    check_bulk=False,
                    show_pbar=False,  # may be called during defect parsing etc, so don't show progress bar
                )
            )
            periodicity_restored_defect_entry = template_defect_entry_from_structures(
                periodicity_restored_defect_supercell,
                periodicity_restored_bulk_supercell,
            )
            with warnings.catch_warnings(record=True) as w:
                result = _point_symmetry_from_defect_entry(
                    periodicity_restored_defect_entry,
                    symprec=symprec,
                    relaxed=relaxed,
                    verbose=verbose,
                    return_periodicity_breaking=True,
                    **kwargs,
                )
            assert isinstance(result, tuple)  # typing
            relaxed_point_group, periodicity_breaking = result

    for warning in w:
        warnings.warn(warning.message, warning.category)

    return (
        relaxed_point_group
        if not return_periodicity_breaking
        else (relaxed_point_group, periodicity_breaking)
    )



def _point_symmetry_from_defect_entry(
    defect_entry: DefectEntry,
    symprec: float | None = None,
    relaxed: bool = True,
    verbose: bool | None = None,
    return_periodicity_breaking: bool = False,
    **kwargs,
) -> str | tuple[str, bool]:
    r"""
    Internal function to get the defect site point symmetry from a
    |DefectEntry| object.

    See ``point_symmetry_from_defect_entry`` for full documentation.
    """
    if symprec is None:
        symprec = 0.1 if relaxed else 0.01  # relaxed structures likely have structural noise
        # May need to adjust symprec (e.g. for Ag2Se, symprec of 0.2 is too large as we have very
        # slight distortions present in the unrelaxed material).
    periodicity_breaking_verbose = verbose is not False  # None/True -> True, False -> False
    equiv_sites_verbose = verbose is True  # True -> True, None/False -> False
    if not relaxed:
        return_periodicity_breaking = False  # always False when relaxed=False

    # from spglib docs: For atomic positions, roughly speaking, two position vectors x and x' in
    # Cartesian coordinates are considered to be the same if |x' - x| < symprec. The angle distortion
    # between basis vectors is converted to a length and compared with this distance tolerance.
    # we _could_ do bulk-bond length dependent symprec, which seems like it would be physically
    # reasonable (basically being a proxy accounting for larger structural/positional noise for same force
    # noise in DFT supercell calcs), but from testing this didn't seem to really improve accuracy in
    # general (e.g. for Sb2O5 split-interstitial seemed like >0.1 best, while <0.12 required for SrTiO3
    # despite smaller bond length)

    if not relaxed and defect_entry.defect.defect_type != DefectType.Interstitial:
        # then easy, can just be taken from symmetry dataset of defect structure
        symm_dataset = get_sga(defect_entry.defect.structure, symprec=symprec).get_symmetry_dataset()
        return schoenflies_from_hermann(
            symm_dataset.site_symmetry_symbols[defect_entry.defect.defect_site_index]
        )

    supercell = _get_defect_supercell(defect_entry) if relaxed else _get_bulk_supercell(defect_entry)
    supercell_sga = get_sga(supercell, symprec=symprec)

    # For relaxed = True, often only works for relaxed defect structures if it is a scalar matrix
    # supercell expansion of the primitive/conventional cell (otherwise can mess up the periodicity).
    # Sometimes works even if not a scalar matrix for _low-symmetry_ systems (counter-intuitively),
    # because then the breakdown in periodicity affects the defect site symmetry less. This can be
    # checked by seeing if the site symmetry of the defect site in the unrelaxed structure is correctly
    # guessed (in which case the supercell site symmetry is not messing up the symmetry detection),
    # as shown in the docstrings. Here we test using the 'unrelaxed_defect_structure' in the DefectEntry
    # calculation_metadata if present, which, if it gives the same result as relaxed=False, means that
    # for this defect at least, there is no periodicity-breaking which is affecting the symmetry
    # determination.
    matching = True
    if relaxed:
        if unrelaxed_defect_structure := _get_unrelaxed_defect_structure(defect_entry):
            matching = _check_relaxed_defect_symmetry_determination(
                defect_entry,
                unrelaxed_defect_structure=unrelaxed_defect_structure,
                symprec=symprec,
                verbose=periodicity_breaking_verbose,
                equiv_sites_verbose=equiv_sites_verbose,
                **kwargs,
            )
        else:
            warnings.warn(
                "`relaxed` is set to True (i.e. get _relaxed_ defect symmetry), but the "
                "`calculation_metadata`/`bulk_entry.structure` attributes are not set for `DefectEntry`, "
                "suggesting that this DefectEntry was not parsed from calculations using doped. This "
                "means doped cannot automatically check if the supercell shape is breaking the cell "
                "periodicity here or not (see docstring) -- the point symmetry groups are not guaranteed "
                "to be correct here!"
            )

    _failed = False

    spglib_point_group_symbol = None
    if relaxed:
        with contextlib.suppress(Exception):
            spglib_point_group_symbol = schoenflies_from_hermann(supercell_sga.get_point_group_symbol())

    if not relaxed or spglib_point_group_symbol is None:
        defect_supercell_bulk_site_coords = _get_defect_supercell_frac_coords(
            defect_entry, relaxed=relaxed
        )
        if defect_supercell_bulk_site_coords is not None:
            try:
                symm_dataset, unique_sites, symprec, _dist_tol_factor = (
                    _get_symm_dataset_of_struct_with_all_equiv_sites(
                        defect_supercell_bulk_site_coords,
                        supercell,
                        symprec=symprec,
                        species=(
                            defect_entry.defect.site.species_string
                            if defect_entry.defect.defect_type == DefectType.Interstitial
                            else "X"
                        ),
                        return_symprec_and_dist_tol_factor=True,
                        verbose=equiv_sites_verbose,
                        **kwargs,
                    )
                )

                # Note:
                # This code works to get the site symmetry of a defect site in a periodicity-breaking
                # supercell, but only when the defect has not yet been relaxed. Still has the issue that
                # once we have relaxation around the defect site in a periodicity-breaking supercell,
                # then the (local) point symmetry cannot be easily determined as the whole supercell
                # symmetry is broken. In future will try use stenciling to regenerate the structure in a
                # non-periodicity-breaking cell, and then determine symmetry. Alternatively, could try some
                # local structure analysis approach, but hacky...
                # unique_sites = get_all_equiv_sites(  # defect site but bulk supercell
                #     site.frac_coords, bulk_supercell,
                # )
                # sga_with_all_X = _get_sga_with_all_X(  # defect unique sites but bulk supercell
                #     bulk_supercell, unique_sites, symprec=symprec
                # )
                # symm_dataset = sga_with_all_X.get_symmetry_dataset()
            except AttributeError:
                _failed = True

        if defect_supercell_bulk_site_coords is None or _failed:
            assert symprec is not None  # typing
            point_group = point_symmetry_from_defect(
                defect_entry.defect,
                symprec=symprec,
                verbose=equiv_sites_verbose,
                **kwargs,
            )
            # possibly pymatgen DefectEntry object without defect_supercell_site set
            if relaxed:
                warnings.warn(
                    "Symmetry determination failed with the standard approach (likely due to this being a "
                    "DefectEntry which has not been generated/parsed with doped?). Thus the _relaxed_ "
                    "point group symmetry cannot be reliably automatically determined."
                )
                return (point_group, not matching) if return_periodicity_breaking else point_group

            return point_group

        if not relaxed:
            # `site_symmetry_symbols` should be used (within this equiv sites approach) for unrelaxed
            # defects (rather than `pointgroup`), as the site symmetry can be lower than the crystal point
            # group, but not vice versa; so when populating all equivalent sites (of the defect site,
            # in the bulk supercell) the overall point group should be retained and is not necessarily the
            # defect site symmetry. e.g. consider populating all equivalent sites of a C1 interstitial
            # site in a structure (such as CdTe), then the overall point group is still the bulk point
            # group, but the site symmetry is in fact C1.
            # This issue is avoided for relaxed defect supercells as we take the symm_ops of our reduced
            # symmetry cell rather than that of the bulk (so no chance of spurious symmetry upgrade from
            # equivalent sites), and hence the max point symmetry is the point symmetry of the defect
            spglib_point_group_symbols = [
                schoenflies_from_hermann(hermann_symbol)
                for hermann_symbol in symm_dataset.site_symmetry_symbols[-len(unique_sites) :]
            ]  # get point group symbols for all unique sites
            spglib_point_group_symbol = max(
                spglib_point_group_symbols, key=group_order_from_schoenflies
            )  # use highest symmetry point group symbol

            # Note that, if the supercell is non-periodicity-breaking, then the site symmetry can be simply
            # determined using the point group of the unrelaxed defect structure:
            # unrelaxed_defect_supercell = defect_entry.calculation_metadata.get(
            #     "unrelaxed_defect_structure", defect_supercell
            # )
            # return schoenflies_from_hermann(
            #     get_sga(unrelaxed_defect_supercell, symprec).get_symmetry_dataset().pointgroup,
            # )
            # But current approach works for all cases with unrelaxed defect structures

        else:
            # For relaxed defects the "defect supercell site" is not necessarily the true centre of mass of
            # the defect (e.g. for split-interstitials, split-vacancies, swapped vacancies etc),
            # so use 'pointgroup' output (in this case the reduced symmetry avoids the symmetry-upgrade
            # possibility with the equivalent sites, as when relaxed=False)
            spglib_point_group_symbol = schoenflies_from_hermann(symm_dataset.pointgroup)

            # This also works (at least for non-periodicity-breaking supercells) for relaxed defects in
            # most cases, but is slightly less robust (more sensitive to ``symprec`` choice) than the
            # approach above:
            # schoenflies_from_hermann(
            #     get_sga(
            #         defect_supercell, symprec=symprec
            #     ).get_symmetry_dataset().pointgroup
            # )

    if spglib_point_group_symbol is not None:
        return (
            (spglib_point_group_symbol, not matching)
            if return_periodicity_breaking
            else (spglib_point_group_symbol)
        )

    # symm_ops approach failed, just use diagonal defect supercell approach:
    if relaxed:
        raise RuntimeError(
            "Site symmetry could not be determined using the defect supercell, and so the relaxed site "
            "symmetry cannot be automatically determined (set relaxed=False to obtain the (unrelaxed) "
            "bulk site symmetry)."
        )

    defect_diagonal_supercell = defect_entry.defect.get_supercell_structure(
        sc_mat=np.array([[2, 0, 0], [0, 2, 0], [0, 0, 2]]),
        dummy_species="X",
    )  # create defect supercell, which is a diagonal expansion of the unit cell so that the defect
    # periodic image retains the unit cell symmetry, in order not to affect the point group symmetry
    sga = get_sga(defect_diagonal_supercell, symprec=symprec or 0.01)
    point_group = schoenflies_from_hermann(sga.get_point_group_symbol())
    return (point_group, not matching) if return_periodicity_breaking else point_group


def _check_relaxed_defect_symmetry_determination(
    defect_entry: DefectEntry,
    unrelaxed_defect_structure: Structure | None = None,
    symprec: float = 0.1,
    verbose: bool = False,
    equiv_sites_verbose: bool = False,
    **kwargs,
):
    defect_supercell_bulk_site_coords = _get_defect_supercell_frac_coords(defect_entry, relaxed=False)

    if defect_supercell_bulk_site_coords is None:
        raise AttributeError(
            "`defect_entry.defect_supercell_site` or `defect_entry.sc_defect_frac_coords` are not "
            "defined! Needed to check defect supercell periodicity (for symmetry determination)"
        )

    if unrelaxed_defect_structure is None:
        unrelaxed_defect_structure = _get_unrelaxed_defect_structure(defect_entry)

    if unrelaxed_defect_structure is not None:
        bulk_supercell = _get_bulk_supercell(defect_entry)
        match = False
        symm_dataset, unique_sites, symprec, _dist_tol = _get_symm_dataset_of_struct_with_all_equiv_sites(
            defect_supercell_bulk_site_coords,
            bulk_supercell,
            symprec=symprec,
            species=(
                defect_entry.defect.site.species_string
                if defect_entry.defect.defect_type == DefectType.Interstitial
                else "X"
            ),
            return_symprec_and_dist_tol_factor=True,
            verbose=equiv_sites_verbose,
            **kwargs,
        )
        bulk_spglib_point_group_symbols = {
            schoenflies_from_hermann(hermann_symbol)
            for hermann_symbol in symm_dataset.site_symmetry_symbols[-len(unique_sites) :]
        }  # get point group symbols for all unique sites

        # allow some small variation in symprec/dist_tol as the result can be a little sensitive:
        trial_symprecs = [symprec, symprec * 0.85, symprec * 1.15]
        for trial_symprec in trial_symprecs:
            unrelaxed_spglib_point_group_symbol = schoenflies_from_hermann(
                get_sga(unrelaxed_defect_structure, symprec=trial_symprec)
                .get_symmetry_dataset()
                .pointgroup,
            )
            if equiv_sites_verbose:
                print(
                    f"Using symprec {trial_symprec}, got point group = "
                    f"{unrelaxed_spglib_point_group_symbol} for the unrelaxed defect supercell, and the "
                    f"following for the defect sites: {bulk_spglib_point_group_symbols}"
                )

            if unrelaxed_spglib_point_group_symbol in bulk_spglib_point_group_symbols:
                match = True
                break

        if not match:
            if verbose:
                warnings.warn(
                    "`relaxed` is set to True (i.e. get _relaxed_ defect symmetry), but doped has "
                    "detected that the defect supercell is likely a non-scalar matrix expansion which "
                    "could be breaking the cell periodicity and possibly preventing the correct _relaxed_ "
                    "point group symmetry from being automatically determined. You can set relaxed=False "
                    "to instead get the (unrelaxed) bulk site symmetry, and/or manually "
                    "check/set/edit the point symmetries and corresponding orientational degeneracy "
                    "factors by inspecting/editing the "
                    "calculation_metadata['relaxed point symmetry']/['bulk site symmetry'] and "
                    "degeneracy_factors['orientational degeneracy'] attributes."
                )
            return False

        return True

    return False  # return False if symmetry couldn't be checked



[docs]
def point_symmetry_from_structure(
    structure: Structure,
    bulk_structure: Structure | None = None,
    symprec: float | None = None,
    relaxed: bool = True,
    verbose: bool | None = None,
    return_periodicity_breaking: bool = False,
    skip_atom_mapping_check: bool = False,
    **kwargs,
) -> str | tuple[str, bool]:
    r"""
    Get the point symmetry of a given structure.

    Note: For certain non-trivial supercell expansions, the broken cell
    periodicity can break the site symmetry and lead to incorrect point
    symmetry determination (particularly if using non-scalar supercell matrices
    with high symmetry materials). If the unrelaxed bulk structure
    (``bulk_structure``) is also supplied, then ``doped`` will determine the
    defect site and then automatically check if this is the case, and warn you
    if so.

    This can also be checked by using this function on your doped `generated`
    defects:

    .. code-block:: python

        from doped.generation import get_defect_name_from_entry
        for defect_name, defect_entry in defect_gen.items():
            print(defect_name,
                  get_defect_name_from_entry(defect_entry, relaxed=False),
                  get_defect_name_from_entry(defect_entry), "\n")

    And if the point symmetries match in each case, then using this function on
    your parsed `relaxed` |DefectEntry| objects should correctly determine
    the final relaxed defect symmetry -- otherwise periodicity-breaking
    prevents this.

    If ``bulk_structure`` is supplied and ``relaxed`` is set to ``False``, then
    returns the bulk site symmetry of the defect, which for
    vacancies/substitutions is the symmetry of the corresponding bulk site,
    while for interstitials it is the point symmetry of the `final relaxed`
    interstitial site when placed in the (unrelaxed) bulk structure.

    Args:
        structure (|Structure|):
            |Structure| object for which to determine the point symmetry.
        bulk_structure (|Structure|):
            |Structure| object of the bulk structure, if known. Default is
            ``None``. If provided and ``relaxed = True``, will be used to check
            if the supercell is breaking the crystal periodicity (and thus
            preventing accurate determination of the relaxed defect point
            symmetry) and warn you if so.
        symprec (float):
            Symmetry precision to use for determining symmetry operations and
            thus point symmetries with ``spglib``. Default is 0.01 for
            unrelaxed structures, 0.1 for relaxed (to account for residual
            structural noise, matching that used by the ``Materials Project``).
            You may want to adjust for your system (e.g. if there are very
            slight octahedral distortions etc.).
            If ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default),
            this value will be automatically adjusted (up to 10x, down to 0.1x)
            until the identified equivalent sites from ``spglib`` have
            consistent point group symmetries. Setting ``verbose`` to ``True``
            will print information on the trialled ``symprec`` (and
            ``dist_tol_factor`` values).
        relaxed (bool):
            If ``False``, determines the site symmetry using the defect site
            `in the unrelaxed bulk supercell` (i.e. the bulk site symmetry),
            otherwise tries to determine the point symmetry of the relaxed
            defect in the defect supercell. Default is ``True``.
        verbose (bool):
            If ``None`` (default) or ``True``, prints a warning if the
            supercell is detected to break the crystal periodicity (and hence
            not be able to return a reliable `relaxed` point symmetry).
            ``True`` corresponds to higher verbosity, where information on
            trialled ``symprec`` and ``dist_tol_factor`` values in equivalent
            site generation is also printed. Default is ``None``.
        return_periodicity_breaking (bool):
            If ``True``, also returns a boolean specifying if the supercell has
            been detected to break the crystal periodicity (and hence not be
            able to return a reliable `relaxed` point symmetry) or not. Default
            is ``False``.
        skip_atom_mapping_check (bool):
            If ``True``, skips the atom mapping check which ensures that the
            bulk and defect supercell lattice definitions are matched
            (important for accurate defect site determination and charge
            corrections). Can be used to speed up parsing when you are sure
            the cell definitions match (e.g. both supercells were generated
            with ``doped``). Default is ``False``.
        **kwargs:
            Additional keyword arguments to pass to ``get_all_equiv_sites``,
            such as ``dist_tol_factor`` and
            ``fixed_symprec_and_dist_tol_factor``, and
            ``point_symmetry_from_defect_entry``, such as
            ``attempt_periodicity_restoration``.

    Returns:
        str:
            |Structure| point symmetry (and if
            ``return_periodicity_breaking = True``, a boolean specifying if the
            supercell has been detected to break the crystal periodicity).
    """
    if symprec is None:
        symprec = 0.1 if relaxed else 0.01  # relaxed structures likely have structural noise

    spglib_point_group_symbol = None
    if relaxed and bulk_structure is None:
        with contextlib.suppress(Exception):
            spglib_point_group_symbol = schoenflies_from_hermann(
                get_sga(structure, symprec=symprec).get_point_group_symbol()
            )
        if spglib_point_group_symbol is not None:
            return (
                (spglib_point_group_symbol, False)
                if return_periodicity_breaking
                else spglib_point_group_symbol
            )

    if bulk_structure is not None:
        defect_entry = template_defect_entry_from_structures(
            structure,
            bulk_structure,
            oxi_state="Undetermined",
            multiplicity=1,
            skip_atom_mapping_check=skip_atom_mapping_check,
        )

        return point_symmetry_from_defect_entry(
            defect_entry,
            symprec=symprec,
            relaxed=relaxed,
            verbose=verbose,
            return_periodicity_breaking=return_periodicity_breaking,
            **kwargs,
        )

    # else bulk structure is None and normal relaxed structure symmetry determination failed
    raise RuntimeError(
        "Target site symmetry could not be determined using just the input structure. Please also supply "
        "the unrelaxed bulk structure (`bulk_structure`)."
    )




[docs]
def point_symmetry_from_site(
    site: PeriodicSite | np.ndarray | list,
    structure: Structure,
    coords_are_cartesian: bool = False,
    symprec: float = 0.01,
    **kwargs,
) -> str:
    r"""
    Get the point symmetry of a site in a structure.

    Args:
        site (|PeriodicSite| | np.ndarray | list):
            Site for which to determine the point symmetry. Can be a
            |PeriodicSite| object, or a list or numpy array of the
            coordinates of the site (fractional coordinates by default, or
            Cartesian if ``coords_are_cartesian = True``).
        structure (|Structure|):
            |Structure| object for which to determine the point symmetry of
            the site.
        coords_are_cartesian (bool):
            If ``True``, the site coordinates are assumed to be in Cartesian
            coordinates. Default is False.
        symprec (float):
            Symmetry precision to use for determining symmetry operations and
            thus point symmetries with ``spglib``. Default is 0.01. You may
            want to adjust for your system (e.g. if there are very slight
            octahedral distortions etc.). If
            ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default), this
            value will be automatically adjusted (up to 10x, down to 0.1x)
            until the identified equivalent sites from ``spglib`` have
            consistent point group symmetries. Setting ``verbose`` to ``True``
            will print information on the trialled ``symprec`` (and
            ``dist_tol_factor`` values).
        **kwargs:
            Additional keyword arguments to pass to ``get_all_equiv_sites``,
            such as ``dist_tol_factor``, ``fixed_symprec_and_dist_tol_factor``,
            and ``verbose``.

    Returns:
        str: Site point symmetry.
    """
    if isinstance(site, np.ndarray | list):
        site = PeriodicSite(
            species="X", coords=site, lattice=structure.lattice, coords_are_cartesian=coords_are_cartesian
        )

    try:
        symm_dataset, unique_sites = _get_symm_dataset_of_struct_with_all_equiv_sites(
            site.frac_coords,
            structure,
            symprec=symprec,
            species=site.species_string,
            **kwargs,
        )
    except SymmetryUndeterminedError:
        symm_dataset, unique_sites = _get_symm_dataset_of_struct_with_all_equiv_sites(
            site.frac_coords, structure, symprec=symprec, species="X", **kwargs
        )

    spglib_point_group_symbols = [
        schoenflies_from_hermann(hermann_symbol)
        for hermann_symbol in symm_dataset.site_symmetry_symbols[-len(unique_sites) :]
    ]  # get point group symbols for all unique sites
    return max(
        spglib_point_group_symbols, key=group_order_from_schoenflies
    )  # use highest symmetry point group symbol



# Schoenflies, Hermann-Mauguin, spgid dict: (Taken from the excellent Abipy with GNU GPL License)
_PTG_IDS = [
    ("C1", "1", 1),
    ("Ci", "-1", 2),
    ("C2", "2", 3),
    ("Cs", "m", 6),
    ("C2h", "2/m", 10),
    ("D2", "222", 16),
    ("C2v", "mm2", 25),
    ("D2h", "mmm", 47),
    ("C4", "4", 75),
    ("S4", "-4", 81),
    ("C4h", "4/m", 83),
    ("D4", "422", 89),
    ("C4v", "4mm", 99),
    ("D2d", "-42m", 111),
    ("D4h", "4/mmm", 123),
    ("C3", "3", 143),
    ("C3i", "-3", 147),
    ("D3", "32", 149),
    ("C3v", "3m", 156),
    ("D3d", "-3m", 162),
    ("C6", "6", 168),
    ("C3h", "-6", 174),
    ("C6h", "6/m", 175),
    ("D6", "622", 177),
    ("C6v", "6mm", 183),
    ("D3h", "-6m2", 189),
    ("D6h", "6/mmm", 191),
    ("T", "23", 195),
    ("Th", "m-3", 200),
    ("O", "432", 207),
    ("Td", "-43m", 215),
    ("Oh", "m-3m", 221),
]

_SCH_to_HERM = {t[0]: t[1] for t in _PTG_IDS}
_HERM_to_SCH = {t[1]: t[0] for t in _PTG_IDS}
sch_symbols = list(_SCH_to_HERM.keys())



[docs]
def schoenflies_from_hermann(herm_symbol):
    """
    Convert from Hermann-Mauguin to Schoenflies.
    """
    herm_symbol = herm_symbol.replace(".", "")
    schoenflies = _HERM_to_SCH.get(herm_symbol)
    if schoenflies is None:
        # try rearranging, symbols in spglib can be rearranged vs _HERM_to_SCH dict
        # get _HERM_to_SCH key that has the same characters as herm_symbol
        # (i.e. same characters, but possibly in a different order)
        from collections import Counter

        def find_matching_key(input_str, input_dict):
            input_str_counter = Counter(input_str)
            for key in input_dict:
                if Counter(key) == input_str_counter:
                    return key
            return None

        herm_key = find_matching_key(herm_symbol, _HERM_to_SCH)
        if herm_key is not None:
            schoenflies = _HERM_to_SCH[herm_key]

    return schoenflies



_point_group_order = {
    "C1": 1,
    "Ci": 2,  # aka. S2, -1 in Hermann-Mauguin
    "C2": 2,
    "Cs": 2,  # aka. C1h (m in Hermann-Mauguin)
    "C3": 3,
    "C4": 4,
    "S4": 4,  # C4 with improper rotation
    "C2h": 4,  # 2/m in Hermann-Mauguin
    "D2": 4,  # 222 in Hermann-Mauguin
    "C2v": 4,  # mm2 in Hermann-Mauguin
    "C3i": 6,  # aka. S6, -3 in Hermann-Mauguin
    "C6": 6,
    "C3h": 6,
    "D3": 6,  # 32 in Hermann-Mauguin
    "C3v": 6,  # 3m in Hermann-Mauguin
    "D2h": 8,  # mmm in Hermann-Mauguin
    "C4h": 8,  # 4/m in Hermann-Mauguin
    "D4": 8,  # 422 in Hermann-Mauguin
    "C4v": 8,  # 4mm in Hermann-Mauguin
    "D2d": 8,  # 42m in Hermann-Mauguin
    "C6h": 12,  # 6/m in Hermann-Mauguin
    "T": 12,  # 23 in Hermann-Mauguin
    "D3d": 12,  # 3m1 in Hermann-Mauguin
    "D6": 12,  # 622 in Hermann-Mauguin
    "C6v": 12,  # 6mm in Hermann-Mauguin
    "D3h": 12,  # 6m2 in Hermann-Mauguin
    "D4h": 16,  # 4/mmm in Hermann-Mauguin
    "D6h": 24,  # 6/mmm in Hermann-Mauguin
    "Th": 24,  # m3 in Hermann-Mauguin
    "O": 24,  # 432 in Hermann-Mauguin
    "Td": 24,  # 43m in Hermann-Mauguin
    "Oh": 48,  # m3m in Hermann-Mauguin
}



[docs]
def group_order_from_schoenflies(sch_symbol):
    """
    Return the order of the point group from the Schoenflies symbol.

    Useful for symmetry and orientational degeneracy analysis.
    """
    return _point_group_order[sch_symbol]




[docs]
def get_orientational_degeneracy(
    defect_entry: DefectEntry | None = None,
    relaxed_point_group: str | None = None,
    bulk_site_point_group: str | None = None,
    symprec: float = 0.1,
    bulk_symprec: float = 0.01,
    **kwargs,
) -> float:
    r"""
    Get the orientational degeneracy factor for a given `relaxed`
    |DefectEntry|, by supplying either the |DefectEntry| object or the bulk-
    site & relaxed defect point group symbols (e.g. "Td", "C3v" etc.).

    If a |DefectEntry| is supplied (and the point group symbols are not),
    this is computed by determining the `relaxed` defect point symmetry and the
    (unrelaxed) bulk site symmetry, and then getting the ratio of their point
    group orders (equivalent to the ratio of partition functions or number of
    symmetry operations (i.e. degeneracy)).

    For interstitials, the bulk site symmetry corresponds to the point symmetry
    of the interstitial site with `no relaxation of the host structure`, while
    for vacancies/substitutions it is simply the symmetry of their
    corresponding bulk site. This corresponds to the point symmetry of
    ``DefectEntry.defect``, or
    ``calculation_metadata["bulk_site"]/["unrelaxed_defect_structure"]``.

    Note: This tries to use the ``defect_entry.defect_supercell`` to determine
    the `relaxed` site symmetry. However, it should be noted that this is not
    guaranteed to work in all cases; namely for non-diagonal supercell
    expansions, or sometimes for non-scalar supercell expansion matrices (e.g.
    a 2x1x2 expansion)(particularly with high-symmetry materials) which can
    mess up the periodicity of the cell. ``doped`` tries to automatically check
    if this is the case, and will warn you if so.

    This can also be checked by using this function on your doped `generated`
    defects:

    .. code-block:: python

        from doped.generation import get_defect_name_from_entry
        for defect_name, defect_entry in defect_gen.items():
            print(defect_name,
                  get_defect_name_from_entry(defect_entry, relaxed=False),
                  get_defect_name_from_entry(defect_entry), "\n")

    And if the point symmetries match in each case, then using this function on
    your parsed `relaxed` |DefectEntry| objects should correctly determine
    the final relaxed defect symmetry (and orientational degeneracy) --
    otherwise periodicity-breaking prevents this.

    If periodicity-breaking prevents auto-symmetry determination, you can
    manually determine the relaxed defect and bulk-site point symmetries,
    and/or orientational degeneracy, from visualising the structures (e.g.
    using VESTA)(can use |get_orientational_degeneracy| to obtain the
    corresponding orientational degeneracy factor for given defect/bulk-site
    point symmetries) and setting the corresponding values in the
    ``calculation_metadata['relaxed point symmetry']/['bulk site symmetry']``
    and/or ``degeneracy_factors['orientational degeneracy']`` attributes. Note
    that the bulk-site point symmetry corresponds to that of
    ``DefectEntry.defect``, or equivalently
    ``calculation_metadata["bulk_site"]/["unrelaxed_defect_structure"]``, which
    for vacancies/substitutions is the symmetry of the corresponding bulk site,
    while for interstitials it is the point symmetry of the `final relaxed`
    interstitial site when placed in the (unrelaxed) bulk structure. The
    degeneracy factor is used in the calculation of defect/carrier
    concentrations and Fermi level behaviour (discussion in
    https://doi.org/10.1039/D2FD00043A, https://doi.org/10.1039/D3CS00432E,
    https://doi.org/10.1038/s41578-025-00879-y...).

    Args:
        defect_entry (|DefectEntry|):
            |DefectEntry| object. (Default = None)
        relaxed_point_group (str | None):
            Point group symmetry (e.g. "Td", "C3v" etc.) of the `relaxed`
            defect structure, if already calculated / manually determined.
            Default is ``None`` (automatically calculated by ``doped``).
        bulk_site_point_group (str | None):
            Point group symmetry (e.g. "Td", "C3v" etc.) of the defect site in
            the bulk, if already calculated / manually determined. For
            vacancies/substitutions, this should match the site symmetry label
            from ``doped`` when generating the defect, while for interstitials
            it should be the point symmetry of the `final relaxed` interstitial
            site, when placed in the bulk structure.
            Default is ``None`` (automatically calculated by ``doped``).
        symprec (float):
            Symmetry precision to use for determining symmetry operations and
            thus point symmetries with ``spglib``, for the `relaxed` point
            symmetry. Default is ``0.1`` which matches that used by the
            ``Materials Project`` and is larger than the ``pymatgen`` default
            of ``0.01`` to account for residual structural noise in relaxed
            defect supercells. You may want to adjust for your system (e.g. if
            there are very slight octahedral distortions etc.).
            If ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default),
            this value will be automatically adjusted (up to 10x, down to 0.1x)
            until the identified equivalent sites from ``spglib`` have
            consistent point group symmetries. Setting ``verbose`` to ``True``
            will print information on the trialled ``symprec`` (and
            ``dist_tol_factor`` values).
        bulk_symprec (float):
            Symmetry precision to use for determining symmetry operations and
            thus point symmetries with ``spglib``, for the `unrelaxed` (bulk
            site) point symmetry. Default is ``0.01`` which matches the
            ``pymatgen`` default. You may want to adjust for your system (e.g.
            if there are very slight octahedral distortions etc.).
            If ``fixed_symprec_and_dist_tol_factor`` is ``False`` (default),
            this value will be automatically adjusted (up to 10x, down to 0.1x)
            until the identified equivalent sites from ``spglib`` have
            consistent point group symmetries. Setting ``verbose`` to ``True``
            will print information on the trialled ``symprec`` (and
            ``dist_tol_factor`` values).
        **kwargs:
            Additional keyword arguments to pass to ``get_all_equiv_sites``,
            such as ``dist_tol_factor``, ``fixed_symprec_and_dist_tol_factor``,
            and ``verbose``, and to ``point_symmetry_from_defect_entry``, such
            as ``attempt_periodicity_restoration``.

    Returns:
        float: Orientational degeneracy factor for the defect.
    """
    if defect_entry is None:
        if relaxed_point_group is None or bulk_site_point_group is None:
            raise ValueError(
                "Either the DefectEntry or both defect and bulk site point group symbols must be "
                "provided for doped to determine the orientational degeneracy! "
            )

    else:
        if relaxed_point_group is None:
            # this will throw warning if auto-detected that supercell breaks trans symmetry
            relaxed_point_group = cast(
                "str",
                point_symmetry_from_defect_entry(
                    defect_entry,
                    symprec=symprec,
                    relaxed=True,  # relaxed
                    **kwargs,
                ),
            )  # str as return_periodicity_breaking is False

        if bulk_site_point_group is None:
            bulk_site_point_group = cast(
                "str",
                point_symmetry_from_defect_entry(
                    defect_entry,
                    symprec=bulk_symprec,  # same default as equiv_sites (-> multiplicity) for consistency
                    relaxed=False,  # unrelaxed
                    **kwargs,
                ),
            )  # str as return_periodicity_breaking is False

    # actually fine for split-vacancies (e.g. Ke's V_Sb in Sb2O5), or antisite-swaps etc:
    # (so avoid warning for now; user will be warned anyway if symmetry determination failing)
    # if orientational_degeneracy < 1 and not (
    #     defect_type == DefectType.Interstitial
    #     or (isinstance(defect_type, str) and defect_type.lower() == "interstitial")
    # ):
    #     raise ValueError(
    #         f"From the input/determined point symmetries, an orientational degeneracy factor of "
    #         f"{orientational_degeneracy} is predicted, which is less than 1, which is not reasonable "
    #         f"for vacancies/substitutions, indicating an error in the symmetry determination!"
    #     )

    return group_order_from_schoenflies(bulk_site_point_group) / group_order_from_schoenflies(
        relaxed_point_group
    )




[docs]
def is_periodic_image(
    sites_1: Iterable[PeriodicSite | np.ndarray],
    sites_2: Iterable[PeriodicSite | np.ndarray],
    frac_tol: float = 0.01,
    same_image: bool = False,
) -> bool:
    r"""
    Determine if the |PeriodicSite|/``frac_coords`` in ``sites_1`` are a
    periodic image of those in ``sites_2``.

    This function determines if the set of fractional coordinates in
    ``sites_1`` are periodic images of those in ``sites_2``, with only unique
    site matches permitted (i.e. no repeat matches; each site can only have
    one match).

    If ``same_image`` is ``True``, then the sites must all be of the same
    periodic image translation (i.e. the same rigid translation vector), such
    that ``sites_1`` can be `rigidly` translated by any combination of lattice
    vectors to match the set of fractional coordinates in ``sites_2``.

    Note that the this function tests if the `full` set of sites is a periodic
    image of the other, and not just that `each` site in ``sites_1`` is
    (individually) a periodic image of a site in ``sites_2`` (for which the
    ``PeriodicSite.is_periodic_image`` method could be used).

    Args:
        sites_1 (list): List of |PeriodicSite|\s or ``frac_coords`` arrays.
        sites_2 (list): List of |PeriodicSite|\s or ``frac_coords`` arrays.
        frac_tol (float): Fractional coordinate tolerance for comparing sites.
        same_image (bool):
            If ``True``, also check that the sites are the `same` periodic
            image translation (i.e. the same rigid translation vector).
            Default is ``False``.

    Returns:
        bool:
            ``True`` if ``sites_1`` is a periodic image of ``sites_2``,
            ``False`` otherwise.
    """
    sites_1_frac_coords = [site.frac_coords if hasattr(site, "frac_coords") else site for site in sites_1]
    sites_2_frac_coords = [site.frac_coords if hasattr(site, "frac_coords") else site for site in sites_2]

    if len(sites_1_frac_coords) != len(sites_2_frac_coords):
        raise ValueError("``is_periodic_image`` requires the same number of sites in both lists!")

    if not same_image:
        return len(sites_1_frac_coords) == len(sites_2_frac_coords) and is_coord_subset_pbc(
            sites_1_frac_coords, sites_2_frac_coords
        )

    lattice = Lattice(np.eye(3))  # if fractional coords
    for site in [next(iter(sites_1)), next(iter(sites_2))]:
        if isinstance(site, PeriodicSite):
            lattice = site.lattice

    # first need to match sites with their closest (individual) periodic images, to account for order /
    # permutation invariance:
    site_mapping = _get_site_mapping_from_coords_and_indices(
        sites_1_frac_coords, sites_2_frac_coords, lattice=lattice
    )  # list of tuples of (dist, s1_index, s2_index)
    reordered_sites_1_frac_coords = [
        sites_1_frac_coords[mapping_tuple[1]]
        for mapping_tuple in site_mapping
        if mapping_tuple[1] is not None
    ]  # TODO: Compact this 5-liner?

    pbc_frac_dist = np.subtract(reordered_sites_1_frac_coords, sites_2_frac_coords)
    pbc_frac_diff = pbc_frac_dist - np.round(pbc_frac_dist)
    return np.allclose(  # all sites are periodic images
        pbc_frac_diff, np.zeros(pbc_frac_diff.shape), atol=frac_tol
    ) and (  # all sites are _the same_ translation (periodic image)
        np.allclose(pbc_frac_dist, pbc_frac_dist[0], atol=frac_tol)
    )