Source code for doped.utils.supercells

"""
Utility code and functions for generating defect supercells.
"""

from typing import Any, Optional, Union

import numpy as np
from pymatgen.core.lattice import Lattice
from pymatgen.core.structure import Structure
from pymatgen.transformations.advanced_transformations import CubicSupercellTransformation
from tqdm import tqdm

from doped.utils.symmetry import _get_sga, get_clean_structure


[docs] def get_min_image_distance(structure: Structure) -> float: """ Get the minimum image distance (i.e. minimum distance between periodic images of sites in a lattice) for the input structure. This is also known as the Shortest Vector Problem (SVP), and has no known analytical solution, requiring enumeration type approaches. (https://wikipedia.org/wiki/Lattice_problem#Shortest_vector_problem_(SVP)) Args: structure (Structure): Structure object. Returns: float: Minimum image distance. """ return _get_min_image_distance_from_matrix(structure.lattice.matrix)
def _proj(b: np.ndarray, a: np.ndarray) -> np.ndarray: """ Returns the vector projection of vector b onto vector a. Based on the _proj() function in pymatgen.transformations.advanced_transformations, but made significantly more efficient for looping over many times in optimisation functions. Args: b (np.ndarray): Vector to project. a (np.ndarray): Vector to project onto. Returns: np.ndarray: Vector projection of b onto a. """ normalised_a = a / np.linalg.norm(a) return np.dot(b, normalised_a) * normalised_a def _get_min_image_distance_from_matrix(matrix: np.ndarray) -> float: """ Get the minimum image distance (i.e. minimum distance between periodic images of sites in a lattice) for the input lattice matrix, using the pymatgen get_points_in_sphere() Lattice method. This is also known as the Shortest Vector Problem (SVP), and has no known analytical solution, requiring enumeration type approaches. (https://wikipedia.org/wiki/Lattice_problem#Shortest_vector_problem_(SVP)) Args: matrix (np.ndarray): Lattice matrix. """ # Note that the max hypothetical min image distance in a 3D lattice is sixth root of 2 times the # effective cubic lattice parameter (i.e. the cube root of the volume), which is for HCP/FCC systems # while of course the minimum possible min image distance is the minimum cell vector length lattice = Lattice(matrix) eff_cubic_length = lattice.volume ** (1 / 3) max_min_dist = eff_cubic_length * (2 ** (1 / 6)) # max hypothetical min image distance in 3D lattice zipped_fcoords_dist_idx_image = list( lattice.get_points_in_sphere([[0, 0, 0]], [0, 0, 0], r=max_min_dist * 1.01) ) # sort zipped list by dist: zipped_fcoords_dist_idx_image.sort(key=lambda x: x[1]) min_dist = zipped_fcoords_dist_idx_image[1][1] # second in list is min image (first is itself, zero) if min_dist <= 0: raise ValueError( "Minimum image distance less than or equal to zero! This is possibly due to a co-planar / " "non-orthogonal lattice. Please check your inputs!" ) return round( # round to 4 decimal places to avoid tiny numerical differences messing with sorting min_dist, 4 ) def _get_min_image_distance_from_matrix_raw(matrix: np.ndarray, max_ijk: int = 10): """ Get the minimum image distance (i.e. minimum distance between periodic images of sites in a lattice) for the input lattice matrix, using brute force numpy enumeration. This is also known as the Shortest Vector Problem (SVP), and has no known analytical solution, requiring enumeration type approaches. (https://wikipedia.org/wiki/Lattice_problem#Shortest_vector_problem_(SVP)) As the cell angles deviate more from cubic (90°), the required max_ijk to get the correct converged result increases. For near-cubic systems, a max_ijk of 2 or 3 is usually sufficient. Args: matrix (np.ndarray): Lattice matrix. max_ijk (int): Maximum absolute i/j/k coefficient to allow in the search for the shortest (minimum image) vector: [i*a, j*b, k*c]. (Default = 10) """ # Note that the max hypothetical min image distance in a 3D lattice is sixth root of 2 times the # effective cubic lattice parameter (i.e. the cube root of the volume), which is for HCP/FCC systems # while of course the minimum possible min image distance is the minimum cell vector length ijk_range = np.array(range(-max_ijk, max_ijk + 1)) i, j, k = np.meshgrid(ijk_range, ijk_range, ijk_range, indexing="ij") vectors = ( i[..., np.newaxis] * matrix[0] + j[..., np.newaxis] * matrix[1] + k[..., np.newaxis] * matrix[2] ) distances = np.linalg.norm(vectors, axis=-1).flatten() return round( # round to 4 decimal places to avoid tiny numerical differences messing with sorting np.min(distances[distances > 0]), 4 ) def _get_largest_cube_from_matrix(matrix: np.ndarray, max_ijk: int = 10): """ Gets the side length of the largest possible cube that can fit in the cell defined by the input lattice matrix. As the cell angles deviate more from cubic (90°), the required max_ijk to get the correct converged result increases. For near-cubic systems, a max_ijk of 2 or 3 is usually sufficient. Similar to the implementation in pymatgen's CubicSupercellTransformation, but generalised to work for all cell shapes (e.g. needly thin cells etc), as the pymatgen one relies on the input cell being nearly cubic. E.g. gives incorrect cube size for: [[-1, -2, 0], [1, -1, 2], [1, -2, 3]] Args: matrix (np.ndarray): Lattice matrix. max_ijk (int): Maximum absolute i/j/k coefficient to allow in the search for the shortest cube length, using the projections along: [i*a, j*b, k*c]. (Default = 10) """ a = matrix[0] b = matrix[1] c = matrix[2] proj_ca = _proj(c, a) # a-c plane proj_ac = _proj(a, c) proj_ba = _proj(b, a) # b-a plane proj_ab = _proj(a, b) proj_cb = _proj(c, b) # b-c plane proj_bc = _proj(b, c) ijk_range = np.array(range(-max_ijk, max_ijk + 1)) # Create a grid of i, j indices I_vals, J_vals = np.meshgrid(ijk_range, ijk_range, indexing="ij") # Flatten I and J for vectorized computation I_flat = I_vals.flatten() J_flat = J_vals.flatten() # Include k in the vectorized computation K = ijk_range[ijk_range != 0][:, None, None] # exclude cases with k=0 # Vectorized computation for each of the three terms term1 = c * K - I_flat[:, None] * proj_ca - J_flat[:, None] * proj_cb term2 = a * K - I_flat[:, None] * proj_ac - J_flat[:, None] * proj_ab term3 = b * K - I_flat[:, None] * proj_ba - J_flat[:, None] * proj_bc # Concatenate the results and reshape length_vecs = np.concatenate((term1, term2, term3), axis=1).reshape(-1, 3) return np.min(np.linalg.norm(length_vecs, axis=1))
[docs] def cell_metric(cell_matrix: np.ndarray, target: str = "SC") -> float: """ Calculates the deviation of the given cell matrix from an ideal simple cubic (if target = "SC") or face-centred cubic (if target = "FCC") matrix, by evaluating the root mean square (RMS) difference of the vector lengths from that of the idealised values (i.e. the corresponding SC/FCC lattice vector lengths for the given cell volume). For target = "SC", the idealised lattice vector length is the effective cubic length (i.e. the cube root of the volume), while for "FCC" it is 2^(1/6) (~1.12) times the effective cubic length. This is a fixed version of the cell metric function in ASE (``get_deviation_from_optimal_cell_shape``), described in https://wiki.fysik.dtu.dk/ase/tutorials/defects/defects.html which currently does not account for rotated matrices (e.g. a cubic cell with target = "SC", which should have a perfect score of 0, will have a bad score if its lattice vectors are rotated away from x, y and z, or even if they are just swapped as z, x, y). e.g. with ASE, [[1, 0, 0], [0, 1, 0], [0, 0, 1]] and [[0, 0, 1], [1, 0, 0], [0, 1, 0]] give scores of 0 and 1, but with this function they both give perfect scores of 0 as desired. Args: cell_matrix (np.ndarray): Cell matrix for which to calculate the cell metric. target (str): Target cell shape, for which to calculate the normalised deviation score from. Either "SC" for simple cubic or "FCC" for face-centred cubic. Default = "SC" Returns: float: Cell metric (0 is perfect score) """ eff_cubic_length = float(abs(np.linalg.det(cell_matrix)) ** (1 / 3)) norms = np.linalg.norm(cell_matrix, axis=0) if target.upper() == "SC": return round( np.sqrt( # get rms difference to eff cubic np.sum(((norms - eff_cubic_length) / eff_cubic_length) ** 2) ), 4, ) # round to 4 decimal places to avoid tiny numerical differences messing with sorting if target.upper() != "FCC": raise ValueError(f"Allowed values for `target` are 'SC' or 'FCC'. Got {target}") # FCC is characterised by 60 degree angles & lattice vectors = 2**(1/6) times the eff cubic length eff_fcc_length = eff_cubic_length * 2 ** (1 / 6) return round( np.sqrt( # get rms difference to eff cubic np.sum(((norms - eff_fcc_length) / eff_fcc_length) ** 2) ), 4, ) # round to 4 decimal places to avoid tiny numerical differences messing with sorting
def _lengths_and_angles_from_matrix(matrix: np.ndarray) -> tuple[Any, ...]: lengths = tuple(np.sqrt(np.sum(matrix**2, axis=1)).tolist()) angles = np.zeros(3) for dim in range(3): j = (dim + 1) % 3 k = (dim + 2) % 3 angles[dim] = np.clip(np.dot(matrix[j], matrix[k]) / (lengths[j] * lengths[k]), -1, 1) angles = np.arccos(angles) * 180.0 / np.pi angles = tuple(angles.tolist()) return (*lengths, *angles) def _vectorized_lengths_and_angles_from_matrices(matrices: np.ndarray) -> np.ndarray: """ Vectorized version of _lengths_and_angles_from_matrix(). Matrices is a numpy array of shape (n, 3, 3), where n is the number of matrices. """ lengths = np.linalg.norm(matrices, axis=2) # Compute lengths (norms of row vectors) angles = np.zeros((matrices.shape[0], 3)) for dim in range(3): # compute angles j = (dim + 1) % 3 k = (dim + 2) % 3 dot_products = np.sum(matrices[:, j, :] * matrices[:, k, :], axis=1) angle = np.arccos(np.clip(dot_products / (lengths[:, j] * lengths[:, k]), -1, 1)) angles[:, dim] = np.degrees(angle) # Return lengths and angles, as shape matrices.shape[0] x 6 return np.concatenate((lengths, angles), axis=1) def _P_matrix_sorting_func(P: np.ndarray, cell: np.ndarray = None) -> tuple: """ Sorting function to apply on an iterable of transformation matrices. Matrices are sorted by: - minimum ASE style cubic-like metric (using the fixed, efficient doped version) - matrix symmetry (around diagonal) - minimum absolute sum of elements - minimum absolute sum of off-diagonal elements - minimum number of negative elements - minimum largest (absolute) element - maximum number of x, y, z that are equal - maximum absolute sum of diagonal elements. - maximum sum of diagonal elements. Args: P (np.ndarray): Transformation matrix. cell (np.ndarray): Cell matrix (on which to apply P). Returns: tuple: Tuple of sorting criteria values """ cubic_metric = cell_metric(np.matmul(P, cell)) if cell is not None else cell_metric(P) symmetric = np.allclose(P, P.T) abs_sum_off_diag = np.sum(np.abs(P) - np.abs(np.diag(np.diag(P)))) abs_P = np.abs(P) abs_sum = np.sum(abs_P) num_negs = np.sum(P < 0) max_abs = np.max(abs_P) abs_diag_sum = np.sum(np.abs(np.diag(P))) diag_sum = np.sum(np.diag(P)) P_flat = P.flatten() num_equals = sum( P_flat[i] == P_flat[j] for i in range(len(P_flat)) for j in range(i, len(P_flat)) ) # double (square) counting, but doesn't matter (sorting behaviour the same) # Note: Initial idea was also to use cell symmetry operations to sort, but this is far too slow, and # in theory should be accounted for with the other (min dist, cubic cell metric) criteria anyway. # struct = Structure(Lattice(P), ["H"], [[0, 0, 0]]) # sga = _get_sga(struct) # symm_ops = len(sga.get_symmetry_operations()) return ( cubic_metric, not symmetric, abs_sum_off_diag, abs_sum, num_negs, max_abs, -num_equals, -abs_diag_sum, -diag_sum, ) def _lean_sort_func(P): abs_P = np.abs(P) abs_sum = np.sum(abs_P) num_negs = np.sum(P < 0) max_abs = np.max(abs_P) diag_sum = np.sum(np.diag(P)) return (abs_sum, num_negs, max_abs, -diag_sum) def _vectorized_lean_sort_func(P_batch): abs_P = np.abs(P_batch) abs_sum = np.sum(abs_P, axis=(1, 2)) num_negs = np.sum(P_batch < 0, axis=(1, 2)) max_abs = np.max(abs_P, axis=(1, 2)) diag_sum = np.sum(np.diagonal(P_batch, axis1=1, axis2=2), axis=1) return np.stack((abs_sum, num_negs, max_abs, -diag_sum), axis=1) def _get_candidate_P_arrays( cell: np.ndarray, target_size: int, limit: int = 2, verbose: bool = False, target_metric: Optional[np.ndarray] = None, label="SC", ) -> tuple: """ Get the possible supercell transformation (P) matrices for the given cell, target_size, limit and target_metric, and also determine the unique matrices based on the transformed cell lengths and angles. """ if target_metric is None: target_metric = np.eye(3) # SC by default # Normalize cell metric to reduce computation time during looping norm = (target_size * abs(np.linalg.det(cell)) / np.linalg.det(target_metric)) ** (-1.0 / 3) norm_cell = norm * cell if verbose: print(f"{label} normalization factor (Q): {norm}") ideal_P = np.matmul(target_metric, np.linalg.inv(norm_cell)) # Approximate initial P matrix if verbose: print(f"{label} idealized transformation matrix (ideal_P):") print(ideal_P) starting_P = np.array(np.around(ideal_P, 0), dtype=int) if verbose: print(f"{label} closest integer transformation matrix (P_0, starting_P):") print(starting_P) indices = np.indices([2 * limit + 1] * 9).reshape(9, -1).T - limit dP_array = indices.reshape(-1, 3, 3) P_array = starting_P[None, :, :] + dP_array # Compute determinants and filter to only those with the correct size: dets = np.abs(np.linalg.det(P_array)) rounded_dets = np.around(dets, 0).astype(int) valid_P = P_array[rounded_dets == target_size] # any P in valid_P that are all negative, flip the sign of the matrix: valid_P[np.all(valid_P <= 0, axis=(1, 2))] *= -1 # get unique lattices before computing metrics: cell_matrices = np.einsum("ijk,kl->ijl", valid_P, norm_cell) lengths_angles = _vectorized_lengths_and_angles_from_matrices(cell_matrices) # for each row in lengths_angles, get the product multiplied by the sum, as a hash: lengths_angles_hash = np.around(np.prod(lengths_angles, axis=1) / np.sum(lengths_angles, axis=1), 4) unique_hashes, indices = np.unique(lengths_angles_hash, return_index=True) unique_cell_matrices = cell_matrices[indices] if verbose: print(f"{label} searched matrices (P_array): {len(P_array)}") print(f"{label} valid matrices (matching target_size; valid_P): {len(valid_P)}") print(f"{label} unique valid matrices (unique_cell_matrices): {len(unique_cell_matrices)}") return valid_P, norm, norm_cell, unique_cell_matrices, unique_hashes, lengths_angles_hash def _get_optimal_P( valid_P, selected_indices, unique_hashes, lengths_angles_hash, norm_cell, verbose, label, cell ): """ Get the optimal/cleanest P matrix from the given valid_P array (with provided set of grouped unique matrices), according to the _P_matrix_sorting_func. """ poss_P = [] for idx in selected_indices: hash_value = unique_hashes[idx] matching_indices = np.where(lengths_angles_hash == hash_value)[0] poss_P.extend(valid_P[matching_indices]) poss_P.sort(key=lambda x: _P_matrix_sorting_func(x, norm_cell)) if verbose: print(f"{label} number of possible P matrices with best score (poss_P): {len(poss_P)}") optimal_P = poss_P[0] # Finalize. if verbose: print(f"{label} optimal transformation matrix (P_opt):") print(optimal_P) print(f"{label} supercell size:") print(np.round(np.matmul(optimal_P, cell), 4)) return optimal_P def _min_sum_off_diagonals(prim_struct: Structure, supercell_matrix: np.ndarray): """ Get the minimum absolute sum of off-diagonal elements in the given supercell matrix (for the primitive structure), or the corresponding supercell matrix for the conventional structure (of ``prim_struct``). Used to determine if we have an ideal supercell matrix (i.e. a diagonal transformation matrix of either the primitive or conventional cells). Args: prim_struct (Structure): Primitive structure. supercell_matrix (np.ndarray): Supercell matrix to check. Returns: int: Minimum absolute sum of off-diagonal elements, for the primitive or conventional supercell matrix. """ num_off_diagonals_prim = np.sum(np.abs(supercell_matrix - np.diag(np.diag(supercell_matrix)))) sga = _get_sga(prim_struct) conv_supercell_matrix = np.matmul( supercell_matrix, sga.get_conventional_to_primitive_transformation_matrix() ) num_off_diagonals_conv = np.sum( np.abs(conv_supercell_matrix - np.diag(np.diag(conv_supercell_matrix))) ) return min(num_off_diagonals_prim, num_off_diagonals_conv)
[docs] def find_ideal_supercell( cell: np.ndarray, target_size: int, limit: int = 2, clean: bool = True, return_min_dist: bool = False, verbose: bool = False, ) -> Union[np.ndarray, tuple]: r""" Given an input cell matrix (e.g. Structure.lattice.matrix or Atoms.cell) and chosen target_size (size of supercell in number of ``cell``\s), finds an ideal supercell matrix (P) that yields the largest minimum image distance (i.e. minimum distance between periodic images of sites in a lattice), while also being as close to cubic as possible. Supercell matrices are searched for by first identifying the ideal (fractional) transformation matrix (P) that would yield a perfectly cubic supercell with volume equal to target_size, and then scanning over all matrices where the elements are within +/-``limit`` of the ideal P matrix elements (rounded to the nearest integer). For relatively small target_sizes (<100) and/or cells with mostly similar lattice vector lengths, the default ``limit`` of +/-2 performs very well. For larger ``target_size``\s, ``cell``\s with very different lattice vector lengths, and/or cases where small differences in minimum image distance are very important, a larger ``limit`` may be required (though typically only improves the minimum image distance by 1-6%). This is also known as the Shortest Vector Problem (SVP), and has no known analytical solution, requiring enumeration type approaches. (https://wikipedia.org/wiki/Lattice_problem#Shortest_vector_problem_(SVP)), so can be slow for certain cases. Args: cell (np.ndarray): Unit cell matrix for which to find a supercell. target_size (int): Target supercell size (in number of ``cell``\s). limit (int): Supercell matrices are searched for by first identifying the ideal (fractional) transformation matrix (P) that would yield a perfectly SC/FCC supercell with volume equal to target_size, and then scanning over all matrices where the elements are within +/-``limit`` of the ideal P matrix elements (rounded to the nearest integer). (Default = 2) clean (bool): Whether to return the supercell matrix which gives the 'cleanest' supercell (according to `_lattice_matrix_sorting_func`; most symmetric, with mostly positive diagonals and c >= b >= a). (Default = True) return_min_dist (bool): Whether to return the minimum image distance (in Å) as a second return value. (Default = False) verbose (bool): Whether to print out extra information. (Default = False) Returns: np.ndarray: Supercell matrix (P). float: Minimum image distance (in Å) if ``return_min_dist`` is True. """ if target_size == 1: # just identity innit return np.eye(3, dtype=int), ( _get_min_image_distance_from_matrix(cell) if return_min_dist else np.eye(3, dtype=int) ) # Initial code here is based off that in ASE's find_optimal_cell_shape() function, but with significant # efficiency improvements, and then re-based on the minimum image distance rather than cubic cell # metric, then secondarily sorted by the (fixed) cubic cell metric (in doped), and then by some other # criteria to give the cleanest output sc_target_metric = np.eye(3) # simple cubic type target fcc_target_metric = 0.5 * np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]], dtype=float) def _find_ideal_supercell_for_target_metric( cell: np.ndarray, target_size: int, limit: int = 2, verbose: bool = False, target_metric: np.ndarray = sc_target_metric, label="SC", ): ( valid_P, norm, norm_cell, unique_cell_matrices, unique_hashes, lengths_angles_hash, ) = _get_candidate_P_arrays( cell=cell, target_size=target_size, limit=limit, verbose=verbose, target_metric=target_metric, label=label, ) min_image_dists = np.array( [_get_min_image_distance_from_matrix(cell_matrix) for cell_matrix in unique_cell_matrices] ) # for near cubic systems, the min image distance in most cases is just the minimum cell vector, # so if the efficiency of this function was the bottleneck we could rank first with the fixed # cubic-cell metric, then subselect and apply this function, but at present this is not the # limiting factor in this function so not worth it. if len(min_image_dists) == 0: raise ValueError("No valid P matrices found with given settings") # get indices of min_image_dists that are equal to the minimum best_min_dist = np.max(min_image_dists) # in terms of supercell effective cubic length if verbose: print(f"{label} best minimum image distance (best_min_dist): {best_min_dist}") min_dist_indices = np.where(min_image_dists == best_min_dist)[0] return _get_optimal_P( valid_P=valid_P, selected_indices=min_dist_indices, unique_hashes=unique_hashes, lengths_angles_hash=lengths_angles_hash, norm_cell=norm_cell, verbose=verbose, label=label, cell=cell, ) sc_optimal_P = _find_ideal_supercell_for_target_metric( cell=cell, target_size=target_size, limit=limit, verbose=verbose, target_metric=sc_target_metric, label="SC", ) # tested and found that amalgamating SC/FCC target matrices earlier leads to massive slowdown, # so more efficient to just generate both this way and compare fcc_optimal_P = _find_ideal_supercell_for_target_metric( cell=cell, target_size=target_size, limit=limit, verbose=verbose, target_metric=fcc_target_metric, label="FCC", ) # recalculate min dists (reduces numerical errors inherited from transformations) sc_min_dist = round(_get_min_image_distance_from_matrix(np.matmul(sc_optimal_P, cell)), 3) fcc_min_dist = round(_get_min_image_distance_from_matrix(np.matmul(fcc_optimal_P, cell)), 3) sc_fcc_P_and_min_dists = [ (sc_optimal_P, sc_min_dist), (fcc_optimal_P, fcc_min_dist), ] sc_fcc_P_and_min_dists.sort( key=lambda x: (-x[1], _P_matrix_sorting_func(x[0], cell)) ) # sort by max min dist, then by sorting func optimal_P, min_dist = sc_fcc_P_and_min_dists[0] if clean: supercell = Structure(Lattice(cell), ["H"], [[0, 0, 0]]) * optimal_P clean_supercell, T = get_clean_structure(supercell, return_T=True) # T maps orig to clean_super # T*orig = clean -> orig = T^-1*clean # optimal_P was: P*cell = orig -> T*P*cell = clean -> P' = T*P optimal_P = np.matmul(T, optimal_P) return (optimal_P, min_dist) if return_min_dist else optimal_P
[docs] def get_pmg_cubic_supercell_dict(struct: Structure, uc_range: tuple = (1, 200)) -> dict: """ Get a dictionary of (near-)cubic supercell matrices for the given structure and range of numbers of unit cells (in the supercell). Returns a dictionary of format: .. code-block:: python {Number of Unit Cells: {"P": transformation matrix, "min_dist": minimum image distance} } for (near-)cubic supercells generated by the pymatgen CubicSupercellTransformation class. If a (near-)cubic supercell cannot be found for a given number of unit cells, then the corresponding dict value will be set to an empty dict. Args: struct (Structure): pymatgen Structure object to generate supercells for uc_range (tuple): Range of numbers of unit cells to search over Returns: dict of: ``{Number of Unit Cells: {"P": transformation matrix, "min_dist": minimum image distance}}`` """ pmg_supercell_dict = {} prim_min_dist = get_min_image_distance(struct) for i in tqdm(range(*uc_range)): cst = CubicSupercellTransformation( min_atoms=i * len(struct), max_atoms=i * len(struct), min_length=prim_min_dist, force_diagonal=False, ) try: supercell = cst.apply_transformation(struct) pmg_supercell_dict[i] = { "P": cst.transformation_matrix, "min_dist": get_min_image_distance(supercell), } except Exception: pmg_supercell_dict[i] = {} return pmg_supercell_dict
[docs] def find_optimal_cell_shape( cell: np.ndarray, target_size: int, target_shape: str = "SC", limit: int = 2, return_score: bool = False, verbose: bool = False, ) -> Union[np.ndarray, tuple]: r""" Find the transformation matrix that produces a supercell corresponding to *target_size* unit cells that most closely approximates the shape defined by *target_shape*. This is an updated version of ASE's find_optimal_cell_shape() function, but fixed to be rotationally-invariant (explained below), with significant efficiency improvements, and then secondarily sorted by the (fixed) cell metric (in doped), and then by some other criteria to give the cleanest output. Finds the optimal supercell transformation matrix by calculating the deviation of the possible supercell matrices from an ideal simple cubic (if target = "SC") or face-centred cubic (if target = "FCC") matrix - and then taking that with the best (lowest) score, by evaluating the root mean square (RMS) difference of the vector lengths from that of the idealised values (i.e. the corresponding SC/FCC lattice vector lengths for the given cell volume). For target = "SC", the idealised lattice vector length is the effective cubic length (i.e. the cube root of the volume), while for "FCC" it is 2^(1/6) (~1.12) times the effective cubic length. The ``get_deviation_from_optimal_cell_shape`` function in ASE - described in https://wiki.fysik.dtu.dk/ase/tutorials/defects/defects.html - currently does not account for rotated matrices (e.g. a cubic cell with target = "SC", which should have a perfect score of 0, will have a bad score if its lattice vectors are rotated away from x, y and z, or even if they are just swapped as z, x, y). e.g. with ASE, [[1, 0, 0], [0, 1, 0], [0, 0, 1]] and [[0, 0, 1], [1, 0, 0], [0, 1, 0]] give scores of 0 and 1, but with this function they both give perfect scores of 0 as desired. Args: cell (np.ndarray): Unit cell matrix for which to find a supercell transformation. target_size (int): Target supercell size (in number of ``cell``\s). target_shape (str): Target cell shape, for which to calculate the normalised deviation score from. Either "SC" for simple cubic or "FCC" for face-centred cubic. Default = "SC" limit (int): Supercell matrices are searched for by first identifying the ideal (fractional) transformation matrix (P) that would yield a perfectly SC/FCC supercell with volume equal to target_size, and then scanning over all matrices where the elements are within +/-``limit`` of the ideal P matrix elements (rounded to the nearest integer). (Default = 2) return_score (bool): Whether to return the cell metric score as a second return value. (Default = False) verbose (bool): Whether to print out extra information. (Default = False) Returns: np.ndarray: Supercell matrix (P). float: Cell metric (0 is perfect score) if ``return_score`` is True. """ # Set up target metric if target_shape.lower() in {"sc", "simple-cubic"}: target_shape = "SC" target_metric = np.eye(3) elif target_shape.lower() in {"fcc", "face-centered cubic"}: target_shape = "FCC" target_metric = 0.5 * np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]], dtype=float) ( valid_P, norm, norm_cell, unique_cell_matrices, unique_hashes, lengths_angles_hash, ) = _get_candidate_P_arrays( cell=cell, target_size=target_size, limit=limit, verbose=verbose, target_metric=target_metric, label=target_shape, ) score_list = [cell_metric(cell_matrix, target=target_shape) for cell_matrix in unique_cell_matrices] best_score = np.min(score_list) if verbose: print(f"Best score: {best_score}") best_score_indices = np.where(np.array(score_list) == best_score)[0] optimal_P = _get_optimal_P( valid_P=valid_P, selected_indices=best_score_indices, unique_hashes=unique_hashes, lengths_angles_hash=lengths_angles_hash, norm_cell=norm_cell, verbose=verbose, label=target_shape, cell=cell, ) if verbose: print(f"Score: {best_score}") return (optimal_P, best_score) if return_score else optimal_P