Source code for stk._internal.ea.mutation.similar_building_block

import typing
from collections.abc import Callable, Iterable, Iterator
from functools import partial

import numpy as np

from stk._internal.building_block import BuildingBlock
from stk._internal.ea.molecule_record import MoleculeRecord
from stk._internal.ea.mutation.record import MutationRecord
from stk._internal.key_makers.inchi import Inchi
from stk._internal.key_makers.molecule import MoleculeKeyMaker
from stk._internal.topology_graphs.topology_graph.topology_graph import (
    TopologyGraph,
)
from stk._internal.utilities.utilities import dice_similarity

T = typing.TypeVar("T", bound=TopologyGraph)


[docs] class SimilarBuildingBlock: """ Substitutes similar building blocks. This mutator takes a :class:`.ConstructedMolecule` and substitutes the building blocks with the most similar one from a given set. Repeated mutations on the same molecule will substituted the next most similar molecule from the set. Examples: *Constructed Molecule Mutation* .. testcode:: constructed-molecule-mutation import stk # Create a molecule which is to be mutated. bb1 = stk.BuildingBlock('NCCN', [stk.PrimaryAminoFactory()]) bb2 = stk.BuildingBlock('O=CCC=O', [stk.AldehydeFactory()]) polymer = stk.MoleculeRecord( topology_graph=stk.polymer.Linear((bb1, bb2), 'AB', 3), ) # Create molecules used to substitute building blocks. building_blocks = ( stk.BuildingBlock( smiles='NC[Si]CCN', functional_groups=[stk.PrimaryAminoFactory()], ), stk.BuildingBlock( smiles='NCCCCCCCN', functional_groups=[stk.PrimaryAminoFactory()], ), stk.BuildingBlock( smiles='NC1CCCCC1N', functional_groups=[stk.PrimaryAminoFactory()], ), ) # Create the mutator. def has_primary_amino_group(building_block): fg, = building_block.get_functional_groups(0) return type(fg) is stk.PrimaryAmino similar_bb = stk.SimilarBuildingBlock( building_blocks=building_blocks, is_replaceable=has_primary_amino_group, ) # Mutate a molecule. mutation_record1 = similar_bb.mutate(polymer) # Mutate the molecule a second time. mutation_record2 = similar_bb.mutate(polymer) """ def __init__( self, building_blocks: Iterable[BuildingBlock], is_replaceable: Callable[[BuildingBlock], bool], key_maker: MoleculeKeyMaker = Inchi(), name: str = "SimilarBuildingBlock", random_seed: int | np.random.Generator | None = None, ) -> None: """ Parameters: building_blocks (list[BuildingBlock]): A group of molecules which are used to replace building blocks in molecules being mutated. is_replaceable: This function is applied to every building block in the molecule being mutated. Building blocks which returned ``True`` are liable for substitution by one of the molecules in `building_blocks`. key_maker: Molecules which return the same key, will iterate through the same set of similar molecules. name: A name to help identify the mutator instance. random_seed: The random seed to use. """ if random_seed is None or isinstance(random_seed, int): random_seed = np.random.default_rng(random_seed) self._building_blocks = tuple(building_blocks) self._is_replaceable = is_replaceable self._key_maker = key_maker self._name = name self._generator = random_seed self._similar_building_blocks: dict[ typing.Any, dict[typing.Any, Iterator[BuildingBlock]] ] = {}
[docs] def mutate( self, record: MoleculeRecord[T], ) -> MutationRecord[MoleculeRecord[T]]: """ Return a mutant of `record`. Parameters: record: The molecule to be mutated. Returns: A record of the mutation. """ key = self._key_maker.get_key(record.get_molecule()) if key not in self._similar_building_blocks: # Maps the key to a dict. The dict maps each # building block to an iterator. # The iterators yield the next most similar molecules in # `building_blocks` to the building block. self._similar_building_blocks[key] = {} similar_building_blocks = self._similar_building_blocks[key] # Choose the building block which undergoes mutation. replaceable_building_blocks = tuple( filter( self._is_replaceable, ( bb for bb in record.get_molecule().get_building_blocks() # TODO: this is actually a type error -- maybe # get_building_blocks needs to return BuildingBlock # instances? if isinstance(bb, BuildingBlock) ), ) ) replaced_building_block = self._generator.choice( a=replaceable_building_blocks, # type: ignore ) # If the building block has not been chosen before, create an # iterator yielding similar molecules from `building_blocks` # for it. replaced_key = self._key_maker.get_key(replaced_building_block) if replaced_key not in similar_building_blocks: similar_building_blocks[replaced_key] = iter( sorted( self._building_blocks, key=partial(dice_similarity, replaced_building_block), reverse=True, ) ) try: replacement = next(similar_building_blocks[replaced_key]) except StopIteration: similar_building_blocks[replaced_key] = iter( sorted( self._building_blocks, key=partial(dice_similarity, replaced_building_block), reverse=True, ) ) replacement = next(similar_building_blocks[replaced_key]) # If the most similar molecule in `building_blocks` is itself, # then take the next most similar one. if self._key_maker.get_key(replacement) == replaced_key: try: replacement = next(similar_building_blocks[replaced_key]) except StopIteration: similar_building_blocks[replaced_key] = iter( sorted( self._building_blocks, key=partial( dice_similarity, replaced_building_block, ), reverse=True, ) ) replacement = next(similar_building_blocks[replaced_key]) # Build the new ConstructedMolecule. graph = record.get_topology_graph().with_building_blocks( { replaced_building_block: replacement, } ) return MutationRecord( molecule_record=MoleculeRecord(graph), mutator_name=self._name, )