Source code for stk._internal.ea.fitness_calculators.property_vector

import typing
from collections.abc import Callable, Iterable
from typing import Any

from stk._internal.databases.value import ValueDatabase
from stk._internal.ea.molecule_record import MoleculeRecord

from .fitness_calculator import FitnessCalculator

# Type variable for the molecule record handled by a fitness calculator.
# It is bound to MoleculeRecord, so it accepts MoleculeRecord itself or any
# subclass of it.
T = typing.TypeVar("T", bound=MoleculeRecord)


class PropertyVector(FitnessCalculator[T]):
    """
    Uses multiple molecular properties as a fitness value.

    Examples:

        *Calculating Fitness Values*

        .. testcode:: calculating-fitness-values

            import stk

            # First, create the functions which calculate the properties
            # of molecules.
            def get_num_atoms(record):
                return record.get_molecule().get_num_atoms()

            def get_num_bonds(record):
                return record.get_molecule().get_num_bonds()

            def get_diameter(record):
                return record.get_molecule().get_maximum_diameter()

            # Next, create the fitness calculator.
            fitness_calculator = stk.PropertyVector(
                property_functions=(
                    get_num_atoms,
                    get_num_bonds,
                    get_diameter,
                ),
            )

            # Calculate the fitness value of a molecule.
            # "value" is a tuple, holding the number of atoms, number of
            # bonds and the diameter of the molecule.
            record = stk.MoleculeRecord(
                topology_graph=stk.polymer.Linear(
                    building_blocks=(stk.BuildingBlock('BrCCBr'), ),
                    repeating_unit='A',
                    num_repeating_units=1,
                ),
            )
            value = fitness_calculator.get_fitness_value(record)

        .. testcode:: calculating-fitness-values
            :hide:

            _bb = stk.BuildingBlock('BrCCBr')
            assert value == (
                _bb.get_num_atoms(),
                _bb.get_num_bonds(),
                _bb.get_maximum_diameter(),
            )

        *Storing Fitness Values in a Database*

        Sometimes you want to store fitness values in a database. You
        can do this by providing the `output_database` parameter.

        .. testsetup:: storing-fitness-values-in-a-database

            import stk

            # Change the database used, so that when a developer
            # runs the doctests locally, their "stk" database is not
            # contaminated.
            _test_database = '_stk_doctest_database'
            _old_init = stk.ValueMongoDb
            stk.ValueMongoDb = lambda mongo_client, collection: (
                _old_init(
                    mongo_client=mongo_client,
                    database=_test_database,
                    collection=collection,
                )
            )

            # Change the database MongoClient will connect to.

            import os
            import pymongo

            _mongo_client = pymongo.MongoClient
            _mongodb_uri = os.environ.get(
                'MONGODB_URI',
                'mongodb://localhost:27017/'
            )
            pymongo.MongoClient = lambda: _mongo_client(_mongodb_uri)

        .. testcode:: storing-fitness-values-in-a-database

            import stk
            import pymongo

            # Create a database which stores the fitness value of each
            # molecule.
            fitness_db = stk.ValueMongoDb(
                # This connects to a local database - so make sure you
                # have a local MongoDB server running. You can also
                # connect to a remote MongoDB with MongoClient(), read
                # the pymongo docs to see how to do that.
                mongo_client=pymongo.MongoClient(),
                collection='fitness_values',
            )

            # Define the functions which calculate molecular properties.
            def get_num_atoms(record):
                return record.get_molecule().get_num_atoms()

            def get_num_bonds(record):
                return record.get_molecule().get_num_bonds()

            def get_diameter(record):
                return record.get_molecule().get_maximum_diameter()

            # Create the fitness calculator.
            fitness_calculator = stk.PropertyVector(
                property_functions=(
                    get_num_atoms,
                    get_num_bonds,
                    get_diameter,
                ),
                output_database=fitness_db,
            )

            # Calculate fitness values.
            record = stk.MoleculeRecord(
                topology_graph=stk.polymer.Linear(
                    building_blocks=(stk.BuildingBlock('BrCCBr'), ),
                    repeating_unit='A',
                    num_repeating_units=1,
                ),
            )
            value1 = fitness_calculator.get_fitness_value(record)

            # You can retrieve the fitness values from the database.
            value2 = fitness_db.get(record.get_molecule())

        .. testcode:: storing-fitness-values-in-a-database
            :hide:

            assert value1 == tuple(value2)

        .. testcleanup:: storing-fitness-values-in-a-database

            stk.ValueMongoDb = _old_init
            pymongo.MongoClient().drop_database(_test_database)
            pymongo.MongoClient = _mongo_client

        *Caching Fitness Values*

        Usually, if you calculate the fitness value of a molecule, you
        do not want to re-calculate it, because this may be expensive,
        and the fitness value is going to be the same anyway. By using
        the `input_database` parameter, together with the
        `output_database` parameter, you can make sure you store and
        retrieve calculated fitness values instead of repeating the
        same calculation multiple times.

        The `input_database` is checked before a calculation happens,
        to see if the value already exists, while the `output_database`
        has the calculated fitness value deposited into it.

        .. testsetup:: caching-fitness-values

            import stk

            # Change the database used, so that when a developer
            # runs the doctests locally, their "stk" database is not
            # contaminated.
            _test_database = '_stk_doctest_database'
            _old_init = stk.ValueMongoDb
            stk.ValueMongoDb = lambda mongo_client, collection: (
                _old_init(
                    mongo_client=mongo_client,
                    database=_test_database,
                    collection=collection,
                )
            )

            # Change the database MongoClient will connect to.

            import os
            import pymongo

            _mongo_client = pymongo.MongoClient
            _mongodb_uri = os.environ.get(
                'MONGODB_URI',
                'mongodb://localhost:27017/'
            )
            pymongo.MongoClient = lambda: _mongo_client(_mongodb_uri)

        .. testcode:: caching-fitness-values

            import stk
            import pymongo

            # You can use the same database for both the input_database
            # and output_database parameters.
            fitness_db = stk.ValueMongoDb(
                # This connects to a local database - so make sure you
                # have a local MongoDB server running. You can also
                # connect to a remote MongoDB with MongoClient(), read
                # the pymongo docs to see how to do that.
                mongo_client=pymongo.MongoClient(),
                collection='fitness_values',
            )

            # Define the functions which calculate molecular properties.
            def get_num_atoms(record):
                return record.get_molecule().get_num_atoms()

            def get_num_bonds(record):
                return record.get_molecule().get_num_bonds()

            def get_diameter(record):
                return record.get_molecule().get_maximum_diameter()

            # Create the fitness calculator.
            fitness_calculator = stk.PropertyVector(
                property_functions=(
                    get_num_atoms,
                    get_num_bonds,
                    get_diameter,
                ),
                input_database=fitness_db,
                output_database=fitness_db,
            )

            # Assuming that a fitness value for this molecule was not
            # deposited into the database in a previous session, this
            # will calculate the fitness value.
            record = stk.MoleculeRecord(
                topology_graph=stk.polymer.Linear(
                    building_blocks=(stk.BuildingBlock('BrCCBr'), ),
                    repeating_unit='A',
                    num_repeating_units=1,
                ),
            )
            value1 = fitness_calculator.get_fitness_value(record)
            # This will not re-calculate the fitness value, instead,
            # value1 will be retrieved from the database.
            value2 = fitness_calculator.get_fitness_value(record)

        .. testcode:: caching-fitness-values
            :hide:

            value3 = fitness_calculator.get_fitness_value(record)
            assert value2 is value3

        .. testcleanup:: caching-fitness-values

            stk.ValueMongoDb = _old_init
            pymongo.MongoClient().drop_database(_test_database)
            pymongo.MongoClient = _mongo_client

    """

    def __init__(
        self,
        property_functions: Iterable[Callable[[T], Any]],
        input_database: ValueDatabase | None = None,
        output_database: ValueDatabase | None = None,
    ) -> None:
        """
        Parameters:

            property_functions \
            (~collections.abc.Iterable[\
~collections.abc.Callable[[T], typing.Any]]):
                A group of functions, each of which is used to
                calculate a single property of the molecule. The
                functions are called with the molecule record, in the
                order given.

            input_database:
                A database to check before calling the property
                functions. If a fitness value exists for a molecule
                in the database, the stored value is returned, instead
                of calling the property functions.

            output_database:
                A database into which the fitness value is placed.
                This happens both when the value was calculated and
                when it was retrieved from `input_database`.

        """

        # Materialize the iterable once, so that one-shot iterables
        # such as generators can be reused for every record.
        self._property_functions = tuple(property_functions)
        self._input_database = input_database
        self._output_database = output_database

    def get_fitness_value(self, record: T) -> typing.Any:
        """
        Get the fitness value of a molecule.

        Parameters:

            record:
                The record of the molecule whose fitness value is
                wanted.

        Returns:

            The fitness value. If it is calculated, it is a tuple
            holding the result of each property function, in order.
            If it is found in the input database, it is returned
            exactly as the database provides it.

        """

        if self._input_database is None:
            fitness_value = self._calculate_fitness_value(record)
        else:
            try:
                fitness_value = self._input_database.get(
                    molecule=record.get_molecule(),
                )
            except KeyError:
                # The database holds no value for this molecule, so
                # fall back to calculating it.
                fitness_value = self._calculate_fitness_value(record)

        if self._output_database is not None:
            self._output_database.put(record.get_molecule(), fitness_value)

        return fitness_value

    def _calculate_fitness_value(self, record: T) -> tuple[Any, ...]:
        """
        Calculate the property vector of a molecule.

        Parameters:

            record:
                The record passed to each property function.

        Returns:

            The result of each property function, in the order the
            functions were provided.

        """

        return tuple(
            property_function(record)
            for property_function in self._property_functions
        )