Source code for openpharmacophore.molecular_systems.binding_site

import abc
import numpy as np
import pyunitwizard as puw
from typing import List
import openpharmacophore.molecular_systems.chem_feats as cf
from openpharmacophore.molecular_systems.convert import topology_to_mol
from openpharmacophore.utils import maths


class AbstractBindingSite(abc.ABC):

    @abc.abstractmethod
    def get_atoms(self, frame: int) -> np.ndarray:
        raise NotImplementedError

    @abc.abstractmethod
    def get_residues(self, frame: int) -> List[int]:
        raise NotImplementedError

    @abc.abstractmethod
    def get_chem_feats(self, frame: int) -> cf.ChemFeatContainer:
        raise NotImplementedError

    @abc.abstractmethod
    def to_rdkit(self):
        raise NotImplementedError


BS_DIST_MAX = puw.quantity(0.85, "nanometers")


[docs]class ComplexBindingSite(AbstractBindingSite): """ Class to extract and find chemical features in the binding site of a protein-ligand complex. Parameters ---------- protein : openpharmacophore.Protein ligand : openpharmacophore.Ligand """ def __init__(self, protein, ligand): self._protein = protein self._ligand = ligand self._bsite_mol = None # type: rdkit.Chem.Mol
[docs] def get_atoms(self, frame): """ Get the indices of the atoms in the binding site. This will include the ligand atoms as well. Parameters ---------- frame : int Returns ------- np.ndarray Array with the atom indices """ conformer = self._ligand.get_conformer(frame) lig_centroid = self._ligand_centroid(conformer) max_extent = self._ligand_max_extent(conformer, lig_centroid) bs_cutoff = max_extent + BS_DIST_MAX return self._protein.atoms_at_distance( frame, lig_centroid, max_dist=bs_cutoff, min_dist=0)
[docs] def get_residues(self, frame): """ Get the indices of the residues in the protein that correspond to the binding site. The binding site is defined as the sphere with centroid at the ligand centroid with a radius of ligand maximum extent plus a cutoff distance. Parameters ---------- frame : int Returns ------- list[int] Indices of the residues that encompass the binding site """ atoms = self.get_atoms(frame) return self._protein.topology.atoms_residue_index(atoms)
def _get_binding_site(self, frame): """ Create a Protein object representing the binding site Parameters ---------- frame : int Returns ------- openPharmacophore.Protein The binding site """ residues = self.get_residues(frame) atoms = self._protein.topology.residues_atoms(residues) return self._protein.slice(atoms, frame)
[docs] def get_chem_feats(self, frame, types=None): """ Get the chemical features of the binding site at the specified frame. Parameters ---------- frame : int types : set[str], optional Types of features to search for. If none all feat types will be used. Returns ------- ChemFeatContainer """ b_site = self._get_binding_site(frame) self._bsite_mol = topology_to_mol( b_site.topology, puw.get_value(b_site.coords[0], "nanometers"), remove_hyd=False) indices = cf.get_indices(self._bsite_mol, feat_def=cf.SMARTS_PROTEIN, types=types) # Donors and aromatics need to be processed differently because they require extra data donors = indices.pop("hb donor") aromatics = indices.pop("aromatic ring") container = cf.mol_chem_feats(indices, b_site.coords[0]) container.add_feats(cf.donor_chem_feats(donors, b_site.coords[0], self._bsite_mol)) container.add_feats(cf.aromatic_chem_feats(aromatics, b_site.coords[0])) return container
@staticmethod def _ligand_centroid(coords): """ Get the centroid of the ligand. Parameters --------- coords : puw.Quantity Shape (n_atoms, 3) Returns ------- puw.Quantity Shape (3, 3) """ return np.mean(coords, axis=0) @staticmethod def _ligand_max_extent(coords, centroid): """ Computes the maximum distance from the ligand centroid to any of its atoms. Parameters ---------- coords : puw.Quantity Shape (n_atoms, 3) centroid : puw.Quantity Shape (3, 3) Returns ------- puw.Quantity A scalar """ return maths.maximum_distance(centroid, coords) def _ligand_has_hyd(self, lig_res_ind): """ Returns true of the ligand has hydrogens. """ return lig_res_ind is not None and \ self._protein.topology.residue_has_hyd(lig_res_ind)
[docs] def to_rdkit(self): """ Returns the binding site as an rdkit molecule. Returns ------- rdkit.Chem.Mol """ return self._bsite_mol
class BindingSite(AbstractBindingSite): """ Class to extract and find chemical features in the binding site of a protein. """ pass