Thrombin ligands for pharmacophore extraction

from collections import defaultdict
import os
import pandas as pd
from rdkit import Chem
from IPython.display import SVG

import openpharmacophore as oph

We want to get ligand based pharmacophores for thrombin.

We’ll start by loading a set pf thrombin ligands and generating conformers for each. Then we will store the conformers on a file to avoid needing to generate new conformers each time the notebooks is ran.

lig_df = pd.read_csv("../data/thrombin.csv")
lig_df

	Smiles	PDBID	LigID
0	[H]/N=C(\C1CCC(CC1)CNC(=O)[C@@H]2C=C(CN3N2C(=O...	1C4V	IH2
1	CN[C@H](Cc1ccccc1)C(=O)N2CCC[C@H]2C(=O)NCC3CCC...	1TOM	MIN
2	c1ccc(cc1)S(=O)(=O)CCN2C(=O)N3CC=C[C@H](N3C2=O...	1D6W	00R
3	c1ccc(cc1)S(=O)(=O)CCN2C(=O)N3CC=C[C@H](N3C2=O...	1D91	00P
4	[H]/N=C(/c1ccc(cc1)C[C@H](C(=O)N2CCCCC2)NC(=O)...	1DWD	MID
5	[H]/N=C(\c1ccc2c(c1)cc([nH]2)C(=O)N3CCC(CC3)Cc...	1D4P	BPP
6	CCC1CCN(CC1)C(=O)[C@H](CCCNC(=[NH2+])N)NS(=O)(...	1FPC	0ZI

def load_ligands():
    smiles = lig_df["Smiles"].tolist()
    return oph.load_ligands(smiles, form="smi")

conf_file = "../data/thrombin_ligands.sdf"

if not os.path.isfile(conf_file):
    ligands = load_ligands()
else:
    ligands = oph.load(conf_file)

print("Number of conformers ", [l.n_conformers for l in ligands])
oph.draw_ligands(ligands, n_per_row=4, legends=[l.to_rdkit().GetProp("_Name") for l in ligands])

Number of conformers  [176, 174, 166, 147, 176, 40, 286]

../../../_images/29ccaa505762d3993d971950a1ea630092949471d7cf62eb3412ed9f73ca4873.png

Now we generate conformers for each ligand. The argument n_confs is set to -1 so that the conformer generator creates and “ideal” number of conformers for each ligand.

for lig in ligands:
    if not lig.has_conformers:
        lig.add_hydrogens()
        lig.generate_conformers(n_confs=-1)

for ii, lig in enumerate(ligands):
    print(f"Ligand {ii} has {lig.n_conformers} conformers")

Ligand 0 has 176 conformers
Ligand 1 has 174 conformers
Ligand 2 has 166 conformers
Ligand 3 has 147 conformers
Ligand 4 has 176 conformers
Ligand 5 has 40 conformers
Ligand 6 has 286 conformers

# Save conformers to file
if not os.path.isfile(conf_file):
    writer = Chem.SDWriter(conf_file)
    for lig in ligands:
        lig_rdkit = lig.to_rdkit()
        for conf in range(lig.n_conformers):
            writer.write(lig_rdkit, confId=conf)
    writer.close()

Analyizing chem feats

def chem_feats_df():
    n_feats = defaultdict(list)
    names = []
    for lig in ligands:
        names.append(lig.to_rdkit().GetProp("_Name"))
        for feat_type, ind_list in lig.feat_ind.items():
            n_feats[feat_type].append(len(ind_list))
    
    df = pd.DataFrame.from_dict(n_feats)
    df["ligand"] = names
    df.set_index("ligand", drop=True, inplace=True)
    df["total"] = df.sum(axis=1)
    return df

for lig in ligands:
    lig.get_chem_feats(0)

chem_feats_df()

	aromatic ring	hydrophobicity	negative charge	positive charge	hb acceptor	hb donor	total
ligand
1C4V	3	2	0	1	3	3	12
1TOM	1	3	0	2	4	3	13
1D6W	3	2	0	0	6	3	14
1D91	2	2	0	1	6	2	13
1DWD	3	2	0	1	5	4	15
1D4P	3	0	0	1	2	3	9
1FPC	2	4	0	2	3	4	15

drawing = oph.draw_ligands_chem_feats(ligands, lig_size=(300, 280))
SVG(drawing.GetDrawingText())

../../../_images/9da6f6a5ac8d916538f2c70c1d1bd8f774eaed8d0fd2763126ce416c73c5e9cc.svg

Finding common pharmacophores

pharma_file = "../data/thrombin_pharmacophores.mol2"

import time

start = time.time()

n_points = [3, 4, 5, 6]
min_actives = [4, 5, 6]

if not os.path.isfile(pharma_file):

    pharmacophores = []

    for ii in range(len(n_points)):
        for jj in range(len(min_actives)):        
            pnts = n_points[ii]
            actives = min_actives[jj]

            pharma = oph.LigandBasedPharmacophore(ligands)
            pharma.extract(
                    n_points=pnts, min_actives=actives, max_pharmacophores=10
            )

            print(f"Num points: {pnts}. Min actives: {actives}. Found {len(pharma)} pharmacophores")
            if len(pharma) > 0:
                for p in pharma.pharmacophores:
                    p.props["min_actives"] = actives
                pharmacophores.extend(pharma.pharmacophores)

else:
    pharmacophores = oph.pharmacophore_reader.read_mol2(pharma_file)
    
print(f"Found {len(pharmacophores)} pharmacopohores")

Found 63 pharmacopohores

end = time.time() - start
print(f"{end} seconds")

0.22133302688598633 seconds

# Save pharmacophores to file
if not os.path.isfile(pharma_file):
    oph.pharmacophore_writer.save_mol2(pharmacophores, pharma_file)

def pharmacophore_df():
    
    rows = {}
    ii = 0
    
    for pharma in pharmacophores: 
        feat_str = ""
        # Only add the top scoring pharmacophore
        for point in pharma:
            feat_str += point.short_name
            
        n_pnts = len(pharma)
        n_actives = int(pharma.props["min_actives"])
        rows[ii] = [n_pnts, n_actives, feat_str, pharma.ref_mol, pharma.score]
        ii += 1
    
    df = pd.DataFrame.from_dict(
        rows, 
        orient="index",
        columns=["Number of points", "Number of actives", "Features", "Reference ligand", "Score"]
    )
    return df

df = pharmacophore_df()
df

	Number of points	Number of actives	Features	Reference ligand	Score
0	3	4	HPR	0	0.947654
1	3	4	ADR	2	0.957363
2	3	4	AHP	1	0.959299
3	3	4	ADH	3	0.948446
4	3	4	AHR	1	0.969500
...	...	...	...	...	...
58	5	4	AADDP	0	0.301428
59	5	4	AADHP	6	0.248704
60	5	4	AAADP	4	0.202498
61	5	5	AADHR	6	0.241077
62	5	5	AADHP	4	0.139471

63 rows × 5 columns

indices = []

for pnts in n_points:
        for actives in min_actives:        
            filter_ = (df["Number of points"] == pnts) & (df["Number of actives"] == actives)
            scores = df[filter_]["Score"]
            if not scores.empty:
                indices.append(scores.idxmax())

df = df.iloc[indices]
df.set_index("Number of points", drop=True, inplace=True)
df

	Number of actives	Features	Reference ligand	Score
Number of points
3	4	AHR	1	0.969500
3	5	AHR	1	0.969500
3	6	AHR	1	0.969500
4	4	ADHP	3	0.830411
4	5	ADPR	3	0.807592
4	6	ADPR	3	0.671040
5	4	ADHPR	3	0.766332
5	5	AADHR	6	0.241077

# Get the highest scoring pharmacophores and the one with most features
top_score = max((p for p in pharmacophores), key=lambda x: x.score)
most_feats = max((p for p in pharmacophores), key=lambda x: len(x))

Visualizing the pharmacophores

viewer_1 = oph.Viewer()
ligand = ligands[top_score.ref_mol]
viewer_1.add_components([top_score, ligand])

print(f"Score: {top_score.score}")
print(f"Num points: {len(top_score)}")

Score: 0.9695001830941523
Num points: 3

viewer_1.show(struct=top_score.ref_struct)

viewer_2 = oph.Viewer()
ligand = ligands[most_feats.ref_mol]
viewer_2.add_components([most_feats, ligand])

print(f"Score: {most_feats.score}")
print(f"Num points: {len(most_feats)}")

Score: 0.4623265470544659
Num points: 5

viewer_2.show(struct=most_feats.ref_struct)