Source code for openpharmacophore.io.mol_files

from rdkit import Chem


# MOL2 files
def _mol2(file_):
    """ Load every molecule stored in an open mol2 file.

    The file content is split at each line that contains the
    ``@<TRIPOS>MOLECULE`` record, and every resulting block is parsed
    with ``Chem.MolFromMol2Block``.

    Parameters
    ----------
    file_ : file-like
        An open text file. Only its ``readlines`` method is used.

    Returns
    -------
    molecules : list[rdkit.Mol]
        One entry per ``@<TRIPOS>MOLECULE`` record, in file order. Each
        entry is the value returned by ``Chem.MolFromMol2Block`` for that
        block. Lines that precede the first record are ignored.
    """
    header = '@<TRIPOS>MOLECULE'
    lines = file_.readlines()

    starts = [index for index, line in enumerate(lines) if header in line]
    # A block runs up to (but not including) the next header line; the last
    # block runs to the end of the file. Slice ends are exclusive, so the
    # header index itself is the correct end and no line is lost.
    ends = starts[1:] + [len(lines)]

    # Lines already carry their newline, so they are concatenated verbatim;
    # commas that belong to the data (e.g. in molecule names) are preserved.
    return [
        Chem.MolFromMol2Block("".join(lines[begin:end]))
        for begin, end in zip(starts, ends)
    ]


def read_mol2(file_name):
    """ Read all the molecules contained in a mol2 file.

    Parameters
    ----------
    file_name : str
        Name of the file containing the ligands.

    Returns
    -------
    molecules : list[rdkit.Mol]
        The molecules returned by ``_mol2`` for the opened file.
    """
    with open(file_name, 'r') as mol2_file:
        return _mol2(mol2_file)


def _iter_mol2(file_):
    """ Lazily yield the molecules of an open mol2 file, one record at a time.

    A new block starts at every line containing ``@<TRIPOS>MOLECULE`` and
    extends up to the next such line or the end of the file. Each block is
    parsed with ``Chem.MolFromMol2Block``.

    Blank lines and lines starting with ``#`` that appear before the first
    record are skipped. If other content appears before any record header,
    a ``@<TRIPOS>MOLECULE`` line is prepended and that content is treated as
    the start of a block.

    Parameters
    ----------
    file_ : file-like
        An open text file. Only its ``readline`` method is used.

    Yields
    ------
    rdkit.Mol
        The value returned by ``Chem.MolFromMol2Block`` for each block.
    """
    header = '@<TRIPOS>MOLECULE'
    # None until the first block has been started.
    block_lines = None

    # readline returns "" only at end of file, which ends the iteration.
    for line in iter(file_.readline, ""):
        if header in line:
            # A header closes the block in progress (if any) and opens the
            # next one; the header line itself is kept for the new block.
            if block_lines is not None:
                yield Chem.MolFromMol2Block("".join(block_lines))
            block_lines = [line]
        elif block_lines is not None:
            block_lines.append(line)
        elif line.strip() and not line.startswith("#"):
            # Content found before any header: supply the missing header.
            block_lines = [header + "\n", line]
        # Otherwise: a blank or comment line before the first record.
        # The loop always advances, so such lines cannot stall the iterator.

    if block_lines is not None:
        yield Chem.MolFromMol2Block("".join(block_lines))


def mol2_supplier(file_name):
    """ A molecule generator for mol2 files.

    The file is opened when iteration starts and is kept open until the
    generator is exhausted or closed.

    Parameters
    ----------
    file_name : str
        Name or path of the mol2 file.

    Yields
    ------
    mol : rdkit.Mol
        A molecule.
    """
    # Delegating from inside the ``with`` block keeps the file open while
    # molecules are read lazily. Returning the inner generator instead would
    # close the file before the caller could read a single molecule.
    with open(file_name) as fp:
        yield from _iter_mol2(fp)


# SDF files
def _sdf(file_, supplier, remove_hs=False):
    """ Load the molecules of an sdf file, merging records that share a name.

    Records whose ``_Name`` property is equal are treated as conformers of
    one molecule: the first record is kept and the conformer of every later
    record is added to it.

    Parameters
    ----------
    file_ : str or file-like
        Passed unchanged as the first argument of ``supplier``.

    supplier : Callable
        Called as ``supplier(file_, removeHs=remove_hs)``; must return an
        iterable of molecules.

    remove_hs : bool, default=False
        Forwarded to ``supplier`` as ``removeHs``.

    Returns
    -------
    list[rdkit.Mol]
        One molecule per distinct name, in order of first appearance.
    """
    molecules = {}

    for mol in supplier(file_, removeHs=remove_hs):
        # Entries that are None (records the supplier could not turn into a
        # molecule) are skipped instead of failing on the attribute access.
        if mol is None:
            continue

        name = mol.GetProp("_Name")
        first_seen = molecules.get(name)
        if first_seen is None:
            molecules[name] = mol
        else:
            first_seen.AddConformer(mol.GetConformer(), assignId=True)

    return list(molecules.values())


def read_sdf(file_path, remove_hs=False):
    """ Read the molecules of an sdf file, which may hold several conformers
    of the same molecule.

    Parameters
    ----------
    file_path : str
        Path of the sdf file; handed to ``Chem.SDMolSupplier``.

    remove_hs : bool, default=False
        Whether to remove the hydrogens from the molecules.

    Returns
    -------
    list : [rdkit.Chem.Mol]
    """
    molecules = _sdf(file_path, Chem.SDMolSupplier, remove_hs=remove_hs)
    return molecules


def _iter_sdf(file_, supplier, remove_hs=False):
    """ Iterate an sdf file.

    Parameters
    ----------
    file_ : str or FileIO
        A path to a file or a stream such as a BytesIO.

    supplier : Callable
        Called as ``supplier(file_, removeHs=remove_hs)``.

    remove_hs : bool
        Forwarded to ``supplier`` as ``removeHs``.

    Returns
    -------
    Iterable
        Whatever ``supplier`` returns; expected to be an iterable of
        molecules.
    """
    return supplier(file_, removeHs=remove_hs)


# SMI files
def _parse_smi(line, sep):
    """ Build a molecule from one line of a smi file.

    The first field of the line is parsed as SMILES. When a second field is
    present it is stored in the molecule's ``_Name`` property.

    Parameters
    ----------
    line : str
        A line of the file, with or without its line terminator.

    sep : str or None
        Field separator handed to ``str.split``; ``None`` splits on any
        whitespace.

    Returns
    -------
    rdkit.Mol or None
        The value returned by ``Chem.MolFromSmiles`` for the first field, or
        None when the line has no fields at all.
    """
    # Drop the line terminator first: with an explicit separator it would
    # otherwise remain attached to the last field (usually the name).
    fragments = line.rstrip("\r\n").split(sep)
    if not fragments:
        return None

    mol = Chem.MolFromSmiles(fragments[0])
    # Only name the molecule if parsing produced one.
    if mol is not None and len(fragments) > 1:
        mol.SetProp("_Name", fragments[1])
    return mol


def _smi(file_, sep=None, header=False):
    """ Load all the molecules of an open smi file.

    Parameters
    ----------
    file_ : file-like
        An open text file. Only its ``readlines`` method is used.

    sep : str, optional
        Separator between fields of a line. Default is any whitespace.

    header : bool, default=False
        Whether the first line is a header that must be skipped.

    Returns
    -------
    list
        The result of ``_parse_smi`` for every non-blank line, in file order.
    """
    lines = file_.readlines()
    if header:
        lines = lines[1:]
    # Blank lines (e.g. a trailing empty line) carry no molecule.
    return [_parse_smi(line, sep) for line in lines if line.strip()]


def read_smi(file_name, sep=None, header=False):
    """ Read molecules from a smi file.

    Parameters
    ----------
    file_name : str
        Name or path to the file.

    sep : str
        Separator between text in a line of the file. Default behavior
        is to split between whitespace.

    header : bool, default=False
        Whether the file contains a header.

    Returns
    -------
    list
        The molecules returned by ``_smi`` for the opened file.
    """
    with open(file_name) as fp:
        return _smi(fp, sep, header)


def _iter_smi(file_, sep=None, header=False):
    """ Iterate a smi file.

    Parameters
    ----------
    file_ : FileIO
        A file like object. Only its ``readline`` method is used.

    sep : str
        Separator between fields of a line. Default is any whitespace.

    header : bool
        Whether the first line is a header that must be skipped.

    Yields
    ------
    rdkit.Mol
        The result of ``_parse_smi`` for every non-blank line.
    """
    if header:
        file_.readline()

    # readline returns "" only at end of file, so a blank line ("\n") in the
    # middle of the file is skipped rather than ending the iteration.
    for line in iter(file_.readline, ""):
        if line.strip():
            yield _parse_smi(line, sep)