Source code for molsysmt.build.define_new_chain

from molsysmt._private.digestion import digest
from molsysmt._private.variables import is_all
import numpy as np

[docs] @digest() def define_new_chain(molecular_system, selection='all', chain_id=None, chain_name=None, syntax='MolSysMT', skip_digestion=False): """ Defining a new chain from selected atoms. This function creates a new chain by assigning a unique `chain_id` and/or `chain_name` to a selection of atoms in a molecular system. The selected atoms are grouped into a new chain, which is appended to the existing list of chains in the system. This does not alter or remove any of the existing chains. Parameters ---------- molecular_system : molecular system Molecular system in any of :ref:`the supported forms <Introduction_Forms>` to be modified. selection : index, tuple, list, numpy.ndarray or str, default 'all' Selection of atoms to assign to the new chain. The selection can be given as a list, tuple, or NumPy array of atom indices (0-based integers), or as a string following one of the :ref:`supported selection syntaxes <Introduction_Selection>`. chain_id : str or int, optional Identifier of the new chain (e.g., 'A', 'B', 'C', or numeric value). If not provided, a unique `chain_id` will be automatically generated. chain_name : str, optional Descriptive name for the new chain. If not provided, the name will be left empty or automatically assigned. syntax : str, default 'MolSysMT' :ref:`Supported syntax <Introduction_Selection>` used in the `selection` argument (in case it is a string). Returns ------- molecular system A modified molecular system with the selected atoms grouped into a newly defined chain. Raises ------ NotSupportedFormError Raised if the molecular system has a non-supported form. ArgumentError Raised if any of the input values is invalid or incompatible. Notes ----- - This function is useful when merging multiple molecules, redefining topology, or recovering proper chain segmentation after structural edits. - Chains are identified by their `chain_id` (unique identifier) and `chain_name` (optional descriptive label). See Also -------- :func:`molsysmt.basic.set` Setting attribute values in a molecular system. :func:`molsysmt.basic.info` Displaying a summary of elements and chain structure. :func:`molsysmt.build.merge` Merging multiple molecular systems, often requiring reassignment of chains. Examples -------- >>> import molsysmt as msm >>> molsys = msm.convert('1TCD') >>> msm.build.define_new_chain(molsys, selection='molecule_type=="water"', chain_name='C') >>> msm.get(molsys, element='chain', chain_name=True) ['A', 'B', 'C'] .. admonition:: User guide Follow this link for a tutorial on how to work with this function: :ref:`User Guide > Tools > Build > Define new chain <Tutorial_Define_new_chain>` .. versionadded:: 1.0.0 """ from molsysmt.basic import get, set, select, get_form from molsysmt.element.chain import all_chain_names from molsysmt._private.atom_indices import complementary_atom_indices if is_all(selection): if chain_id is None: chain_id = 0 if chain_name is None: chain_name = 'A' set(molecular_system, element='atom', selection='all', chain_index=0, skip_digestion=True) set(molecular_system, element='chain', selection='all', chain_id=[chain_id], chain_name=[chain_name], skip_digestion=True) else: atom_indices = select(molecular_system, selection=selection) rest_atom_indices = complementary_atom_indices(molecular_system, atom_indices) former_chain_ids, former_chain_names = get(molecular_system, selection=rest_atom_indices, chain_id=True, chain_name=True) aux_chain_ids = sorted(np.unique(former_chain_ids).tolist()) aux_chain_names = sorted(np.unique(former_chain_names).tolist()) if chain_id is None: if isinstance(aux_chain_ids[0], str): for ii in all_chain_names: if ii not in aux_chain_ids: chain_id = ii break if chain_id is None: raise ValueError(f'MolSysMT run out of chain names') else: for ii in range(len(aux_chain_ids)): if ii not in aux_chain_ids: chain_id = ii break if chain_id is None: chain_id = len(aux_chain_ids) else: if chain_id in aux_chain_ids: raise ValueError(f'There is already a chain with chain_id={chain_id}.') if chain_name is None: for ii in all_chain_names: if ii not in aux_chain_names: chain_name = ii break if chain_name is None: raise ValueError(f'MolSysMT run out of chain names') else: if chain_name in aux_chain_names: raise ValueError(f'There is already a chain with chain_name={chain_name}.') all_atom_indices = np.array(atom_indices+rest_atom_indices) all_chain_ids = np.array([chain_id for ii in atom_indices]+former_chain_ids) all_chain_names = np.array([chain_name for ii in atom_indices]+former_chain_names) sorted_indices = np.argsort(all_atom_indices) all_atom_indices = all_atom_indices[sorted_indices] all_chain_ids = all_chain_ids[sorted_indices] all_chain_names = all_chain_names[sorted_indices] chain_index=-1 chain_ids_done=[] new_chain_indices=[] new_chain_ids=[] new_chain_names=[] aux_dict={} for ii,jj,kk in zip(all_atom_indices, all_chain_ids, all_chain_names): if jj not in chain_ids_done: chain_index+=1 aux_dict[jj]=chain_index chain_ids_done.append(jj) new_chain_indices.append(chain_index) new_chain_ids.append(jj) new_chain_names.append(kk) else: new_chain_indices.append(aux_dict[jj]) n_chains=chain_index+1 form_in = get_form(molecular_system) if form_in=='molsysmt.MolSys': molecular_system.topology.reset_chains(n_chains=n_chains) elif form_in=='molsysmt.Topology': molecular_system.reset_chains(n_chains=n_chains) set(molecular_system, element='atom', selection='all', chain_index=new_chain_indices, skip_digestion=True) set(molecular_system, element='chain', selection='all', chain_id=new_chain_ids, chain_name=new_chain_names, skip_digestion=True) if form_in=='molsysmt.MolSys': molecular_system.topology.rebuild_chains(redefine_indices=False, redefine_ids=False, redefine_types=True, redefine_names=False) elif form_in=='molsysmt.Topology': molecular_system.rebuild_chains(redefine_indices=False, redefine_ids=False, redefine_types=True, redefine_names=False) del new_chain_indices, new_chain_ids, new_chain_names del all_atom_indices, all_chain_ids, all_chain_names del atom_indices, rest_atom_indices pass