Source code for molsysmt.basic.selector.molsysmt

import numpy as np
import pandas as pd
from molsysmt._private.variables import is_all
from molsysmt._private.strings import get_parenthesis
from molsysmt.element import _plural_elements_to_singular, _element_index
from re import findall
from inspect import stack, getargvalues



[docs]
def select(molecular_system, selection='all', structure_indices='all'):
    """Return the atoms picked by a MolSysMT selection sentence.

    Parenthesised sub-sentences containing ``within`` or ``bonded to`` are
    resolved first, one at a time, and replaced in the sentence by the
    placeholder ``atom_index==@sub_atom_indices``. The resulting sentence is
    then dispatched to the matching selector of this module.

    Parameters
    ----------
    molecular_system
        Molecular system, handed over unchanged to the specialised selectors.
    selection : str, default 'all'
        Selection sentence.
    structure_indices : default 'all'
        Forwarded to the recursive sub-selections and to ``select_within``.

    Returns
    -------
    The value returned by the selector chosen for `selection`.

    Raises
    ------
    TypeError
        If `selection` is not a string. (This path used to end in an
        ``UnboundLocalError`` because no result had been assigned.)
    """

    if not isinstance(selection, str):
        raise TypeError(
            "The selection must be a string, got {}.".format(type(selection).__name__))

    # `selection` (the argument itself) and `sub_atom_indices` must keep these
    # exact names: select_standard resolves '@sub_atom_indices' by walking the
    # caller frames, reading their `selection` argument and their locals.
    # NOTE(review): every resolved sub-sentence is referenced through the same
    # name, so two sibling sub-sentences in one sentence would presumably both
    # end up pointing at the latest result -- confirm.
    while True:

        sub_selection = selection_with_special_subsentences(selection)
        if not sub_selection:
            break

        sub_atom_indices = select(molecular_system, sub_selection, structure_indices)
        selection = selection.replace(sub_selection, 'atom_index==@sub_atom_indices')

    if _in_elements_of(selection):
        return select_in_elements_of(molecular_system, selection)

    if 'within' in selection:
        return select_within(molecular_system, selection, structure_indices)

    if 'bonded to' in selection:
        return select_bonded_to(molecular_system, selection)

    return select_standard(molecular_system, selection)




[docs]
def select_standard(item, selection):
    """Evaluate a plain selection sentence as a pandas query on the topology tables.

    The sentence is expanded with the configured ``'MolSysMT'`` shortcuts and
    `item` is converted to the form ``'molsysmt.Topology'`` unless it already
    has that form. Variables referenced as ``@name`` are looked up in the
    frames of the callers. Finally the atom table is merged with the group,
    molecule, entity, component and chain tables whose column names occur in
    the sentence, and the sentence is evaluated with ``DataFrame.query``.

    Parameters
    ----------
    item
        Molecular system in a form understood by ``get_form`` and ``convert``.
    selection : str
        Selection sentence. ``atom_index`` is rewritten to ``index`` before
        the query is evaluated.

    Returns
    -------
    numpy.ndarray or list
        ``numpy.arange`` over the rows of the atom table when ``is_all``
        accepts the sentence; otherwise the list of index labels of the rows
        matching the query.

    Raises
    ------
    ValueError
        If a variable referenced with ``@`` is not found in the inspected
        frames.

    Notes
    -----
    The parameter must stay named `selection`: the frame walk below (and the
    same walk performed for outer calls) identifies selection functions by an
    argument of that name.
    """

    from molsysmt.basic import convert, get_form
    from molsysmt.config import selection_shortcuts
    from molsysmt.form import _dict_modules

    tmp_selection = selection

    shortcuts = selection_shortcuts['MolSysMT']

    for key in shortcuts:
        if key in selection:
            tmp_selection = tmp_selection.replace(key, shortcuts[key])

    form_in = get_form(item)

    if form_in == 'molsysmt.Topology':

        tmp_item = item

    else:

        if isinstance(form_in, (list, tuple)):
            forms = form_in
        else:
            forms = [form_in]

        # True as soon as one form has no explicit bonds but can compute them.
        conversion_needs_missing_bonds = any(
            (not _dict_modules[ii].bonds_are_explicit) and _dict_modules[ii].bonds_can_be_computed
            for ii in forms)

        if conversion_needs_missing_bonds:

            from molsysmt.attribute.bonds_are_required_to_get_attribute import bond_dependent_attributes

            # Missing bonds are only requested when the sentence mentions an
            # attribute listed as bond dependent.
            bonds_required_by_selection = any(
                attribute in tmp_selection for attribute in bond_dependent_attributes)

            tmp_item = convert(item, to_form='molsysmt.Topology', get_missing_bonds=bonds_required_by_selection,
                               skip_digestion=True)

        else:

            tmp_item = convert(item, to_form='molsysmt.Topology', skip_digestion=True)

    # Values of the '@' variables, handed to DataFrame.query explicitly.
    # Injecting them through ``locals()[...] = value`` is not reliable: since
    # Python 3.13 (PEP 667) such writes are not visible in the frame that
    # pandas inspects.
    query_variables = {}

    if '@' in selection:

        var_names = _var_names_in_selection(selection)

        all_stack_frames = stack()

        # Frames of functions called 'wrapper' are ignored.
        no_wrapper_stack_frames = [ii for ii in all_stack_frames if ii[3] != 'wrapper']

        # Climb the call stack, starting with this very frame, while the
        # functions have a `selection` argument that still mentions every
        # variable. `counter` ends on the last such frame.
        counter = -1

        for aux_stack in no_wrapper_stack_frames:
            args, args_paramname, kwargs_paramname, values = getargvalues(aux_stack.frame)
            if 'selection' not in args:
                break
            aux_var_names = _var_names_in_selection(values['selection'])
            if not all(ii in aux_var_names for ii in var_names):
                break
            counter += 1

        # Each variable is searched in the locals and globals of that frame
        # and then of the frame right above it.
        frames_to_search = (no_wrapper_stack_frames[counter][0], no_wrapper_stack_frames[counter+1][0])

        for var_name in var_names:

            if var_name in frames_to_search[0].f_locals:
                var_value = frames_to_search[0].f_locals[var_name]
            elif var_name in frames_to_search[0].f_globals:
                var_value = frames_to_search[0].f_globals[var_name]
            elif var_name in frames_to_search[1].f_locals:
                var_value = frames_to_search[1].f_locals[var_name]
            elif var_name in frames_to_search[1].f_globals:
                var_value = frames_to_search[1].f_globals[var_name]
            else:
                raise ValueError(
                    "The variable {} was not found by the selection tool.".format(var_name))

            tmp_selection = tmp_selection.replace('@'+var_name, '@auxiliar_variable_'+var_name)

            if isinstance(var_value, np.ndarray):
                var_value = list(var_value)

            query_variables['auxiliar_variable_'+var_name] = var_value

    if is_all(tmp_selection):

        return np.array(np.arange(tmp_item.atoms.shape[0]))

    # Columns of each table that are mentioned in the sentence (substring test).
    atom_columns = [column for column in tmp_item.atoms.keys() if column in tmp_selection]
    group_columns = [column for column in tmp_item.groups.keys() if column in tmp_selection]
    component_columns = [column for column in tmp_item.components.keys() if column in tmp_selection]
    molecule_columns = [column for column in tmp_item.molecules.keys() if column in tmp_selection]
    entity_columns = [column for column in tmp_item.entities.keys() if column in tmp_selection]
    chain_columns = [column for column in tmp_item.chains.keys() if column in tmp_selection]

    # Add the key columns needed to chain the merges:
    # entities -> molecules -> groups -> atoms, components -> atoms, chains -> atoms.
    if len(entity_columns):
        if 'entity_index' not in molecule_columns:
            molecule_columns.append('entity_index')

    if len(molecule_columns):
        if 'molecule_index' not in group_columns:
            group_columns.append('molecule_index')

    if len(group_columns):
        if 'group_index' not in atom_columns:
            atom_columns.append('group_index')

    if len(component_columns):
        if 'component_index' not in atom_columns:
            atom_columns.append('component_index')

    if len(chain_columns):
        if 'chain_index' not in atom_columns:
            atom_columns.append('chain_index')

    aux_df = None

    if len(entity_columns):

        aux_df = pd.merge(tmp_item.molecules[molecule_columns], tmp_item.entities[entity_columns],
                          left_on='entity_index', right_index=True)

    if len(molecule_columns):

        right_df = tmp_item.molecules[molecule_columns] if aux_df is None else aux_df
        aux_df = pd.merge(tmp_item.groups[group_columns], right_df,
                          left_on='molecule_index', right_index=True)

    if len(group_columns):

        right_df = tmp_item.groups[group_columns] if aux_df is None else aux_df
        aux_df = pd.merge(tmp_item.atoms[atom_columns], right_df,
                          left_on='group_index', right_index=True)

    else:

        aux_df = tmp_item.atoms[atom_columns]

    if len(component_columns):

        aux_df = pd.merge(aux_df, tmp_item.components[component_columns],
                          left_on='component_index', right_index=True)

    if len(chain_columns):

        aux_df = pd.merge(aux_df, tmp_item.chains[chain_columns],
                          left_on='chain_index', right_index=True)

    # In the query the atom index is addressed as the index of the frame.
    tmp_selection = tmp_selection.replace('atom_index', 'index')

    return aux_df.query(tmp_selection, engine='python', local_dict=query_variables).index.to_list()




[docs]
def select_within(molecular_system, selection, structure_indices):
    """Resolve a sentence of the form ``A [not] within [with[out] pbc] T of B``.

    ``A`` and ``B`` are resolved with ``select``; the text ``T`` between
    ``within`` and ``of`` is passed untouched as ``threshold`` to
    ``get_contacts`` together with both sets of atoms.

    Parameters
    ----------
    molecular_system
        Molecular system forwarded to ``select`` and ``get_contacts``.
    selection : str
        Sentence containing ``" within "`` or ``" not within "`` and ``" of "``.
    structure_indices
        Forwarded to ``get_contacts`` and to the nested selections (the
        nested selections used to be evaluated without it).

    Returns
    -------
    list
        Elements of the first selection kept by the contact mask.
    """

    from molsysmt.structure.get_contacts import get_contacts

    not_within = "not within " in selection

    if not_within:
        selection_1, remainder = selection.split(" not within ")
    else:
        selection_1, remainder = selection.split(" within ")

    # Periodic boundary conditions are off unless "with pbc " is stated.
    pbc = "with pbc " in remainder

    if pbc:
        remainder = remainder.replace("with pbc ", "")
    elif "without pbc " in remainder:
        remainder = remainder.replace("without pbc ", "")

    threshold, selection_2 = remainder.split(" of ")

    atom_indices_1 = select(molecular_system, selection_1, structure_indices)
    atom_indices_2 = select(molecular_system, selection_2, structure_indices)
    cmap = get_contacts(molecular_system, selection=atom_indices_1, selection_2=atom_indices_2,
                        structure_indices=structure_indices, threshold=threshold, pbc=pbc)

    # Only the first entry along axis 0 of the reduced map is used.
    if not_within:
        # NOTE(review): assuming axis 2 runs over the second selection, this
        # keeps the atoms lacking a contact with at least one of its atoms,
        # which is not the complement of the "within" case -- confirm intent.
        mask = cmap.all(axis=2)[0] == False
    else:
        mask = cmap.any(axis=2)[0] == True

    return np.array(atom_indices_1)[np.where(mask)[0]].tolist()




[docs]
def select_bonded_to(molecular_system, selection):
    """Resolve a sentence of the form ``A [not] bonded to B``.

    ``A`` is resolved with ``select``. For ``B``, ``get`` is asked for the
    ``bonded_atoms`` of the selected atoms, and the union of those partners
    is intersected with (or, for ``not bonded to``, removed from) ``A``.

    Parameters
    ----------
    molecular_system
        Molecular system forwarded to ``select`` and ``get``.
    selection : str
        Sentence containing ``" bonded to"`` or ``" not bonded to"``.

    Returns
    -------
    list
        Atom indices of ``A`` that pass the bond criterion.
    """

    from molsysmt.basic import get

    not_bonded = "not bonded to" in selection

    if not_bonded:
        selection_1, selection_2 = selection.split(" not bonded to")
    else:
        selection_1, selection_2 = selection.split(" bonded to")

    atom_indices_1 = select(molecular_system, selection=selection_1)
    bonded_atoms = get(molecular_system, element='atom', selection=selection_2, bonded_atoms=True)

    if len(bonded_atoms) > 0:
        atom_indices_2 = np.unique(np.concatenate(bonded_atoms).ravel())
    else:
        # np.concatenate refuses an empty sequence; no atom selected by B
        # means there are no bonded partners at all.
        atom_indices_2 = np.array([], dtype=int)

    if not_bonded:
        output = np.setdiff1d(atom_indices_1, atom_indices_2, assume_unique=True).tolist()
    else:
        output = np.intersect1d(atom_indices_1, atom_indices_2, assume_unique=True).tolist()

    return output



# For every element kind usable as "in <elements>", the element kinds that are
# accepted as a second, enclosing "in <elements>" clause. The insertion order
# is the order in which select_in_elements_of tries the keys.
_aux_dict_in_elements_in = {
    'groups': ['components', 'molecules', 'chains', 'entities'],
    'components': ['molecules', 'chains', 'entities'],
    'molecules': ['chains', 'entities'],
    'chains': ['molecules', 'entities'],
    'entities': [],
}

#_aux_dict_in_elements_in = {
#        'entities': [],
#        'chains': ['molecules',
#                   'entities'],
#        'molecules': ['chains',
#                      'entities'],
#        'components': ['molecules',
#                       'chains',
#                       'entities'],
#        'groups': ['components',
#                   'molecules',
#                   'chains',
#                   'entities'],
#            }


[docs]
def select_in_elements_of(molecular_system, selection):
    """Resolve sentences with one or two ``in <elements>`` clauses.

    Two shapes are handled, where ``<elements>`` is a key of
    ``_aux_dict_in_elements_in``:

    - ``<before> in <elements_1> [of <after>]``: one list of atom indices per
      element returned by ``get`` for ``<after>``, restricted to the atoms
      selected by ``<before>``; empty lists are dropped.
    - ``<before> in <elements_1> <bbefore> in <elements_2> [of <aafter>]``:
      the same, with the elements of kind 1 additionally grouped by the
      elements of kind 2, which yields one more level of nesting.

    Empty ``<before>``, ``<after>``, ``<bbefore>`` and ``<aafter>`` mean
    ``'all'``; every ``'of '`` is removed from the text after the clause.

    Parameters
    ----------
    molecular_system
        Molecular system forwarded to ``get`` and ``select``.
    selection : str
        Selection sentence.

    Returns
    -------
    list
        Nested lists of atom indices as described above. An empty list is
        returned when, in the two-clause shape, no enclosing element keeps
        any inner element (this used to fail inside ``numpy.concatenate``).

    Raises
    ------
    NotImplementedError
        If no supported combination of clauses is found in the sentence.
    """

    from molsysmt.basic import get

    for elements_1, list_elements_2 in _aux_dict_in_elements_in.items():

        if 'in '+elements_1 in selection:

            before, after = selection.split('in '+elements_1)

            before = before.strip()
            after = after.strip()

            if _in_elements_of(after):

                # Two clauses: look for a supported enclosing element kind.
                for elements_2 in list_elements_2:

                    if 'in '+elements_2 in after:

                        element_1 = _plural_elements_to_singular[elements_1]
                        element_2 = _plural_elements_to_singular[elements_2]

                        bbefore, aafter = after.split('in '+elements_2)

                        bbefore = bbefore.strip()
                        aafter = aafter.strip()

                        bbefore = bbefore.replace('of ', '')
                        aafter = aafter.replace('of ', '')

                        if bbefore == '':
                            bbefore = 'all'

                        if aafter == '':
                            aafter = 'all'

                        # Ask `get` for the index attribute of element kind 1,
                        # first per selected element of kind 2 ...
                        kwarg = {_element_index[element_1]: True}
                        pre_output = get(molecular_system, element=element_2, selection=aafter, skip_digestion=True,
                                         **kwarg)
                        if is_all(bbefore):
                            output_2 = pre_output
                        else:
                            # ... and keep only those also selected by <bbefore>.
                            mask = get(molecular_system, element=element_1, selection=bbefore, skip_digestion=True,
                                       **kwarg)
                            output_2 = [np.intersect1d(ii, mask).tolist() for ii in pre_output]
                        output_2 = [ii for ii in output_2 if len(ii) > 0]

                        if len(output_2) == 0:
                            # Nothing left to expand; np.concatenate would
                            # raise on an empty sequence.
                            return []

                        output = []

                        # Atom indices of every surviving element of kind 1,
                        # fetched in one call and indexed by element index.
                        aux_output_2 = np.concatenate(output_2).tolist()
                        pre_output = get(molecular_system, element=element_1, selection=aux_output_2, skip_digestion=True,
                                         atom_index=True)
                        aux_dict = {ii:jj for ii,jj in zip(aux_output_2, pre_output)}

                        if before == '':
                            before = 'all'

                        if is_all(before):
                            for aux_after in output_2:
                                pre_output = [aux_dict[ii] for ii in aux_after]
                                aux_output = [ii for ii in pre_output if len(ii) > 0]
                                output.append(aux_output)
                        else:
                            mask = select(molecular_system, selection=before)
                            for aux_after in output_2:
                                pre_output = [aux_dict[ii] for ii in aux_after]
                                aux_output = [np.intersect1d(ii, mask).tolist() for ii in pre_output]
                                aux_output = [ii for ii in aux_output if len(ii) > 0]
                                output.append(aux_output)

                        return output

                # No supported enclosing kind matched: the outer loop goes on
                # with the next candidate for the first clause.

            else:

                # Single clause.
                element_1 = _plural_elements_to_singular[elements_1]

                after = after.replace('of ', '')

                if before == '':
                    before = 'all'

                if after == '':
                    after = 'all'

                pre_output = get(molecular_system, element=element_1, selection=after, skip_digestion=True,
                                 atom_index=True)
                mask = select(molecular_system, selection=before)
                output = [np.intersect1d(ii, mask).tolist() for ii in pre_output]
                output = [ii for ii in output if len(ii) > 0]

                return output

    raise NotImplementedError



#def select_in_groups_of(molecular_system, selection):
#
#    from molsysmt.basic import get
#
#    before, after = selection.split('in groups of')
#    before = before.strip()
#    after = after.strip()
#
#    if before == '' or is_all(before):
#
#        output = get(molecular_system, element='group', selection=after, atom_index=True)
#        output = [ii for ii in output]
#
#    else:
#
#        pre_output = get(molecular_system, element='group', selection=after, atom_index=True)
#        mask = select(molecular_system, selection=before)
#        output = [np.intersect1d(ii, mask) for ii in pre_output]
#        output = [ii for ii in output if ii.shape[0] > 0]
#
#    return output



[docs]
def selection_with_special_subsentences(selection):
    """Return the first sub-sentence that needs a dedicated selector.

    The candidates are the items produced by ``get_parenthesis(selection)``;
    the first one containing ``'within '`` or ``'bonded to '`` is returned.
    ``None`` is returned when no candidate qualifies.
    """

    special_markers = ('within ', 'bonded to ')

    for candidate in get_parenthesis(selection):
        if any(marker in candidate for marker in special_markers):
            return candidate

    return None



def _var_names_in_selection(selection):
    """Collect the names referenced as ``@name`` in a selection.

    A string is scanned directly; tuples and lists are scanned item by item,
    recursively, and the names are concatenated in order. Any other kind of
    input yields an empty list. The leading ``@`` is not part of the names.
    """

    if isinstance(selection, str):
        return [match[1:] for match in findall(r"@[\w']+", selection)]

    names = []

    if isinstance(selection, (tuple, list)):
        for item in selection:
            names.extend(_var_names_in_selection(item))

    return names


def _in_elements_of(selection):
    """Tell whether the selection contains an ``in <elements>`` clause.

    Returns ``True`` when any of ``in groups``, ``in components``,
    ``in chains``, ``in molecules`` or ``in entities`` occurs in `selection`,
    ``False`` otherwise.
    """

    clauses = (
        "in groups",
        "in components",
        "in chains",
        "in molecules",
        "in entities",
    )

    return any(clause in selection for clause in clauses)