import numpy as np
import pandas as pd
from molsysmt._private.variables import is_all
from molsysmt._private.strings import get_parenthesis
from molsysmt.element import _plural_elements_to_singular, _element_index
from re import findall
from inspect import stack, getargvalues
[docs]
def select(molecular_system, selection='all', structure_indices='all'):
if isinstance(selection, str):
while selection_with_special_subsentences(selection):
sub_selection = selection_with_special_subsentences(selection)
sub_atom_indices = select(molecular_system, sub_selection, structure_indices)
selection = selection.replace(sub_selection, 'atom_index==@sub_atom_indices')
if _in_elements_of(selection):
atom_indices = select_in_elements_of(molecular_system, selection)
elif 'within' in selection:
atom_indices = select_within(molecular_system, selection, structure_indices)
elif 'bonded to' in selection:
atom_indices = select_bonded_to(molecular_system, selection)
else:
atom_indices = select_standard(molecular_system, selection)
return atom_indices
[docs]
def select_standard(item, selection):
from molsysmt.basic import convert, get_form
from molsysmt.config import selection_shortcuts
from molsysmt.form import _dict_modules
tmp_selection = selection
shortcuts = selection_shortcuts['MolSysMT']
for key in shortcuts:
if key in selection:
tmp_selection = tmp_selection.replace(key, shortcuts[key])
form_in = get_form(item)
if form_in == 'molsysmt.Topology':
tmp_item = item
else:
conversion_needs_missing_bonds=False
if isinstance(form_in, (list, tuple)):
for ii in form_in:
if (not _dict_modules[ii].bonds_are_explicit) and _dict_modules[ii].bonds_can_be_computed:
conversion_needs_missing_bonds=True
break
else:
if (not _dict_modules[form_in].bonds_are_explicit) and _dict_modules[form_in].bonds_can_be_computed:
conversion_needs_missing_bonds=True
if conversion_needs_missing_bonds:
from molsysmt.attribute.bonds_are_required_to_get_attribute import bond_dependent_attributes
bonds_required_by_selection = False
for attribute in bond_dependent_attributes:
if attribute in tmp_selection:
bonds_required_by_selection = True
break
tmp_item = convert(item, to_form='molsysmt.Topology', get_missing_bonds=bonds_required_by_selection,
skip_digestion=True)
else:
tmp_item = convert(item, to_form='molsysmt.Topology', skip_digestion=True)
if '@' in selection:
var_names = _var_names_in_selection(selection)
all_stack_frames = stack()
counter = -1
n_frames = len(all_stack_frames)
no_wrapper_stack_frames = [ii for ii in all_stack_frames if ii[3] != 'wrapper']
for aux_stack in no_wrapper_stack_frames:
args, args_paramname, kwargs_paramname, values = getargvalues(aux_stack.frame)
if 'selection' in args:
selection_input = values['selection']
aux_var_names = _var_names_in_selection(selection_input)
if all([ii in aux_var_names for ii in var_names]):
counter += 1
else:
break
else:
break
# print(counter)
# for ii in range(len(no_wrapper_stack_frames)):
# aaa = no_wrapper_stack_frames[ii]
# print(ii, aaa[3])
# if 'selection' in getargvalues(aaa.frame)[3]:
# print('#####', getargvalues(aaa.frame)[3]['selection'])
# print('>>>', var_names[0] in aaa[0].f_locals)
# print('>>>', var_names[0] in aaa[0].f_globals)
for var_name in var_names:
if var_name in no_wrapper_stack_frames[counter][0].f_locals:
var_value = no_wrapper_stack_frames[counter][0].f_locals[var_name]
elif var_name in no_wrapper_stack_frames[counter][0].f_globals:
var_value = no_wrapper_stack_frames[counter][0].f_globals[var_name]
elif var_name in no_wrapper_stack_frames[counter+1][0].f_locals:
var_value = no_wrapper_stack_frames[counter+1][0].f_locals[var_name]
elif var_name in no_wrapper_stack_frames[counter+1][0].f_globals:
var_value = no_wrapper_stack_frames[counter+1][0].f_globals[var_name]
else:
raise ValueError("The variable", var_name, "was not found by the selection tool.")
tmp_selection = tmp_selection.replace('@'+var_name, '@auxiliar_variable_'+var_name)
if type(var_value) in [np.ndarray]:
var_value = list(var_value)
locals()['auxiliar_variable_'+var_name]=var_value
if is_all(tmp_selection):
output = np.array(np.arange(tmp_item.atoms.shape[0]))
else:
atom_columns = []
group_columns = []
component_columns = []
molecule_columns = []
entity_columns = []
chain_columns = []
for column in tmp_item.atoms.keys():
if column in tmp_selection:
atom_columns.append(column)
for column in tmp_item.groups.keys():
if column in tmp_selection:
group_columns.append(column)
for column in tmp_item.components.keys():
if column in tmp_selection:
component_columns.append(column)
for column in tmp_item.molecules.keys():
if column in tmp_selection:
molecule_columns.append(column)
for column in tmp_item.entities.keys():
if column in tmp_selection:
entity_columns.append(column)
for column in tmp_item.chains.keys():
if column in tmp_selection:
chain_columns.append(column)
if len(entity_columns):
if 'entity_index' not in molecule_columns:
molecule_columns.append('entity_index')
if len(molecule_columns):
if 'molecule_index' not in group_columns:
group_columns.append('molecule_index')
if len(group_columns):
if 'group_index' not in atom_columns:
atom_columns.append('group_index')
if len(component_columns):
if 'component_index' not in atom_columns:
atom_columns.append('component_index')
if len(chain_columns):
if 'chain_index' not in atom_columns:
atom_columns.append('chain_index')
aux_df = None
if len(entity_columns):
aux_df = pd.merge(tmp_item.molecules[molecule_columns], tmp_item.entities[entity_columns],
left_on='entity_index', right_index=True)
if len(molecule_columns):
if aux_df is None:
aux_df = pd.merge(tmp_item.groups[group_columns], tmp_item.molecules[molecule_columns],
left_on='molecule_index', right_index=True)
else:
aux_df = pd.merge(tmp_item.groups[group_columns], aux_df,
left_on='molecule_index', right_index=True)
if len(group_columns):
if aux_df is None:
aux_df = pd.merge(tmp_item.atoms[atom_columns], tmp_item.groups[group_columns],
left_on='group_index', right_index=True)
else:
aux_df = pd.merge(tmp_item.atoms[atom_columns], aux_df,
left_on='group_index', right_index=True)
else:
aux_df = tmp_item.atoms[atom_columns]
if len(component_columns):
aux_df = pd.merge(aux_df, tmp_item.components[component_columns],
left_on='component_index', right_index=True)
if len(chain_columns):
aux_df = pd.merge(aux_df, tmp_item.chains[chain_columns],
left_on='chain_index', right_index=True)
tmp_selection = tmp_selection.replace('atom_index','index')
output = aux_df.query(tmp_selection, engine='python').index.to_list()
del aux_df
return output
[docs]
def select_within(molecular_system, selection, structure_indices):
from molsysmt.structure.get_contacts import get_contacts
not_within = False
if "not within " in selection:
selection_1, tmp_selection = selection.split(" not within ")
not_within = True
else:
selection_1, tmp_selection = selection.split(" within ")
pbc = False
if "with pbc " in tmp_selection:
pbc = True
tmp_selection = tmp_selection.replace("with pbc ", "")
elif "without pbc " in tmp_selection:
tmp_selection = tmp_selection.replace("without pbc ", "")
threshold, selection_2 = tmp_selection.split(" of ")
atom_indices_1 = select(molecular_system, selection_1)
atom_indices_2 = select(molecular_system, selection_2)
cmap = get_contacts(molecular_system, selection=atom_indices_1, selection_2=atom_indices_2,
structure_indices=structure_indices, threshold=threshold, pbc=pbc)
if not_within:
output = np.array(atom_indices_1)[np.where(cmap.all(axis=2)[0] == False)[0]].tolist()
else:
output = np.array(atom_indices_1)[np.where(cmap.any(axis=2)[0] == True)[0]].tolist()
return output
[docs]
def select_bonded_to(molecular_system, selection):
from molsysmt.basic import get
not_bonded = False
if "not bonded to" in selection:
selection_1, selection_2 = selection.split(" not bonded to")
not_bonded = True
else:
selection_1, selection_2 = selection.split(" bonded to")
atom_indices_1 = select(molecular_system, selection=selection_1)
atom_indices_2 = get(molecular_system, element='atom', selection=selection_2, bonded_atoms=True)
atom_indices_2 = np.unique(np.concatenate(atom_indices_2).ravel())
if not_bonded:
output = np.setdiff1d(atom_indices_1, atom_indices_2, assume_unique=True).tolist()
else:
output = np.intersect1d(atom_indices_1, atom_indices_2, assume_unique=True).tolist()
return output
_aux_dict_in_elements_in = {
'groups': ['components',
'molecules',
'chains',
'entities'],
'components': ['molecules',
'chains',
'entities'],
'molecules': ['chains',
'entities'],
'chains': ['molecules',
'entities'],
'entities': [],
}
#_aux_dict_in_elements_in = {
# 'entities': [],
# 'chains': ['molecules',
# 'entities'],
# 'molecules': ['chains',
# 'entities'],
# 'components': ['molecules',
# 'chains',
# 'entities'],
# 'groups': ['components',
# 'molecules',
# 'chains',
# 'entities'],
# }
[docs]
def select_in_elements_of(molecular_system, selection):
from molsysmt.basic import get
for elements_1, list_elements_2 in _aux_dict_in_elements_in.items():
if 'in '+elements_1 in selection:
before, after = selection.split('in '+elements_1)
before = before.strip()
after = after.strip()
if _in_elements_of(after):
for elements_2 in list_elements_2:
if 'in '+elements_2 in after:
element_1 = _plural_elements_to_singular[elements_1]
element_2 = _plural_elements_to_singular[elements_2]
bbefore, aafter = after.split('in '+elements_2)
bbefore = bbefore.strip()
aafter = aafter.strip()
bbefore = bbefore.replace('of ', '')
aafter = aafter.replace('of ', '')
if bbefore == '':
bbefore = 'all'
if aafter == '':
aafter = 'all'
kwarg = {_element_index[element_1]: True}
pre_output = get(molecular_system, element=element_2, selection=aafter, skip_digestion=True,
**kwarg)
if is_all(bbefore):
output_2 = pre_output
else:
mask = get(molecular_system, element=element_1, selection=bbefore, skip_digestion=True,
**kwarg)
output_2 = [np.intersect1d(ii, mask).tolist() for ii in pre_output]
output_2 = [ii for ii in output_2 if len(ii) > 0]
output = []
aux_output_2 = np.concatenate(output_2).tolist()
pre_output = get(molecular_system, element=element_1, selection=aux_output_2, skip_digestion=True,
atom_index=True)
aux_dict = {ii:jj for ii,jj in zip(aux_output_2, pre_output)}
if before == '':
before = 'all'
if is_all(before):
for aux_after in output_2:
pre_output = [aux_dict[ii] for ii in aux_after]
aux_output = [ii for ii in pre_output if len(ii) > 0]
output.append(aux_output)
else:
mask = select(molecular_system, selection=before)
for aux_after in output_2:
pre_output = [aux_dict[ii] for ii in aux_after]
aux_output = [np.intersect1d(ii, mask).tolist() for ii in pre_output]
aux_output = [ii for ii in aux_output if len(ii) > 0]
output.append(aux_output)
return output
else:
element_1 = _plural_elements_to_singular[elements_1]
after = after.replace('of ', '')
if before == '':
before = 'all'
if after == '':
after = 'all'
pre_output = get(molecular_system, element=element_1, selection=after, skip_digestion=True,
atom_index=True)
mask = select(molecular_system, selection=before)
output = [np.intersect1d(ii, mask).tolist() for ii in pre_output]
output = [ii for ii in output if len(ii) > 0]
return output
raise NotImplementedError
#def select_in_groups_of(molecular_system, selection):
#
# from molsysmt.basic import get
#
# before, after = selection.split('in groups of')
# before = before.strip()
# after = after.strip()
#
# if before == '' or is_all(before):
#
# output = get(molecular_system, element='group', selection=after, atom_index=True)
# output = [ii for ii in output]
#
# else:
#
# pre_output = get(molecular_system, element='group', selection=after, atom_index=True)
# mask = select(molecular_system, selection=before)
# output = [np.intersect1d(ii, mask) for ii in pre_output]
# output = [ii for ii in output if ii.shape[0] > 0]
#
# return output
[docs]
def selection_with_special_subsentences(selection):
output = None
parenthesis = get_parenthesis(selection)
for subselection in parenthesis:
if ('within ' in subselection) or ('bonded to ' in subselection):
output = subselection
break
return output
def _var_names_in_selection(selection):
var_names = []
if isinstance(selection, str):
var_names = [ii[1:] for ii in findall(r"@[\w']+", selection)]
elif isinstance(selection, (tuple, list)):
for ii in selection:
var_names += _var_names_in_selection(ii)
return var_names
def _in_elements_of(selection):
output = False
if "in groups" in selection:
output = True
elif "in components" in selection:
output = True
elif "in chains" in selection:
output = True
elif "in molecules" in selection:
output = True
elif "in entities" in selection:
output = True
return output