%load_ext autoreload
%autoreload 2
import molsysmt as msm

Get covalent chains

Let's first load a molecular system to work with in this section:

# Take the '1tcd.h5msm' entry of the TcTIM demo systems, convert it with
# msm.convert (default target form), and print a summary of the result.
molecular_system = msm.convert(msm.systems['TcTIM']['1tcd.h5msm'])
msm.info(molecular_system)
form n_atoms n_groups n_components n_chains n_molecules n_entities n_waters n_proteins n_structures
molsysmt.MolSys 3983 662 167 4 167 2 165 2 1

MolSysMT includes a method to get all covalent chains found in a molecular system that match a given sequence of atom names. To illustrate how the method molsysmt.topology.get_covalent_chains works, let's extract all segments of atoms C, N, CA and C covalently bound in this order (C-N-CA-C):

# Search for C-N-CA-C chains of covalently bonded atoms, restricted to the
# selection "component_index==0". Each item of `chain` is the selection string
# for one position of the chain, given in order.
covalent_chains = msm.topology.get_covalent_chains(
    molecular_system,
    selection="component_index==0",
    chain=[
        'atom_name=="C"',
        'atom_name=="N"',
        'atom_name=="CA"',
        'atom_name=="C"',
    ],
)
covalent_chains.shape
(247, 4)

The output is a rank-2 NumPy array: the length of the first axis is the number of chains found, and the length of the second axis is 4 (since the chain was chosen to have 4 atoms):

# Show the full array: one row per chain, one column per position in the chain.
covalent_chains
array([[   2,    9,   10,   11],
       [  11,   16,   17,   18],
       [  18,   25,   26,   27],
       [  27,   32,   33,   34],
       [  34,   40,   41,   42],
       [  42,   45,   46,   47],
       [  47,   50,   51,   52],
       [  52,   55,   56,   57],
       [  57,   63,   64,   65],
       [  65,   77,   78,   79],
       [  79,   86,   87,   88],
       [  88,   92,   93,   94],
       [  94,  100,  101,  102],
       [ 102,  104,  105,  106],
       [ 106,  110,  111,  112],
       [ 112,  119,  120,  121],
       [ 121,  125,  126,  127],
       [ 127,  133,  134,  135],
       [ 135,  141,  142,  143],
       [ 143,  148,  149,  150],
       [ 150,  155,  156,  157],
       [ 157,  163,  164,  165],
       [ 165,  171,  172,  173],
       [ 173,  180,  181,  182],
       [ 182,  187,  188,  189],
       [ 189,  195,  196,  197],
       [ 197,  203,  204,  205],
       [ 205,  208,  209,  210],
       [ 210,  213,  214,  215],
       [ 215,  220,  221,  222],
       [ 222,  231,  232,  233],
       [ 233,  239,  240,  241],
       [ 241,  249,  250,  251],
       [ 251,  257,  258,  259],
       [ 259,  264,  265,  266],
       [ 266,  273,  274,  275],
       [ 275,  279,  280,  281],
       [ 281,  286,  287,  288],
       [ 288,  293,  294,  295],
       [ 295,  298,  299,  300],
       [ 300,  305,  306,  307],
       [ 307,  312,  313,  314],
       [ 314,  323,  324,  325],
       [ 325,  331,  332,  333],
       [ 333,  341,  342,  343],
       [ 343,  349,  350,  351],
       [ 351,  356,  357,  358],
       [ 358,  364,  365,  366],
       [ 366,  371,  372,  373],
       [ 373,  380,  381,  382],
       [ 382,  385,  386,  387],
       [ 387,  396,  397,  398],
       [ 398,  404,  405,  406],
       [ 406,  411,  412,  413],
       [ 413,  419,  420,  421],
       [ 421,  426,  427,  428],
       [ 428,  435,  436,  437],
       [ 437,  446,  447,  448],
       [ 448,  455,  456,  457],
       [ 457,  463,  464,  465],
       [ 465,  468,  469,  470],
       [ 470,  473,  474,  475],
       [ 475,  482,  483,  484],
       [ 484,  490,  491,  492],
       [ 492,  495,  496,  497],
       [ 497,  503,  504,  505],
       [ 505,  510,  511,  512],
       [ 512,  521,  522,  523],
       [ 523,  527,  528,  529],
       [ 529,  531,  532,  533],
       [ 533,  536,  537,  538],
       [ 538,  547,  548,  549],
       [ 549,  554,  555,  556],
       [ 556,  558,  559,  560],
       [ 560,  567,  568,  569],
       [ 569,  574,  575,  576],
       [ 576,  580,  581,  582],
       [ 582,  588,  589,  590],
       [ 590,  597,  598,  599],
       [ 599,  605,  606,  607],
       [ 607,  613,  614,  615],
       [ 615,  622,  623,  624],
       [ 624,  630,  631,  632],
       [ 632,  642,  643,  644],
       [ 644,  646,  647,  648],
       [ 648,  654,  655,  656],
       [ 656,  660,  661,  662],
       [ 662,  674,  675,  676],
       [ 676,  681,  682,  683],
       [ 683,  688,  689,  690],
       [ 690,  696,  697,  698],
       [ 698,  700,  701,  702],
       [ 702,  710,  711,  712],
       [ 712,  716,  717,  718],
       [ 718,  725,  726,  727],
       [ 727,  736,  737,  738],
       [ 738,  747,  748,  749],
       [ 749,  755,  756,  757],
       [ 757,  767,  768,  769],
       [ 769,  779,  780,  781],
       [ 781,  783,  784,  785],
       [ 785,  792,  793,  794],
       [ 794,  799,  800,  801],
       [ 801,  807,  808,  809],
       [ 809,  816,  817,  818],
       [ 818,  824,  825,  826],
       [ 826,  831,  832,  833],
       [ 833,  836,  837,  838],
       [ 838,  845,  846,  847],
       [ 847,  854,  855,  856],
       [ 856,  861,  862,  863],
       [ 863,  866,  867,  868],
       [ 868,  875,  876,  877],
       [ 877,  880,  881,  882],
       [ 882,  886,  887,  888],
       [ 888,  891,  892,  893],
       [ 893,  896,  897,  898],
       [ 898,  900,  901,  902],
       [ 902,  911,  912,  913],
       [ 913,  921,  922,  923],
       [ 923,  928,  929,  930],
       [ 930,  936,  937,  938],
       [ 938,  943,  944,  945],
       [ 945,  949,  950,  951],
       [ 951,  956,  957,  958],
       [ 958,  960,  961,  962],
       [ 962,  969,  970,  971],
       [ 971,  976,  977,  978],
       [ 978,  984,  985,  986],
       [ 986,  993,  994,  995],
       [ 995, 1002, 1003, 1004],
       [1004, 1013, 1014, 1015],
       [1015, 1022, 1023, 1024],
       [1024, 1027, 1028, 1029],
       [1029, 1031, 1032, 1033],
       [1033, 1042, 1043, 1044],
       [1044, 1049, 1050, 1051],
       [1051, 1054, 1055, 1056],
       [1056, 1059, 1060, 1061],
       [1061, 1066, 1067, 1068],
       [1068, 1073, 1074, 1075],
       [1075, 1081, 1082, 1083],
       [1083, 1088, 1089, 1090],
       [1090, 1097, 1098, 1099],
       [1099, 1105, 1106, 1107],
       [1107, 1110, 1111, 1112],
       [1112, 1115, 1116, 1117],
       [1117, 1122, 1123, 1124],
       [1124, 1127, 1128, 1129],
       [1129, 1136, 1137, 1138],
       [1138, 1145, 1146, 1147],
       [1147, 1153, 1154, 1155],
       [1155, 1159, 1160, 1161],
       [1161, 1168, 1169, 1170],
       [1170, 1177, 1178, 1179],
       [1179, 1182, 1183, 1184],
       [1184, 1196, 1197, 1198],
       [1198, 1202, 1203, 1204],
       [1204, 1213, 1214, 1215],
       [1215, 1220, 1221, 1222],
       [1222, 1227, 1228, 1229],
       [1229, 1235, 1236, 1237],
       [1237, 1240, 1241, 1242],
       [1242, 1252, 1253, 1254],
       [1254, 1261, 1262, 1263],
       [1263, 1268, 1269, 1270],
       [1270, 1275, 1276, 1277],
       [1277, 1289, 1290, 1291],
       [1291, 1294, 1295, 1296],
       [1296, 1302, 1303, 1304],
       [1304, 1306, 1307, 1308],
       [1308, 1313, 1314, 1315],
       [1315, 1317, 1318, 1319],
       [1319, 1326, 1327, 1328],
       [1328, 1333, 1334, 1335],
       [1335, 1338, 1339, 1340],
       [1340, 1345, 1346, 1347],
       [1347, 1352, 1353, 1354],
       [1354, 1361, 1362, 1363],
       [1363, 1370, 1371, 1372],
       [1372, 1375, 1376, 1377],
       [1377, 1384, 1385, 1386],
       [1386, 1393, 1394, 1395],
       [1395, 1400, 1401, 1402],
       [1402, 1410, 1411, 1412],
       [1412, 1419, 1420, 1421],
       [1421, 1427, 1428, 1429],
       [1429, 1435, 1436, 1437],
       [1437, 1446, 1447, 1448],
       [1448, 1457, 1458, 1459],
       [1459, 1471, 1472, 1473],
       [1473, 1478, 1479, 1480],
       [1480, 1489, 1490, 1491],
       [1491, 1495, 1496, 1497],
       [1497, 1504, 1505, 1506],
       [1506, 1512, 1513, 1514],
       [1514, 1516, 1517, 1518],
       [1518, 1523, 1524, 1525],
       [1525, 1531, 1532, 1533],
       [1533, 1539, 1540, 1541],
       [1541, 1544, 1545, 1546],
       [1546, 1549, 1550, 1551],
       [1551, 1558, 1559, 1560],
       [1560, 1566, 1567, 1568],
       [1568, 1577, 1578, 1579],
       [1579, 1585, 1586, 1587],
       [1587, 1593, 1594, 1595],
       [1595, 1605, 1606, 1607],
       [1607, 1609, 1610, 1611],
       [1611, 1613, 1614, 1615],
       [1615, 1619, 1620, 1621],
       [1621, 1626, 1627, 1628],
       [1628, 1633, 1634, 1635],
       [1635, 1638, 1639, 1640],
       [1640, 1647, 1648, 1649],
       [1649, 1655, 1656, 1657],
       [1657, 1660, 1661, 1662],
       [1662, 1671, 1672, 1673],
       [1673, 1678, 1679, 1680],
       [1680, 1686, 1687, 1688],
       [1688, 1698, 1699, 1700],
       [1700, 1707, 1708, 1709],
       [1709, 1715, 1716, 1717],
       [1717, 1726, 1727, 1728],
       [1728, 1734, 1735, 1736],
       [1736, 1742, 1743, 1744],
       [1744, 1750, 1751, 1752],
       [1752, 1754, 1755, 1756],
       [1756, 1765, 1766, 1767],
       [1767, 1773, 1774, 1775],
       [1775, 1780, 1781, 1782],
       [1782, 1784, 1785, 1786],
       [1786, 1788, 1789, 1790],
       [1790, 1793, 1794, 1795],
       [1795, 1799, 1800, 1801],
       [1801, 1807, 1808, 1809],
       [1809, 1816, 1817, 1818],
       [1818, 1823, 1824, 1825],
       [1825, 1832, 1833, 1834],
       [1834, 1843, 1844, 1845],
       [1845, 1850, 1851, 1852],
       [1852, 1859, 1860, 1861],
       [1861, 1867, 1868, 1869],
       [1869, 1875, 1876, 1877],
       [1877, 1884, 1885, 1886],
       [1886, 1889, 1890, 1891],
       [1891, 1896, 1897, 1898]])

Let's check that the names of the atoms in any of the obtained chains are correct:

# Names of the atoms that make up the first chain found.
msm.get(
    molecular_system,
    element='atom',
    selection=covalent_chains[0],
    name=True,
)
['C', 'N', 'CA', 'C']

The atom name specified at each position does not need to be unique: we can introduce variants at any position of the covalent chain. Let's see, for instance, how to get all 4-atom covalent chains where the first three atoms are C-N-CA, in this order, and the fourth atom can be either C or CB:

# Same search as before, but the last position accepts either of two atom
# names: a list on the right-hand side of `atom_name==` matches any of them.
covalent_chains = msm.topology.get_covalent_chains(
    molecular_system,
    selection="component_index==0",
    chain=[
        'atom_name=="C"',
        'atom_name=="N"',
        'atom_name=="CA"',
        'atom_name==["C", "CB"]',
    ],
)
covalent_chains
array([[   2,    9,   10,   11],
       [   2,    9,   10,   13],
       [  11,   16,   17,   18],
       ...,
       [1886, 1889, 1890, 1893],
       [1891, 1896, 1897, 1898],
       [1891, 1896, 1897, 1900]])
# Atom names of the first chain in the new result.
msm.get(
    molecular_system,
    element='atom',
    selection=covalent_chains[0],
    name=True,
)
['C', 'N', 'CA', 'C']
# Atom names of the second chain in the new result.
msm.get(
    molecular_system,
    element='atom',
    selection=covalent_chains[1],
    name=True,
)
['C', 'N', 'CA', 'CB']

The covalent chains defining the \(\phi\), \(\psi\), \(\omega\) and \(\chi_1\) dihedral angles are obtained as follows:

# phi: every C-N-CA-C covalent chain in the molecular system.
phi_chains = msm.topology.get_covalent_chains(
    molecular_system,
    chain=[
        'atom_name=="C"',
        'atom_name=="N"',
        'atom_name=="CA"',
        'atom_name=="C"',
    ],
)
# psi: every N-CA-C-N covalent chain in the molecular system.
psi_chains = msm.topology.get_covalent_chains(
    molecular_system,
    chain=[
        'atom_name=="N"',
        'atom_name=="CA"',
        'atom_name=="C"',
        'atom_name=="N"',
    ],
)
# omega: every (CA|CH3)-C-N-(CA|CH3) covalent chain in the molecular system.
# Both end positions accept either CA or CH3 as the atom name.
omega_chains = msm.topology.get_covalent_chains(
    molecular_system,
    chain=[
        'atom_name==["CA","CH3"]',
        'atom_name=="C"',
        'atom_name=="N"',
        'atom_name==["CA", "CH3"]',
    ],
)
# Covalent chains defining all chi1 dihedral angles in the molecular system.
# The fourth atom is the side-chain gamma atom, whose name depends on the
# residue: CG for most residues, CG1 (Val, Ile), OG (Ser), OG1 (Thr) or
# SG (Cys). Matching only "CG" would silently skip those residues.
chi1_chains = msm.topology.get_covalent_chains(
    molecular_system,
    chain=[
        'atom_name=="N"',
        'atom_name=="CA"',
        'atom_name=="CB"',
        'atom_name==["CG", "CG1", "OG", "OG1", "SG"]',
    ],
)