IMP logo
IMP Reference Guide  develop.50fdd7fa33,2025/09/05
The Integrative Modeling Platform
get_fraglib_from_native.py
1 import os
2 import IMP.atom
3 import IMP.core
4 import argparse
5 
6 
def _fragment_key(chainid, frag):
    '''
    Build the '<chain>_<SS type>_<first res>_<last res>_<length>' key
    (e.g. 'A_E_5_8_4') for a list of (resid, sstype) tuples.
    '''
    return '_'.join([chainid, frag[0][1], str(frag[0][0]),
                     str(frag[-1][0]), str(len(frag))])


def get_SSE_fragments(stride_file):
    '''
    Get all the SSE fragment info for the native structure from its
    STRIDE file.

    Only lines starting with 'ASG ' are read. The chain id is taken from
    column 9, the residue number from columns 10-14 and the secondary
    structure code from column 24 (0-based string indexes).

    @param stride_file path of the STRIDE output file to parse
    @return a tuple (frag_list, stride_dict). stride_dict maps each chain
            id to a dict {residue number: 'H' or 'E'} (a chain without
            helix/strand residues maps to an empty dict). frag_list
            contains one key per fragment, formatted as
            '<chain>_<SS type>_<first res>_<last res>_<length>'.
    '''
    stride_dict = {}
    frag_list = []
    # 1. Collect all helix and strand info as a dictionary, where the
    # information is stored based on chain id as well
    with open(stride_file, 'r') as infile:
        for line in infile:
            if not line.startswith('ASG '):
                continue
            chain_id = line[9]
            resid = int(line[10:15].strip())
            sstype = line[24]
            # setdefault (rather than assigning a fresh dict whenever the
            # chain id changes) keeps residues already collected for a
            # chain whose records are not contiguous in the file
            chain_residues = stride_dict.setdefault(chain_id, {})
            if sstype in ('H', 'E'):
                chain_residues[resid] = sstype

    # 2. Loop over each chain to detect the fragments
    for chainid, chain_residues in stride_dict.items():
        current_frag = []
        for resid, current_ss in chain_residues.items():
            if current_frag:
                previous_res, previous_ss = current_frag[-1]
                # only contiguous residues with same SS type should be
                # part of a fragment; a backwards jump in the numbering
                # is not contiguous either
                if current_ss == previous_ss and resid - previous_res == 1:
                    current_frag.append((resid, current_ss))
                    continue
                frag_list.append(_fragment_key(chainid, current_frag))
            current_frag = [(resid, current_ss)]
        # This step is to collect the last fragment information for each
        # chain. NOTE: a trailing single-residue fragment is not reported
        # (unlike single-residue fragments inside the chain); this is the
        # historical behavior and is kept as is.
        if len(current_frag) > 1:
            frag_list.append(_fragment_key(chainid, current_frag))
    # final fragment list
    return frag_list, stride_dict
74 
75 
def get_fragment_coords(pdbname, frag_info, path_to_store_parts):
    '''
    Write one PDB file per helix/strand fragment of the native structure.

    Each fragment key (e.g. 'A_E_5_8_4': chain, SSE type, first residue,
    last residue, length) describing at least 3 residues is selected from
    the structure read from pdbname, restricted to the N, CA, C, O and CB
    atoms, and written to
    '<path_to_store_parts>/<h|s>_<length>_<chain>_<first>_<last>.pdb'
    ('h' for helices, 's' for strands). Shorter fragments are skipped and
    a message is printed for any SSE type other than 'H' or 'E'.

    @param pdbname path of the native PDB file
    @param frag_info iterable of fragment keys, as returned by
           get_SSE_fragments
    @param path_to_store_parts output directory; created if missing
    @return list of the paths of the PDB files written
    '''
    # file-name prefix used for each supported SSE type
    prefixes = {'H': 'h', 'E': 's'}
    all_pdbs = []
    m = IMP.Model()
    native_h = IMP.atom.read_pdb(pdbname, m, (IMP.atom.ATOMPDBSelector()))
    outdir = path_to_store_parts
    # exist_ok avoids the race between checking for the directory and
    # creating it
    os.makedirs(outdir, exist_ok=True)
    # the same atom types are selected for every fragment, so build the
    # list once
    atom_types = [IMP.atom.AtomType(n)
                  for n in ['N', 'CA', 'C', 'O', 'CB']]
    for frag in frag_info:
        # A_E_5_8_4
        chainid, SSEtype, firstres, lastres, length = frag.split('_')
        if int(length) < 3:
            continue
        prefix = prefixes.get(SSEtype)
        if prefix is None:
            print('Something is wrong, should only process strands '
                  'and helix')
            continue
        # write a pdb now
        sse_frag = IMP.atom.Selection(
            native_h, chain_id=chainid,
            residue_indexes=range(int(firstres), int(lastres)+1),
            atom_types=atom_types)
        pdb_name = '_'.join([prefix, length, chainid,
                             firstres, lastres]) + '.pdb'
        pdb_path = outdir + '/' + pdb_name
        IMP.atom.write_pdb(sse_frag, pdb_path)
        all_pdbs.append(pdb_path)
    return all_pdbs
120 
121 
def per_chain_segment_coords(pdbname, segment_info, path_to_store_parts):
    '''
    Write one PDB file per chain containing all of its listed residues.

    For every chain in segment_info the residues given by the keys of
    segment_info[chain] are selected from the structure read from
    pdbname, restricted to the N, CA, C, O and CB atoms, and written to
    '<path_to_store_parts>/all_<count>_<chain>_<first>_<last>.pdb', where
    first/last are the first and last residue ids in dictionary order.
    Chains without any listed residue are skipped.

    @param pdbname path of the native PDB file
    @param segment_info dict mapping chain id to a dict keyed by residue
           id, such as the stride_dict returned by get_SSE_fragments
    @param path_to_store_parts output directory; created if missing
    @return list of the paths of the PDB files written
    '''
    all_pdbs = []
    m = IMP.Model()
    native_h = IMP.atom.read_pdb(pdbname, m, (IMP.atom.ATOMPDBSelector()))
    outdir = path_to_store_parts
    # exist_ok avoids the race between checking for the directory and
    # creating it
    os.makedirs(outdir, exist_ok=True)
    atom_types = [IMP.atom.AtomType(n)
                  for n in ['N', 'CA', 'C', 'O', 'CB']]
    for chain_id, residues in segment_info.items():
        all_residueids = list(residues.keys())
        print(all_residueids)
        if not all_residueids:
            # A chain without helix/strand residues has an empty entry;
            # there is nothing to write and indexing the first/last
            # residue below would raise IndexError
            continue
        # write a pdb now
        chain_frag = IMP.atom.Selection(
            native_h, chain_id=chain_id,
            residue_indexes=all_residueids,
            atom_types=atom_types)
        pdb_name = '_'.join(['all', str(len(all_residueids)), chain_id,
                             str(all_residueids[0]),
                             str(all_residueids[-1])]) + '.pdb'
        pdb_path = outdir + '/' + pdb_name
        IMP.atom.write_pdb(chain_frag, pdb_path)
        all_pdbs.append(pdb_path)
    return all_pdbs
147 
148 
def mutate_frag_to_ala(pdbname):
    '''
    Mutate all residues of the given pdb to ALA by delegating to the
    mutate_all_ALA helper module.
    '''
    # Imported inside the function, so the helper module is only looked
    # up when a mutation is actually requested
    from mutate_all_ALA import mutate_all_ALA as run_mutation
    run_mutation(pdbname)
155 # os.remove(pdbname)
156 
157 
def _str2bool(value):
    '''
    Convert a command-line string such as 'True', 'false', 'yes' or '0'
    to a bool; raise argparse.ArgumentTypeError for anything else.

    argparse's type=bool cannot be used for this: bool('False') is True
    because any non-empty string is truthy.
    '''
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got %r' % value)


# Command-line interface: parse the STRIDE file of the reference
# structure, then write either one PDB per SSE fragment (default) or one
# PDB per chain (--perChain).
parser = argparse.ArgumentParser(
    description='Generate a library of parts from native structure, '
                'using the information from STRIDE')
parser.add_argument('ref_pdb', type=str,
                    help='Provide the absolute path of the native pdb '
                         '(we assumed that as reference PDB)')
parser.add_argument('ref_stride', type=str,
                    help='Provide the absolute path of the stride file')
parser.add_argument('path_to_store_parts', type=str,
                    help='path to store the generated parts')

# nargs='?' with const=True accepts both a bare '--perChain' and the
# historical '--perChain True' / '--perChain False' forms
parser.add_argument('--perChain', type=_str2bool, nargs='?', const=True,
                    default=False,
                    help='get segments per chain')
args = parser.parse_args()

all_frags, stride_dict = get_SSE_fragments(args.ref_stride)
print(stride_dict)

if not args.perChain:
    all_new_pdbs = get_fragment_coords(args.ref_pdb, all_frags,
                                       args.path_to_store_parts)
else:
    all_new_pdbs = per_chain_segment_coords(args.ref_pdb, stride_dict,
                                            args.path_to_store_parts)

print(all_frags)
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
The type of an atom.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Select all non-alternative ATOM records.
Definition: pdb.h:128
Basic functionality that is expected to be used by a wide variety of IMP users.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70