IMP logo
IMP Reference Guide  2.5.0
The Integrative Modeling Platform
pmi/io/__init__.py
1 """@namespace IMP.pmi.io
2  Utility classes and functions for reading and storing PMI files
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.algebra
8 import IMP.atom
9 import IMP.pmi
10 import IMP.rmf
11 import IMP.pmi.analysis
12 import IMP.pmi.output
13 import IMP.pmi.tools
14 import RMF
15 import sys,os
16 import numpy as np
17 import re
18 from collections import defaultdict
19 import itertools
def parse_dssp(dssp_fn, limit_to_chains=''):
    """Read a DSSP file and extract SSEs. Values are all PDB residue numbering.
    Returns a SubsequenceData object containing labels helix, beta, loop.
    Each one is a list of SelectionDictionaries

    Example for a structure with helix A:5-7 and Beta strands A:1-3,A:9-11:
    helix : [ [ {'chain':'A','residue_indexes': [5,6,7]} ] ]
    beta  : [ [ {'chain':'A','residue_indexes': [1,2,3]},
                {'chain':'A','residue_indexes': [9,10,11]} ] ]
    loop  : same format as helix

    @param dssp_fn          Path to a DSSP output file
    @param limit_to_chains  If non-empty, only gather SSEs for these chain IDs
    """
    # setup: map each one-letter DSSP class to one of our three labels
    helix_classes = 'GHI'
    strand_classes = 'EB'
    loop_classes = [' ', '', 'T', 'S']
    sse_dict = {}
    for h in helix_classes:
        sse_dict[h] = 'helix'
    for s in strand_classes:
        sse_dict[s] = 'beta'
    for c in loop_classes:
        sse_dict[c] = 'loop'
    sses = SubsequenceData()

    # read file and parse
    start = False
    # temporary beta dictionary indexed by DSSP's sheet ID; all strands of
    # one sheet are merged into a single Subsequence at the end
    beta_dict = defaultdict(list)
    prev_sstype = None
    cur_sse = {'chain': '', 'residue_tuple': [-1, -1]}
    prev_beta_id = None
    # context manager so the file handle is closed even on a parse error
    with open(dssp_fn, 'r') as fh:
        for line in fh:
            fields = line.split()
            chain_break = False
            if len(fields) < 2:
                continue
            if fields[1] == "RESIDUE":
                # Start parsing from here
                start = True
                continue
            if not start:
                continue
            # DSSP marks a chain break with a blank residue-number column
            if line[9] == " ":
                chain_break = True
            elif limit_to_chains != '' and line[11] not in limit_to_chains:
                continue

            # gather line info (fixed-column format)
            if not chain_break:
                pdb_res_num = int(line[5:10])
                chain = line[11]
                sstype = sse_dict[line[16]]
                beta_id = line[33]

            # decide whether to extend or store the SSE
            if prev_sstype is None:
                cur_sse = {'chain': chain,
                           'residue_tuple': [pdb_res_num, pdb_res_num]}
            elif sstype != prev_sstype or chain_break:
                # add cur_sse to the right place
                if prev_sstype in ['helix', 'loop']:
                    sses.add_subsequence(prev_sstype, Subsequence(**cur_sse))
                elif prev_sstype == 'beta':
                    beta_dict[prev_beta_id].append(cur_sse)
                cur_sse = {'chain': chain,
                           'residue_tuple': [pdb_res_num, pdb_res_num]}
            else:
                cur_sse['residue_tuple'][1] = pdb_res_num
            if chain_break:
                prev_sstype = None
                prev_beta_id = None
            else:
                prev_sstype = sstype
                prev_beta_id = beta_id

    # final SSE processing: flush the last open SSE
    if prev_sstype is not None:
        if prev_sstype in ['helix', 'loop']:
            sses.add_subsequence(prev_sstype, Subsequence(**cur_sse))
        elif prev_sstype == 'beta':
            beta_dict[prev_beta_id].append(cur_sse)
    # gather betas: one Subsequence per sheet, one range per strand
    for beta_sheet in beta_dict:
        seq = Subsequence()
        for strand in beta_dict[beta_sheet]:
            seq.add_range(**strand)
        sses.add_subsequence('beta', seq)
    return sses
106 
def parse_xlinks_davis(data_fn,
                       max_num=-1,
                       name_map=None,
                       named_offsets=None,
                       use_chains=None):
    """ Format from Trisha Davis. Lines are:
    ignore ignore seq1 seq2 >Name(res) >Name(res) score
    @param data_fn The data file name
    @param max_num Maximum number of XL to read (-1 is all)
    @param name_map Dictionary mapping text file names to the molecule name
    @param named_offsets Integer offsets to apply to the indexing in the file
    @param use_chains Currently unused; kept for backward compatibility
    Output is a CrossLinkData object containing SelectionDictionaries
    data[unique_id] =
              [ { 'r1': {'molecule':'A','residue_index':5},
                  'r2': {'molecule':'B','residue_index':100},
                  'score': 123 },
                { 'r1': {'molecule':'C','residue_index':63},
                  'r2': {'molecule':'D','residue_index':94},
                  'score': 600 }
              ]
    """
    # use None sentinels instead of mutable default arguments
    if name_map is None:
        name_map = {}
    if named_offsets is None:
        named_offsets = {}
    found = set()
    data = CrossLinkData()
    with open(data_fn, 'r') as inf:
        for nl, line in enumerate(inf):
            if max_num == -1 or nl < max_num:
                ig1, ig2, seq1, seq2, s1, s2, score = line.split()
                score = float(score)
                # residue specifiers look like ">Name(res)"
                m1 = re.search(r'>([\w-]+)\((\w+)\)', s1)
                m2 = re.search(r'>([\w-]+)\((\w+)\)', s2)

                n1 = m1.group(1)
                r1 = int(m1.group(2))
                if n1 in name_map:
                    n1 = name_map[n1]
                if n1 in named_offsets:
                    r1 += named_offsets[n1]

                n2 = m2.group(1)
                r2 = int(m2.group(2))
                if n2 in name_map:
                    n2 = name_map[n2]
                if n2 in named_offsets:
                    r2 += named_offsets[n2]
                # canonical (sorted) key so A-B and B-A count as duplicates
                key = tuple(sorted(['%s.%i' % (n1, r1), '%s.%i' % (n2, r2)]))
                if key in found:
                    print('skipping duplicated xl', key)
                    continue
                found.add(key)
                data.add_cross_link(nl,
                                    {'molecule': n1, 'residue_index': r1},
                                    {'molecule': n2, 'residue_index': r2},
                                    score)
    return data
164 
165 
class Subsequence(object):
    """ A light class to store multiple not-necessarily-contiguous residue ranges."""
    def __init__(self, chain=None, molecule=None, residue_tuple=None,
                 subsequences=None):
        """Create subsequence and optionally pass the first contiguous range.
        @param chain The chain ID
        @param molecule The molecule name
        @param residue_tuple PDB-style inclusive residue range
        @param subsequences A list of other subsequences to combine (not implemented)
        """
        # each entry is a dict with keys chain / molecule / residue_tuple
        self.seqs = []
        if chain or molecule or residue_tuple:
            self.add_range(chain, molecule, residue_tuple)
        if subsequences:
            # not implemented
            pass

    def add_range(self, chain=None, molecule=None, residue_tuple=None):
        """Add a contiguous range to this subsequence
        @param chain The chain ID
        @param molecule The molecule name
        @param residue_tuple PDB-style inclusive residue range
        """
        self.seqs.append({'chain': chain,
                          'molecule': molecule,
                          'residue_tuple': residue_tuple})

    def join(self, new_subsequence):
        """Append all ranges of another Subsequence to this one."""
        for s in new_subsequence:
            self.seqs.append(s)

    def get_selection(self, hier, **kwargs):
        """Create an IMP Selection from this subsequence
        @param hier An IMP hierarchy or list of them
        \\note any additional keyword arguments will be appended to the selection
        """
        ret = None
        for seq in self.seqs:
            # copy kwargs so we neither mutate the caller's dict nor leak
            # chain/molecule keys from one range into the next
            args = dict(kwargs)
            if seq['chain']:
                args['chain'] = seq['chain']
            if seq['molecule']:
                args['molecule'] = seq['molecule']
            if seq['residue_tuple']:
                args['residue_indexes'] = list(range(seq['residue_tuple'][0],
                                                     seq['residue_tuple'][1]+1))
            sel = IMP.atom.Selection(hier, **args)
            if ret is None:
                ret = sel
            else:
                ret |= sel
        return ret

    def __repr__(self):
        rep = ''
        for nseq, seq in enumerate(self.seqs):
            this_str = []
            if seq['residue_tuple'] is not None:
                this_str.append('%i-%i' % (seq['residue_tuple'][0],
                                           seq['residue_tuple'][1]))
            if seq['molecule'] is not None:
                this_str.append('%s' % seq['molecule'])
            if seq['chain'] is not None:
                this_str.append('%s' % seq['chain'])
            rep += '.'.join(this_str)
            if nseq < len(self.seqs)-1:
                rep += '_'
        return rep

    def __getitem__(self, key):
        # was "self.data[key]" — this class stores its ranges in self.seqs,
        # so indexing previously raised AttributeError
        return self.seqs[key]

    def __iter__(self):
        return self.seqs.__iter__()

    def __add__(self, other):
        self.join(other)
        return self
235 
class SubsequenceData(object):
    """ Group a bunch of subsequences with certain labels
    Use cases: storing lists of secondary structures from DSSP or PSIPRED
               storing lists of molecules that should be made symmetric
    """
    def __init__(self):
        """Setup groups of subsequences, keyed by label."""
        self.data = defaultdict(list)

    def add_subsequence(self, label, subsequence):
        """ Append a new subsequence under a given label
        @param label label for this subsequence (e.g., 'helix')
        @param subsequence a Subsequence object to store under that label
               e.g. Subsequence(chain='A',residue_tuple=(10,15))
        """
        # was "raise InputError", an undefined name that itself raised
        # NameError; ValueError is the appropriate builtin here
        if not isinstance(subsequence, Subsequence):
            raise ValueError('must provide a subsequence object')
        self.data[label].append(subsequence)

    def __getitem__(self, key):
        return self.data[key]

    def __repr__(self):
        return self.data.__repr__()

    def keys(self):
        return self.data.keys()
264 
class CrossLink(object):
    """A class to store the selection commands for a single crosslink.
    """
    def __init__(self, unique_id, r1, r2, score):
        """Add a crosslink.
        @param unique_id The id is used to group crosslinks that are alternatives
        @param r1 A dictionary of selection keywords for the first residue
        @param r2 A dictionary of selection keywords for the second residue
        @param score A score that might be used later for filtering
        @note The dictionaries can contain any Selection argument like
              molecule or residue_index
        """
        self.unique_id = unique_id
        self.r1 = r1
        self.r2 = r2
        self.score = score

    def __repr__(self):
        return "CrossLink id: "+str(self.unique_id)+" r1: "+repr(self.r1)+", r2: "+repr(self.r2)

    def intersects(self, other):
        """Return True if either residue selection is shared with other."""
        return (self.r1 == other.r1 or self.r1 == other.r2
                or self.r2 == other.r1 or self.r2 == other.r2)

    def get_selection(self, mh, **kwargs):
        """Return a list of atom pairs (particles) for this crosslink.
        Found by selecting everything with r1 and r2 then returning the
        cartesian product.
        @note you may want to provide some atom specifiers like
              atom_type=IMP.atom.AtomType("CA")
        @param mh The hierarchy to select from
        @param kwargs Any additional selection arguments
        @raise ValueError if either selection matches no particles
        """
        rsel1 = self.r1.copy()
        rsel1.update(kwargs)
        rsel2 = self.r2.copy()
        rsel2.update(kwargs)
        sel1 = IMP.atom.Selection(mh, **rsel1).get_selected_particles()
        sel2 = IMP.atom.Selection(mh, **rsel2).get_selected_particles()
        # ValueError is more specific than the bare Exception previously
        # raised here, and is still caught by "except Exception" callers
        if len(sel1) == 0:
            raise ValueError("this selection is empty", rsel1)
        if len(sel2) == 0:
            raise ValueError("this selection is empty", rsel2)
        return list(itertools.product(sel1, sel2))
322 
class CrossLinkData(object):
    """A class for storing groups of crosslinks.
    Acts like a dictionary where keys are unique IDs and values are lists
    of CrossLink objects (alternatives for that ID).
    Equivalent (using objects instead of dicts) to a data structure like so:
         data[1030] =
               [ { 'r1': {'molecule':'A','residue_index':5},
                   'r2': {'molecule':'B','residue_index':100},
                   'Score': 123 },
                 { 'r1': {'molecule':'C','residue_index':63},
                   'r2': {'molecule':'D','residue_index':94},
                   'Score': 600 }
               ]
    """
    def __init__(self):
        """Setup a CrossLinkData object"""
        self.data = defaultdict(list)

    def add_cross_link(self, unique_id, kws1, kws2, score):
        """Add a crosslink. They are organized by their unique_ids.
        @param unique_id The id is used to group crosslinks that are alternatives
        @param kws1 A dictionary of selection keywords for the first residue
        @param kws2 A dictionary of selection keywords for the second residue
        @param score A score that might be used later for filtering
        @note The dictionaries can contain any Selection argument like
              molecule or residue_index
        """
        self.data[unique_id].append(CrossLink(unique_id, kws1, kws2, score))

    def copy_cross_link(self, xl):
        """Store an existing CrossLink object under its own unique_id.
        @param xl a CrossLink instance
        """
        # TypeError (a subclass of Exception, so existing handlers still
        # work) is the right category for a wrong-type argument
        if not isinstance(xl, CrossLink):
            raise TypeError(
                "CrossLinkData::copy_cross_link() requires a Crosslink object")
        self.data[xl.unique_id].append(xl)

    def __getitem__(self, key):
        return self.data[key]

    def __repr__(self):
        ret = "CrossLinkData with these entries:\n"
        for d in self.data:
            for xl in self.data[d]:
                ret += repr(xl) + '\n'
        return ret

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def __contains__(self, item):
        return item in self.data

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)
371 
def save_best_models(mdl,
                     out_dir,
                     stat_files,
                     number_of_best_scoring_models=10,
                     get_every=1,
                     score_key="SimplifiedModel_Total_Score_None",
                     feature_keys=None,
                     rmf_file_key="rmf_file",
                     rmf_file_frame_key="rmf_frame_index",
                     override_rmf_dir=None):
    """Given a list of stat files, read them all and find the best models.
    Save to a single RMF along with a stat file.
    @param mdl The IMP Model
    @param out_dir The output directory. Will save 3 files (RMF, stat, summary)
    @param stat_files List of all stat files to collect
    @param number_of_best_scoring_models Num best models to gather
    @param get_every Skip frames
    @param score_key Used for the ranking
    @param feature_keys Keys to keep around
    @param rmf_file_key The key that says RMF file name
    @param rmf_file_frame_key The key that says RMF frame number
    @param override_rmf_dir For output, change the name of the RMF directory (experiment)
    """
    # guard against iterating over the None default below
    if feature_keys is None:
        feature_keys = []

    # start by splitting into jobs
    try:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        number_of_processes = comm.size
    except ImportError:
        rank = 0
        number_of_processes = 1
    my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
        stat_files, number_of_processes)[rank]

    # filenames
    out_stat_fn = os.path.join(
        out_dir, "top_" + str(number_of_best_scoring_models) + ".out")
    out_rmf_fn = os.path.join(
        out_dir, "top_" + str(number_of_best_scoring_models) + ".rmf3")

    # extract all the models
    all_fields = []
    for nsf, sf in enumerate(my_stat_files):

        # get list of keywords
        root_directory_of_stat_file = os.path.dirname(os.path.dirname(sf))
        print("getting data from file %s" % sf)
        # NOTE(review): this assignment was missing from the rendered
        # listing; restored to match the identical preamble in
        # get_best_models()/get_trajectory_models() — confirm against VCS
        po = IMP.pmi.output.ProcessOutput(sf)
        all_keys = [score_key,
                    rmf_file_key,
                    rmf_file_frame_key]
        for k in po.get_keys():
            for fk in feature_keys:
                if fk in k:
                    all_keys.append(k)
        fields = po.get_fields(all_keys,
                               get_every=get_every)

        # check that all lengths are all equal
        length_set = set([len(fields[f]) for f in fields])
        minlen = min(length_set)
        # if some of the fields are missing, truncate
        # the feature files to the shortest one
        if len(length_set) > 1:
            print("get_best_models: the statfile is not synchronous")
            minlen = min(length_set)
            for f in fields:
                fields[f] = fields[f][0:minlen]
        if nsf == 0:
            all_fields = fields
        else:
            for k in fields:
                all_fields[k] += fields[k]

        if override_rmf_dir is not None:
            for i in range(minlen):
                all_fields[rmf_file_key][i] = os.path.join(
                    override_rmf_dir,
                    os.path.basename(all_fields[rmf_file_key][i]))

    # gather info, sort, write
    if number_of_processes != 1:
        comm.Barrier()
        if rank != 0:
            comm.send(all_fields, dest=0, tag=11)
        else:
            for i in range(1, number_of_processes):
                data_tmp = comm.recv(source=i, tag=11)
                for k in all_fields:
                    all_fields[k] += data_tmp[k]

    # sort by total score
    order = sorted(range(len(all_fields[score_key])),
                   key=lambda i: float(all_fields[score_key][i]))

    # write the stat and RMF files
    stat = open(out_stat_fn, 'w')
    rh0 = RMF.open_rmf_file_read_only(
        os.path.join(root_directory_of_stat_file,
                     all_fields[rmf_file_key][0]))
    prots = IMP.rmf.create_hierarchies(rh0, mdl)
    del rh0
    outf = RMF.create_rmf_file(out_rmf_fn)
    IMP.rmf.add_hierarchies(outf, prots)
    for nm, i in enumerate(order[:number_of_best_scoring_models]):
        dline = dict((k, all_fields[k][i]) for k in all_fields)
        dline['orig_rmf_file'] = dline[rmf_file_key]
        dline['orig_rmf_frame_index'] = dline[rmf_file_frame_key]
        dline[rmf_file_key] = out_rmf_fn
        dline[rmf_file_frame_key] = nm
        rh = RMF.open_rmf_file_read_only(
            os.path.join(root_directory_of_stat_file,
                         all_fields[rmf_file_key][i]))
        IMP.rmf.link_hierarchies(rh, prots)
        # NOTE(review): the load_frame call was truncated in the rendered
        # listing; restored from its dangling RMF.FrameID argument
        IMP.rmf.load_frame(rh,
                           RMF.FrameID(all_fields[rmf_file_frame_key][i]))
        IMP.rmf.save_frame(outf)
        del rh
        stat.write(str(dline) + '\n')
    stat.close()
    print('wrote stats to', out_stat_fn)
    print('wrote rmfs to', out_rmf_fn)
491 
492 
493 
494 
def get_best_models(stat_files,
                    score_key="SimplifiedModel_Total_Score_None",
                    feature_keys=None,
                    rmf_file_key="rmf_file",
                    rmf_file_frame_key="rmf_frame_index",
                    prefiltervalue=None,
                    get_every=1):
    """ Given a list of stat files, read them all and find the best models.
    Returns the best rmf filenames, frame numbers, scores, and values
    for feature keywords.
    @param stat_files List of stat files to read
    @param score_key Used for ranking/filtering
    @param feature_keys Keys to keep around (matched as substrings of stat keys)
    @param rmf_file_key The key that says RMF file name
    @param rmf_file_frame_key The key that says RMF frame number
    @param prefiltervalue If set, only keep frames scoring below this value
    @param get_every Skip frames
    """
    # guard against iterating over the None default below
    if feature_keys is None:
        feature_keys = []
    rmf_file_list = []        # best RMF files
    rmf_file_frame_list = []  # best RMF frames
    score_list = []           # best scores
    # best values of the feature keys
    feature_keyword_list_dict = defaultdict(list)
    for sf in stat_files:
        root_directory_of_stat_file = os.path.dirname(os.path.dirname(sf))
        print("getting data from file %s" % sf)
        # NOTE(review): this assignment was missing from the rendered
        # listing; restored from the po.get_keys() call just below
        po = IMP.pmi.output.ProcessOutput(sf)
        keywords = po.get_keys()

        feature_keywords = [score_key,
                            rmf_file_key,
                            rmf_file_frame_key]

        for k in keywords:
            for fk in feature_keys:
                if fk in k:
                    feature_keywords.append(k)

        if prefiltervalue is None:
            fields = po.get_fields(feature_keywords,
                                   get_every=get_every)
        else:
            fields = po.get_fields(feature_keywords,
                                   filtertuple=(score_key, "<", prefiltervalue),
                                   get_every=get_every)

        # check that all lengths are all equal
        length_set = set()
        for f in fields:
            length_set.add(len(fields[f]))

        # if some of the fields are missing, truncate
        # the feature files to the shortest one
        if len(length_set) > 1:
            print("get_best_models: the statfile is not synchronous")
            minlen = min(length_set)
            for f in fields:
                fields[f] = fields[f][0:minlen]

        # append to the lists
        score_list += fields[score_key]
        for rmf in fields[rmf_file_key]:
            rmf_file_list.append(
                os.path.join(root_directory_of_stat_file, rmf))

        rmf_file_frame_list += fields[rmf_file_frame_key]

        # (feature_keywords is always a list here; the old None check
        # was vacuous)
        for k in feature_keywords:
            feature_keyword_list_dict[k] += fields[k]

    return (rmf_file_list, rmf_file_frame_list, score_list,
            feature_keyword_list_dict)
557 
def get_trajectory_models(stat_files,
                          score_key="SimplifiedModel_Total_Score_None",
                          rmf_file_key="rmf_file",
                          rmf_file_frame_key="rmf_frame_index",
                          get_every=1):
    """ Given a list of stat files, read them all and find a trajectory of models.
    Returns the rmf filenames, frame numbers, and scores.
    @param stat_files List of stat files to read
    @param score_key The stat key holding the model score
    @param rmf_file_key The key that says RMF file name
    @param rmf_file_frame_key The key that says RMF frame number
    @param get_every Skip frames
    """
    rmf_file_list = []        # best RMF files
    rmf_file_frame_list = []  # best RMF frames
    score_list = []           # best scores
    for sf in stat_files:
        root_directory_of_stat_file = os.path.dirname(os.path.dirname(sf))
        print("getting data from file %s" % sf)
        # NOTE(review): this assignment was missing from the rendered
        # listing; restored from the po.get_keys() call just below
        po = IMP.pmi.output.ProcessOutput(sf)
        keywords = po.get_keys()

        feature_keywords = [score_key,
                            rmf_file_key,
                            rmf_file_frame_key]

        fields = po.get_fields(feature_keywords,
                               get_every=get_every)

        # check that all lengths are all equal
        length_set = set()
        for f in fields:
            length_set.add(len(fields[f]))

        # if some of the fields are missing, truncate
        # the feature files to the shortest one
        if len(length_set) > 1:
            print("get_best_models: the statfile is not synchronous")
            minlen = min(length_set)
            for f in fields:
                fields[f] = fields[f][0:minlen]

        # append to the lists
        score_list += fields[score_key]
        for rmf in fields[rmf_file_key]:
            rmf_file_list.append(
                os.path.join(root_directory_of_stat_file, rmf))

        rmf_file_frame_list += fields[rmf_file_frame_key]

    return rmf_file_list, rmf_file_frame_list, score_list
603 
604 
def read_coordinates_of_rmfs(model,
                             rmf_tuples,
                             alignment_components=None,
                             rmsd_calculation_components=None,
                             state_number=0):
    """ Read in coordinates of a set of RMF tuples.
    Returns the coordinates split as requested (all, alignment only, rmsd only)
    as well as RMF file names (as keys in a dictionary, with values being the
    rank number) and just a plain list
    @param model The IMP model
    @param rmf_tuples [score,filename,frame number,original order number, rank]
    @param alignment_components Tuples to specify what you're aligning on
    @param rmsd_calculation_components Tuples to specify what components
           are used for RMSD calc
    @param state_number Which state in the hierarchy to read
    """
    all_coordinates = []
    rmsd_coordinates = []
    alignment_coordinates = []
    all_rmf_file_names = []
    rmf_file_name_index_dict = {}  # storing the features

    for cnt, tpl in enumerate(rmf_tuples):
        rmf_file = tpl[1]
        frame_number = tpl[2]

        # build hierarchies once, then relink for subsequent frames
        if cnt == 0:
            prots = IMP.pmi.analysis.get_hiers_from_rmf(model,
                                                        frame_number,
                                                        rmf_file)
        else:
            IMP.pmi.analysis.link_hiers_to_rmf(model, prots,
                                               frame_number, rmf_file)

        if not prots:
            continue

        prot = prots[state_number]
        # getting the particles
        # NOTE(review): this assignment was missing from the rendered
        # listing; restored from the part_dict usage just below and the
        # get_particles_at_resolution_one cross-reference
        part_dict = IMP.pmi.analysis.get_particles_at_resolution_one(prot)
        all_particles = [pp for key in part_dict for pp in part_dict[key]]
        all_ps_set = set(all_particles)
        # getting the coordinates
        model_coordinate_dict = {}
        template_coordinate_dict = {}
        rmsd_coordinate_dict = {}
        for pr in part_dict:
            model_coordinate_dict[pr] = np.array(
                [np.array(IMP.core.XYZ(i).get_coordinates())
                 for i in part_dict[pr]])

        if alignment_components is not None:
            for pr in alignment_components:
                # entries are either a molecule name or a
                # (begin, end, name) residue-range tuple
                if type(alignment_components[pr]) is str:
                    name = alignment_components[pr]
                    s = IMP.atom.Selection(prot, molecule=name)
                elif type(alignment_components[pr]) is tuple:
                    name = alignment_components[pr][2]
                    rend = alignment_components[pr][1]
                    rbegin = alignment_components[pr][0]
                    s = IMP.atom.Selection(
                        prot, molecule=name,
                        residue_indexes=range(rbegin, rend+1))
                ps = s.get_selected_particles()
                # only keep particles present at resolution one
                filtered_particles = [p for p in ps if p in all_ps_set]
                template_coordinate_dict[pr] = \
                    [list(map(float, IMP.core.XYZ(i).get_coordinates()))
                     for i in filtered_particles]

        if rmsd_calculation_components is not None:
            for pr in rmsd_calculation_components:
                if type(rmsd_calculation_components[pr]) is str:
                    name = rmsd_calculation_components[pr]
                    s = IMP.atom.Selection(prot, molecule=name)
                elif type(rmsd_calculation_components[pr]) is tuple:
                    name = rmsd_calculation_components[pr][2]
                    rend = rmsd_calculation_components[pr][1]
                    rbegin = rmsd_calculation_components[pr][0]
                    s = IMP.atom.Selection(
                        prot, molecule=name,
                        residue_indexes=range(rbegin, rend+1))
                ps = s.get_selected_particles()
                filtered_particles = [p for p in ps if p in all_ps_set]
                rmsd_coordinate_dict[pr] = \
                    [list(map(float, IMP.core.XYZ(i).get_coordinates()))
                     for i in filtered_particles]

        all_coordinates.append(model_coordinate_dict)
        alignment_coordinates.append(template_coordinate_dict)
        rmsd_coordinates.append(rmsd_coordinate_dict)
        frame_name = rmf_file + '|' + str(frame_number)
        all_rmf_file_names.append(frame_name)
        rmf_file_name_index_dict[frame_name] = tpl[4]
    return (all_coordinates, alignment_coordinates, rmsd_coordinates,
            rmf_file_name_index_dict, all_rmf_file_names)
688 
def get_bead_sizes(model, rmf_tuple, rmsd_calculation_components=None,
                   state_number=0):
    '''Return, per RMSD component, the bead sizes (number of residues per
    particle) for one RMF frame.
    @param model The IMP model
    @param rmf_tuple score,filename,frame number,original order number, rank
    @param rmsd_calculation_components Tuples to specify what components
           are used for RMSD calc
    @param state_number Which state in the hierarchy to read
    '''
    rmf_file = rmf_tuple[1]
    frame_number = rmf_tuple[2]

    prots = IMP.pmi.analysis.get_hiers_from_rmf(model,
                                                frame_number,
                                                rmf_file)

    prot = prots[state_number]

    # getting the particles
    # NOTE(review): this assignment was missing from the rendered listing;
    # restored to match the identical preamble in read_coordinates_of_rmfs
    part_dict = IMP.pmi.analysis.get_particles_at_resolution_one(prot)
    all_particles = [pp for key in part_dict for pp in part_dict[key]]
    all_ps_set = set(all_particles)
    # getting the bead sizes
    rmsd_bead_size_dict = {}

    if rmsd_calculation_components is not None:
        for pr in rmsd_calculation_components:
            # entries are either a molecule name or a
            # (begin, end, name) residue-range tuple
            if type(rmsd_calculation_components[pr]) is str:
                name = rmsd_calculation_components[pr]
                s = IMP.atom.Selection(prot, molecule=name)
            elif type(rmsd_calculation_components[pr]) is tuple:
                name = rmsd_calculation_components[pr][2]
                rend = rmsd_calculation_components[pr][1]
                rbegin = rmsd_calculation_components[pr][0]
                s = IMP.atom.Selection(
                    prot, molecule=name,
                    residue_indexes=range(rbegin, rend+1))
            ps = s.get_selected_particles()
            # only keep particles present at resolution one
            filtered_particles = [p for p in ps if p in all_ps_set]
            rmsd_bead_size_dict[pr] = \
                [len(IMP.pmi.tools.get_residue_indexes(p))
                 for p in filtered_particles]

    return rmsd_bead_size_dict
A class for reading stat files.
Definition: output.py:633
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
RMF::FrameID save_frame(RMF::FileHandle file, std::string name="")
Save the current state of the linked objects as a new RMF frame.
def parse_dssp
read dssp file, get SSEs.
def get_best_models
Given a list of stat files, read them all and find the best models.
Miscellaneous utilities.
Definition: tools.py:1
A class for storing groups of crosslinks.
def add_range
Add some stuff to this subsequence.
A light class to store multiple not-necessarily-contiguous residue ranges.
def add_cross_link
Add a crosslink.
def __init__
Setup a CrossLinkData object.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Create subsequence and optionally pass the first contiguous range.
def __init__
Setup groups of subsequences.
def parse_xlinks_davis
Format from Trisha Davis.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
Group a bunch of subsequences with certain labels Use cases: storing lists of secondary structures fr...
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
def get_selection
Create an IMP Selection from this subsequence.
Classes for writing output files and processing them.
Definition: output.py:1
def save_best_models
Given a list of stat files, read them all and find the best models.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
The general base class for IMP exceptions.
Definition: exception.h:49
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
Python classes to represent, score, sample and analyze models.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:65
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:959
def add_subsequence
Append a new cross-link to a given unique ID.