IMP  2.4.0
The Integrative Modeling Platform
pmi/io/__init__.py
1 """@namespace IMP.pmi.io
2  Utility classes and functions for reading and storing PMI files
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.algebra
8 import IMP.atom
9 import IMP.pmi
10 import IMP.rmf
11 import IMP.pmi.analysis
12 import IMP.pmi.output
13 import IMP.pmi.tools
14 import RMF
15 import sys,os
16 import numpy as np
17 import re
18 from collections import defaultdict
19 import itertools
def parse_dssp(dssp_fn, limit_to_chains=''):
    """Read a DSSP file and collect secondary structure elements (SSEs).

    Values are all PDB residue numbering.
    Returns a SubsequenceData object containing labels helix, beta, loop.
    Each one is a list of SelectionDictionaries.

    Example for a structure with helix A:5-7 and Beta strands A:1-3,A:9-11:
    helix : [ [ {'chain':'A','residue_indexes': [5,6,7]} ] ]
    beta  : [ [ {'chain':'A','residue_indexes': [1,2,3]},
                {'chain':'A','residue_indexes': [9,10,11]} ] ]
    loop  : same format as helix

    @param dssp_fn Path to the DSSP output file
    @param limit_to_chains If non-empty, only lines whose chain ID is in
           this string are considered
    """
    # Map each single-letter DSSP class to one of our three coarse labels
    helix_classes = 'GHI'
    strand_classes = 'EB'
    loop_classes = [' ', '', 'T', 'S']
    sse_dict = {}
    for code in helix_classes:
        sse_dict[code] = 'helix'
    for code in strand_classes:
        sse_dict[code] = 'beta'
    for code in loop_classes:
        sse_dict[code] = 'loop'
    sses = SubsequenceData()

    # read file and parse
    start = False
    # temporary beta dictionary indexed by DSSP's sheet ID, so that all
    # strands belonging to one sheet can be grouped into one Subsequence
    beta_dict = defaultdict(list)
    prev_sstype = None
    cur_sse = {'chain': '', 'residue_tuple': [-1, -1]}
    prev_beta_id = None
    # use a context manager so the file handle is closed deterministically
    with open(dssp_fn, 'r') as fh:
        for line in fh:
            fields = line.split()
            chain_break = False
            if len(fields) < 2:
                continue
            if fields[1] == "RESIDUE":
                # data lines start after the "RESIDUE" header line
                start = True
                continue
            if not start:
                continue
            # DSSP uses fixed column positions; a blank residue column
            # marks a chain break
            if line[9] == " ":
                chain_break = True
            elif limit_to_chains != '' and line[11] not in limit_to_chains:
                continue

            # gather line info (skipped on a chain break, so the variables
            # below keep their values from the previous data line)
            if not chain_break:
                pdb_res_num = int(line[5:10])
                chain = line[11]
                sstype = sse_dict[line[16]]
                beta_id = line[33]

            # decide whether to extend the current SSE or store it
            if prev_sstype is None:
                cur_sse = {'chain': chain,
                           'residue_tuple': [pdb_res_num, pdb_res_num]}
            elif sstype != prev_sstype or chain_break:
                # flush cur_sse to the right place
                if prev_sstype in ['helix', 'loop']:
                    sses.add_subsequence(prev_sstype, Subsequence(**cur_sse))
                elif prev_sstype == 'beta':
                    beta_dict[prev_beta_id].append(cur_sse)
                cur_sse = {'chain': chain,
                           'residue_tuple': [pdb_res_num, pdb_res_num]}
            else:
                cur_sse['residue_tuple'][1] = pdb_res_num
            if chain_break:
                prev_sstype = None
                prev_beta_id = None
            else:
                prev_sstype = sstype
                prev_beta_id = beta_id

    # flush the final SSE, if any
    if prev_sstype is not None:
        if prev_sstype in ['helix', 'loop']:
            sses.add_subsequence(prev_sstype, Subsequence(**cur_sse))
        elif prev_sstype == 'beta':
            beta_dict[prev_beta_id].append(cur_sse)
    # gather betas: one Subsequence per sheet, one range per strand
    for beta_sheet in beta_dict:
        seq = Subsequence()
        for strand in beta_dict[beta_sheet]:
            seq.add_range(**strand)
        sses.add_subsequence('beta', seq)
    return sses
106 
def parse_xlinks_davis(data_fn,
                       max_num=-1,
                       name_map=None,
                       named_offsets=None,
                       use_chains=None):
    """Read a crosslink file in the format from Trisha Davis. Lines are:
    ignore ignore seq1 seq2 >Name(res) >Name(res) score
    @param data_fn The data file name
    @param max_num Maximum number of XL to read (-1 is all)
    @param name_map Dictionary mapping text file names to the molecule name
    @param named_offsets Integer offsets to apply to the indexing in the file
    @param use_chains Unused; kept for backward compatibility
    Output is a CrossLinkData object containing SelectionDictionaries
    data[unique_id] =
    [ { 'r1': {'molecule':'A','residue_index':5},
        'r2': {'molecule':'B','residue_index':100},
        'score': 123 },
      { 'r1': {'molecule':'C','residue_index':63},
        'r2': {'molecule':'D','residue_index':94},
        'score': 600 }
    ]
    """
    # avoid mutable default arguments; callers passing dicts are unaffected
    if name_map is None:
        name_map = {}
    if named_offsets is None:
        named_offsets = {}
    found = set()
    data = CrossLinkData()
    with open(data_fn, 'r') as inf:
        for nl, l in enumerate(inf):
            if max_num != -1 and nl >= max_num:
                # line numbers only grow, so we can stop reading here
                # instead of scanning the rest of the file
                break
            ig1, ig2, seq1, seq2, s1, s2, score = l.split()
            score = float(score)
            # entries look like ">Name(residue)"
            m1 = re.search(r'>([\w-]+)\((\w+)\)', s1)
            m2 = re.search(r'>([\w-]+)\((\w+)\)', s2)

            n1 = m1.group(1)
            r1 = int(m1.group(2))
            if n1 in name_map:
                n1 = name_map[n1]
            if n1 in named_offsets:
                r1 += named_offsets[n1]

            n2 = m2.group(1)
            r2 = int(m2.group(2))
            if n2 in name_map:
                n2 = name_map[n2]
            if n2 in named_offsets:
                r2 += named_offsets[n2]
            # order-independent key so A-B and B-A count as duplicates
            key = tuple(sorted(['%s.%i' % (n1, r1), '%s.%i' % (n2, r2)]))
            if key in found:
                print('skipping duplicated xl', key)
                continue
            found.add(key)
            data.add_cross_link(nl,
                                {'molecule': n1, 'residue_index': r1},
                                {'molecule': n2, 'residue_index': r2},
                                score)
    return data
165 
166 
class Subsequence(object):
    """A light class to store multiple not-necessarily-contiguous residue ranges."""

    def __init__(self, chain=None, molecule=None, residue_tuple=None,
                 subsequences=None):
        """Create subsequence and optionally pass the first contiguous range.
        @param chain The chain ID
        @param molecule The molecule name
        @param residue_tuple PDB-style inclusive residue range
        @param subsequences A list of other subsequences to combine
               (not implemented)
        """
        # each entry is a dict with keys 'chain', 'molecule', 'residue_tuple'
        self.seqs = []
        if chain or molecule or residue_tuple:
            self.add_range(chain, molecule, residue_tuple)
        if subsequences:
            # NOTE(review): combining subsequences was never implemented
            pass

    def add_range(self, chain=None, molecule=None, residue_tuple=None):
        """Add an additional residue range to this subsequence.
        @param chain The chain ID
        @param molecule The molecule name
        @param residue_tuple PDB-style inclusive residue range
        """
        self.seqs.append({'chain': chain,
                          'molecule': molecule,
                          'residue_tuple': residue_tuple})

    def join(self, new_subsequence):
        """Append all ranges of another Subsequence to this one."""
        for s in new_subsequence:
            self.seqs.append(s)

    def get_selection(self, hier, **kwargs):
        """Create an IMP Selection from this subsequence
        @param hier An IMP hierarchy or list of them
        \note any additional keyword arguments will be appended to the
              selection
        """
        for nseq, seq in enumerate(self.seqs):
            # copy so we neither mutate the caller's kwargs nor leak
            # selection keys from one range into the next
            args = dict(kwargs)
            if seq['chain']:
                args['chain'] = seq['chain']
            if seq['molecule']:
                args['molecule'] = seq['molecule']
            if seq['residue_tuple']:
                args['residue_indexes'] = list(range(seq['residue_tuple'][0],
                                                     seq['residue_tuple'][1]+1))
            sel = IMP.atom.Selection(hier, **args)
            if nseq == 0:
                ret = sel
            else:
                ret |= sel
        return ret

    def __repr__(self):
        rep = ''
        for nseq, seq in enumerate(self.seqs):
            this_str = []
            if seq['residue_tuple'] is not None:
                this_str.append('%i-%i' % (seq['residue_tuple'][0],
                                           seq['residue_tuple'][1]))
            if seq['molecule'] is not None:
                this_str.append('%s' % seq['molecule'])
            if seq['chain'] is not None:
                this_str.append('%s' % seq['chain'])
            rep += '.'.join(this_str)
            if nseq < len(self.seqs)-1:
                rep += '_'
        return rep

    def __getitem__(self, key):
        # bug fix: ranges are stored in self.seqs (self.data never existed)
        return self.seqs[key]

    def __iter__(self):
        return self.seqs.__iter__()

    def __add__(self, other):
        # note: mutates self in place and returns it (historical behavior)
        self.join(other)
        return self
class SubsequenceData(object):
    """Group a bunch of subsequences with certain labels.
    Use cases: storing lists of secondary structures from DSSP or PSIPRED,
    storing lists of molecules that should be made symmetric.
    """

    def __init__(self):
        """Setup groups of subsequences."""
        # label -> list of Subsequence objects
        self.data = defaultdict(list)

    def add_subsequence(self, label, subsequence):
        """Append a new subsequence under a given label.
        @param label label for this subsequence (e.g., 'helix')
        @param subsequence a Subsequence object to store under that label
               e.g. Subsequence(chain='A',residue_tuple=(10,15))
        """
        # bug fix: the original raised an undefined name 'InputError'
        # (a NameError at runtime); raise a standard ValueError instead
        if not isinstance(subsequence, Subsequence):
            raise ValueError('must provide a subsequence object')
        self.data[label].append(subsequence)

    def __getitem__(self, key):
        return self.data[key]

    def __repr__(self):
        return self.data.__repr__()

    def keys(self):
        return self.data.keys()
265 
class CrossLink(object):
    """A class to store the selection commands for a single crosslink."""

    def __init__(self, unique_id, r1, r2, score):
        """Add a crosslink.
        @param unique_id The id is used to group crosslinks that are
               alternatives
        @param r1 A dictionary of selection keywords for the first residue
        @param r2 A dictionary of selection keywords for the second residue
        @param score A score that might be used later for filtering
        @note The dictionaries can contain any Selection argument like
              molecule or residue_index
        """
        self.unique_id = unique_id
        self.r1 = r1
        self.r2 = r2
        self.score = score

    def __repr__(self):
        return "CrossLink id: " + str(self.unique_id) \
               + " r1: " + repr(self.r1) + ", r2: " + repr(self.r2)

    def intersects(self, other):
        """Return True if this crosslink shares a residue with another."""
        if self.r1 == other.r1 or self.r1 == other.r2 \
                or self.r2 == other.r1 or self.r2 == other.r2:
            return True
        else:
            return False

    def get_selection(self, mh, **kwargs):
        """Return a list of atom pairs (particles) for this crosslink.
        Found by selecting everything with r1 and r2 then returning the
        cartesian product.
        @note you may want to provide some atom specifiers like
              atom_type=IMP.atom.AtomType("CA")
        @param mh The hierarchy to select from
        @param kwargs Any additional selection arguments
        @raise ValueError if either selection matches no particles
        """
        rsel1 = self.r1.copy()
        rsel1.update(kwargs)
        rsel2 = self.r2.copy()
        rsel2.update(kwargs)
        sel1 = IMP.atom.Selection(mh, **rsel1).get_selected_particles()
        sel2 = IMP.atom.Selection(mh, **rsel2).get_selected_particles()
        # raise a specific exception type instead of bare Exception;
        # ValueError is still caught by existing "except Exception" callers
        if len(sel1) == 0:
            raise ValueError("this selection is empty: %r" % (rsel1,))
        if len(sel2) == 0:
            raise ValueError("this selection is empty: %r" % (rsel2,))
        # (a previous revision also checked for repeated copy numbers here;
        # removed as it was disabled and not known to be general)
        return [(p1, p2) for p1, p2 in itertools.product(sel1, sel2)]
323 
class CrossLinkData(object):
    """A class for storing groups of crosslinks.
    Acts like a dictionary where keys are unique IDs and values are
    CrossLinks. Equivalent (using objects instead of dicts) to a data
    structure like so:
    data[1030] =
    [ { 'r1': {'molecule':'A','residue_index':5},
        'r2': {'molecule':'B','residue_index':100},
        'Score': 123 },
      { 'r1': {'molecule':'C','residue_index':63},
        'r2': {'molecule':'D','residue_index':94},
        'Score': 600 }
    ]
    """

    def __init__(self):
        """Setup a CrossLinkData object"""
        # unique_id -> list of CrossLink objects (alternatives)
        self.data = defaultdict(list)

    def add_cross_link(self, unique_id, kws1, kws2, score):
        """Add a crosslink. They are organized by their unique_ids.
        @param unique_id The id is used to group crosslinks that are
               alternatives
        @param kws1 A dictionary of selection keywords for the first residue
        @param kws2 A dictionary of selection keywords for the second residue
        @param score A score that might be used later for filtering
        @note The dictionaries can contain any Selection argument like
              molecule or residue_index
        """
        self.data[unique_id].append(CrossLink(unique_id, kws1, kws2, score))

    def copy_cross_link(self, xl):
        """Store an existing CrossLink object under its own unique_id.
        @param xl A CrossLink object
        @raise TypeError if xl is not a CrossLink
        """
        # TypeError (a subclass of Exception) is more precise than the
        # bare Exception raised previously; existing callers still catch it
        if not isinstance(xl, CrossLink):
            raise TypeError(
                "CrossLinkData::copy_cross_link() requires a CrossLink object")
        self.data[xl.unique_id].append(xl)

    def __getitem__(self, key):
        return self.data[key]

    def __repr__(self):
        ret = "CrossLinkData with these entries:\n"
        for d in self.data:
            for xl in self.data[d]:
                ret += repr(xl) + '\n'
        return ret

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def __contains__(self, item):
        return item in self.data

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)
372 
374  out_dir,
375  stat_files,
376  number_of_best_scoring_models=10,
377  get_every=1,
378  score_key="SimplifiedModel_Total_Score_None",
379  feature_keys=None,
380  rmf_file_key="rmf_file",
381  rmf_file_frame_key="rmf_frame_index",
382  override_rmf_dir=None):
383  """Given a list of stat files, read them all and find the best models.
384  Save to a single RMF along with a stat file.
385  @param mdl The IMP Model
386  @param out_dir The output directory. Will save 3 files (RMF, stat, summary)
387  @param stat_files List of all stat files to collect
388  @param number_of_best_scoring_models Num best models to gather
389  @param get_every Skip frames
390  @param score_key Used for the ranking
391  @param feature_keys Keys to keep around
392  @param rmf_file_key The key that says RMF file name
393  @param rmf_file_frame_key The key that says RMF frame number
394  @param override_rmf_dir For output, change the name of the RMF directory (experiment)
395  """
396 
397  # start by splitting into jobs
398  try:
399  from mpi4py import MPI
400  comm = MPI.COMM_WORLD
401  rank = comm.Get_rank()
402  number_of_processes = comm.size
403  except ImportError:
404  rank = 0
405  number_of_processes = 1
406  my_stat_files=IMP.pmi.tools.chunk_list_into_segments(
407  stat_files,number_of_processes)[rank]
408 
409  # filenames
410  out_stat_fn = os.path.join(out_dir,"top_"+str(number_of_best_scoring_models)+".out")
411  out_rmf_fn = os.path.join(out_dir,"top_"+str(number_of_best_scoring_models)+".rmf3")
412 
413  # extract all the models
414  all_fields=[]
415  for nsf,sf in enumerate(my_stat_files):
416 
417  # get list of keywords
418  root_directory_of_stat_file = os.path.dirname(os.path.dirname(sf))
419  print("getting data from file %s" % sf)
421  all_keys = [score_key,
422  rmf_file_key,
423  rmf_file_frame_key]
424  for k in po.get_keys():
425  for fk in feature_keys:
426  if fk in k:
427  all_keys.append(k)
428  fields = po.get_fields(all_keys,
429  get_every=get_every)
430 
431  # check that all lengths are all equal
432  length_set = set([len(fields[f]) for f in fields])
433  minlen = min(length_set)
434  # if some of the fields are missing, truncate
435  # the feature files to the shortest one
436  if len(length_set) > 1:
437  print("get_best_models: the statfile is not synchronous")
438  minlen = min(length_set)
439  for f in fields:
440  fields[f] = fields[f][0:minlen]
441  if nsf==0:
442  all_fields=fields
443  else:
444  for k in fields:
445  all_fields[k]+=fields[k]
446 
447  if override_rmf_dir is not None:
448  for i in range(minlen):
449  all_fields[rmf_file_key][i]=os.path.join(
450  override_rmf_dir,os.path.basename(all_fields[rmf_file_key][i]))
451 
452  # gather info, sort, write
453  if number_of_processes!=1:
454  comm.Barrier()
455  if rank!=0:
456  comm.send(all_fields, dest=0, tag=11)
457  else:
458  for i in range(1,number_of_processes):
459  data_tmp = comm.recv(source=i, tag=11)
460  for k in all_fields:
461  all_fields[k]+=data_tmp[k]
462 
463  # sort by total score
464  order = sorted(range(len(all_fields[score_key])),
465  key=lambda i: float(all_fields[score_key][i]))
466 
467  # write the stat and RMF files
468  stat = open(out_stat_fn,'w')
469  rh0 = RMF.open_rmf_file_read_only(
470  os.path.join(root_directory_of_stat_file,all_fields[rmf_file_key][0]))
471  prots = IMP.rmf.create_hierarchies(rh0,mdl)
472  del rh0
473  outf = RMF.create_rmf_file(out_rmf_fn)
474  IMP.rmf.add_hierarchies(outf,prots)
475  for nm,i in enumerate(order[:number_of_best_scoring_models]):
476  dline=dict((k,all_fields[k][i]) for k in all_fields)
477  dline['orig_rmf_file']=dline[rmf_file_key]
478  dline['orig_rmf_frame_index']=dline[rmf_file_frame_key]
479  dline[rmf_file_key]=out_rmf_fn
480  dline[rmf_file_frame_key]=nm
481  rh = RMF.open_rmf_file_read_only(
482  os.path.join(root_directory_of_stat_file,all_fields[rmf_file_key][i]))
483  IMP.rmf.link_hierarchies(rh,prots)
484  IMP.rmf.load_frame(rh,all_fields[rmf_file_frame_key][i])
485  IMP.rmf.save_frame(outf)
486  del rh
487  stat.write(str(dline)+'\n')
488  stat.close()
489  print('wrote stats to',out_stat_fn)
490  print('wrote rmfs to',out_rmf_fn)
491 
492 
493 
494 
495 def get_best_models(stat_files,
496  score_key="SimplifiedModel_Total_Score_None",
497  feature_keys=None,
498  rmf_file_key="rmf_file",
499  rmf_file_frame_key="rmf_frame_index",
500  prefiltervalue=None,
501  get_every=1):
502  """ Given a list of stat files, read them all and find the best models.
503  Returns the best rmf filenames, frame numbers, scores, and values for feature keywords
504  """
505  rmf_file_list=[] # best RMF files
506  rmf_file_frame_list=[] # best RMF frames
507  score_list=[] # best scores
508  feature_keyword_list_dict=defaultdict(list) # best values of the feature keys
509  for sf in stat_files:
510  root_directory_of_stat_file = os.path.dirname(os.path.dirname(sf))
511  print("getting data from file %s" % sf)
513  keywords = po.get_keys()
514 
515  feature_keywords = [score_key,
516  rmf_file_key,
517  rmf_file_frame_key]
518 
519  for k in keywords:
520  for fk in feature_keys:
521  if fk in k:
522  feature_keywords.append(k)
523 
524  if prefiltervalue is None:
525  fields = po.get_fields(feature_keywords,
526  get_every=get_every)
527  else:
528  fields = po.get_fields(feature_keywords,
529  filtertuple=(score_key,"<",prefiltervalue),
530  get_every=get_every)
531 
532  # check that all lengths are all equal
533  length_set = set()
534  for f in fields:
535  length_set.add(len(fields[f]))
536 
537  # if some of the fields are missing, truncate
538  # the feature files to the shortest one
539  if len(length_set) > 1:
540  print("get_best_models: the statfile is not synchronous")
541  minlen = min(length_set)
542  for f in fields:
543  fields[f] = fields[f][0:minlen]
544 
545  # append to the lists
546  score_list += fields[score_key]
547  for rmf in fields[rmf_file_key]:
548  rmf_file_list.append(os.path.join(root_directory_of_stat_file,rmf))
549 
550  rmf_file_frame_list += fields[rmf_file_frame_key]
551 
552  if feature_keywords is not None:
553  for k in feature_keywords:
554  feature_keyword_list_dict[k] += fields[k]
555 
556  return rmf_file_list,rmf_file_frame_list,score_list,feature_keyword_list_dict
557 
558 def get_trajectory_models(stat_files,
559  score_key="SimplifiedModel_Total_Score_None",
560  rmf_file_key="rmf_file",
561  rmf_file_frame_key="rmf_frame_index",
562  get_every=1):
563  """ Given a list of stat files, read them all and find a trajectory of models.
564  Returns the rmf filenames, frame numbers, scores, and values for feature keywords
565  """
566  rmf_file_list=[] # best RMF files
567  rmf_file_frame_list=[] # best RMF frames
568  score_list=[] # best scores
569  for sf in stat_files:
570  root_directory_of_stat_file = os.path.dirname(os.path.dirname(sf))
571  print("getting data from file %s" % sf)
573  keywords = po.get_keys()
574 
575  feature_keywords = [score_key,
576  rmf_file_key,
577  rmf_file_frame_key]
578 
579  fields = po.get_fields(feature_keywords,
580  get_every=get_every)
581 
582  # check that all lengths are all equal
583  length_set = set()
584  for f in fields:
585  length_set.add(len(fields[f]))
586 
587  # if some of the fields are missing, truncate
588  # the feature files to the shortest one
589  if len(length_set) > 1:
590  print("get_best_models: the statfile is not synchronous")
591  minlen = min(length_set)
592  for f in fields:
593  fields[f] = fields[f][0:minlen]
594 
595  # append to the lists
596  score_list += fields[score_key]
597  for rmf in fields[rmf_file_key]:
598  rmf_file_list.append(os.path.join(root_directory_of_stat_file,rmf))
599 
600  rmf_file_frame_list += fields[rmf_file_frame_key]
601 
602  return rmf_file_list,rmf_file_frame_list,score_list
603 
604 
606  rmf_tuples,
607  alignment_components=None,
608  rmsd_calculation_components=None):
609  """ Read in coordinates of a set of RMF tuples.
610  Returns the coordinates split as requested (all, alignment only, rmsd only) as well as
611  RMF file names (as keys in a dictionary, with values being the rank number) and just a plain list
612  @param model The IMP model
613  @param rmf_tuples [score,filename,frame number,original order number, rank]
614  @param alignment_components Tuples to specify what you're aligning on
615  @param rmsd_calculation_components Tuples to specify what components are used for RMSD calc
616  """
617  all_coordinates = []
618  rmsd_coordinates = []
619  alignment_coordinates = []
620  all_rmf_file_names = []
621  rmf_file_name_index_dict = {} # storing the features
622 
623  for cnt, tpl in enumerate(rmf_tuples):
624  rmf_file = tpl[1]
625  frame_number = tpl[2]
626 
627  prot = IMP.pmi.analysis.get_hier_from_rmf(model,
628  frame_number,
629  rmf_file)
630  if not prot:
631  continue
632 
633  # getting the particles
635  all_particles=[pp for key in part_dict for pp in part_dict[key]]
636  all_ps_set=set(all_particles)
637  # getting the coordinates
638  model_coordinate_dict = {}
639  template_coordinate_dict={}
640  rmsd_coordinate_dict={}
641  for pr in part_dict:
642  model_coordinate_dict[pr] = np.array(
643  [np.array(IMP.core.XYZ(i).get_coordinates()) for i in part_dict[pr]])
644 
645  if alignment_components is not None:
646  for pr in alignment_components:
647  if type(alignment_components[pr]) is str:
648  name=alignment_components[pr]
649  s=IMP.atom.Selection(prot,molecule=name)
650  elif type(alignment_components[pr]) is tuple:
651  name=alignment_components[pr][2]
652  rend=alignment_components[pr][1]
653  rbegin=alignment_components[pr][0]
654  s=IMP.atom.Selection(prot,molecule=name,residue_indexes=range(rbegin,rend+1))
655  ps=s.get_selected_particles()
656  filtered_particles=[p for p in ps if p in all_ps_set]
657  template_coordinate_dict[pr] = \
658  [list(map(float,IMP.core.XYZ(i).get_coordinates())) for i in filtered_particles]
659 
660  if rmsd_calculation_components is not None:
661  for pr in rmsd_calculation_components:
662  if type(rmsd_calculation_components[pr]) is str:
663  name=rmsd_calculation_components[pr]
664  s=IMP.atom.Selection(prot,molecule=name)
665  elif type(rmsd_calculation_components[pr]) is tuple:
666  name=rmsd_calculation_components[pr][2]
667  rend=rmsd_calculation_components[pr][1]
668  rbegin=rmsd_calculation_components[pr][0]
669  s=IMP.atom.Selection(prot,molecule=name,residue_indexes=range(rbegin,rend+1))
670  ps=s.get_selected_particles()
671  filtered_particles=[p for p in ps if p in all_ps_set]
672  rmsd_coordinate_dict[pr] = \
673  [list(map(float,IMP.core.XYZ(i).get_coordinates())) for i in filtered_particles]
674 
675  all_coordinates.append(model_coordinate_dict)
676  alignment_coordinates.append(template_coordinate_dict)
677  rmsd_coordinates.append(rmsd_coordinate_dict)
678  frame_name = rmf_file + '|' + str(frame_number)
679  all_rmf_file_names.append(frame_name)
680  rmf_file_name_index_dict[frame_name] = tpl[4]
681  return all_coordinates,alignment_coordinates,rmsd_coordinates,rmf_file_name_index_dict,all_rmf_file_names
682 
683 def get_bead_sizes(model,rmf_tuple,rmsd_calculation_components=None):
684  '''
685  @param model The IMP model
686  @param rmf_tuple score,filename,frame number,original order number, rank
687  @param rmsd_calculation_components Tuples to specify what components are used for RMSD calc
688  '''
689  rmf_file = rmf_tuple[1]
690  frame_number = rmf_tuple[2]
691 
692  prot = IMP.pmi.analysis.get_hier_from_rmf(model,
693  frame_number,
694  rmf_file)
695 
696  # getting the particles
698  all_particles=[pp for key in part_dict for pp in part_dict[key]]
699  all_ps_set=set(all_particles)
700  # getting the coordinates
701  rmsd_bead_size_dict={}
702 
703  if rmsd_calculation_components is not None:
704  for pr in rmsd_calculation_components:
705  if type(rmsd_calculation_components[pr]) is str:
706  name=rmsd_calculation_components[pr]
707  s=IMP.atom.Selection(prot,molecule=name)
708  elif type(rmsd_calculation_components[pr]) is tuple:
709  name=rmsd_calculation_components[pr][2]
710  rend=rmsd_calculation_components[pr][1]
711  rbegin=rmsd_calculation_components[pr][0]
712  s=IMP.atom.Selection(prot,molecule=name,residue_indexes=range(rbegin,rend+1))
713  ps=s.get_selected_particles()
714  filtered_particles=[p for p in ps if p in all_ps_set]
715  rmsd_bead_size_dict[pr] = \
716  [len(IMP.pmi.tools.get_residue_indexes(p)) for p in filtered_particles]
717 
718 
719  return rmsd_bead_size_dict
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, kernel::Model *m)
void save_frame(RMF::FileHandle file, unsigned int, std::string name="")
Definition: frames.h:42
A class for reading stat files.
Definition: output.py:613
def parse_dssp
read dssp file, get SSEs.
def get_best_models
Given a list of stat files, read them all and find the best models.
Miscellaneous utilities.
Definition: tools.py:1
A class for storing groups of crosslinks.
void load_frame(RMF::FileConstHandle file, unsigned int frame)
Definition: frames.h:27
def add_range
Add some stuff to this subsequence.
A light class to store multiple not-necessarily-contiguous residue ranges.
def add_cross_link
Add a crosslink.
def __init__
Setup a CrossLinkData object.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Create subsequence and optionally pass the first contiguous range.
def __init__
Setup groups of subsequences.
def parse_xlinks_davis
Format from Trisha Davis.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
Group a bunch of subsequences with certain labels Use cases: storing lists of secondary structures fr...
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
def get_selection
Create an IMP Selection from this subsequence.
Classes for writing output files and processing them.
Definition: output.py:1
def save_best_models
Given a list of stat files, read them all and find the best models.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
Python classes to represent, score, sample and analyze models.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:62
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:930
def add_subsequence
Append a new cross-link to a given unique ID.